From df2d36dc89a5758114c4e94ec5232b3106665ca0 Mon Sep 17 00:00:00 2001 From: seungw00-ji Date: Fri, 8 Oct 2021 11:10:08 +0900 Subject: [PATCH] add mmcv/mmcv folder --- mmcv/mmcv/__init__.py | 15 + mmcv/mmcv/arraymisc/__init__.py | 4 + mmcv/mmcv/arraymisc/quantization.py | 55 + mmcv/mmcv/cnn/__init__.py | 30 + mmcv/mmcv/cnn/alexnet.py | 62 + mmcv/mmcv/cnn/bricks/__init__.py | 31 + mmcv/mmcv/cnn/bricks/activation.py | 24 + mmcv/mmcv/cnn/bricks/context_block.py | 124 + mmcv/mmcv/cnn/bricks/conv.py | 43 + .../cnn/bricks/conv2d_adaptive_padding.py | 61 + mmcv/mmcv/cnn/bricks/conv_module.py | 190 + mmcv/mmcv/cnn/bricks/conv_ws.py | 147 + .../bricks/depthwise_separable_conv_module.py | 89 + mmcv/mmcv/cnn/bricks/generalized_attention.py | 405 + mmcv/mmcv/cnn/bricks/hsigmoid.py | 21 + mmcv/mmcv/cnn/bricks/hswish.py | 28 + mmcv/mmcv/cnn/bricks/non_local.py | 305 + mmcv/mmcv/cnn/bricks/norm.py | 143 + mmcv/mmcv/cnn/bricks/padding.py | 35 + mmcv/mmcv/cnn/bricks/plugin.py | 88 + mmcv/mmcv/cnn/bricks/registry.py | 8 + mmcv/mmcv/cnn/bricks/scale.py | 20 + mmcv/mmcv/cnn/bricks/swish.py | 24 + mmcv/mmcv/cnn/bricks/upsample.py | 83 + mmcv/mmcv/cnn/bricks/wrappers.py | 116 + mmcv/mmcv/cnn/resnet.py | 316 + mmcv/mmcv/cnn/utils/__init__.py | 12 + mmcv/mmcv/cnn/utils/flops_counter.py | 583 ++ mmcv/mmcv/cnn/utils/fuse_conv_bn.py | 58 + mmcv/mmcv/cnn/utils/weight_init.py | 66 + mmcv/mmcv/cnn/vgg.py | 175 + mmcv/mmcv/fileio/__init__.py | 11 + mmcv/mmcv/fileio/file_client.py | 297 + mmcv/mmcv/fileio/handlers/__init__.py | 7 + mmcv/mmcv/fileio/handlers/base.py | 25 + mmcv/mmcv/fileio/handlers/json_handler.py | 36 + mmcv/mmcv/fileio/handlers/pickle_handler.py | 26 + mmcv/mmcv/fileio/handlers/yaml_handler.py | 24 + mmcv/mmcv/fileio/io.py | 112 + mmcv/mmcv/fileio/parse.py | 51 + mmcv/mmcv/image/__init__.py | 24 + mmcv/mmcv/image/colorspace.py | 306 + mmcv/mmcv/image/geometric.py | 606 ++ mmcv/mmcv/image/io.py | 233 + mmcv/mmcv/image/misc.py | 43 + mmcv/mmcv/image/photometric.py | 226 + mmcv/mmcv/model_zoo/deprecated.json | 6 + mmcv/mmcv/model_zoo/mmcls.json | 13 + mmcv/mmcv/model_zoo/open_mmlab.json | 46 + mmcv/mmcv/onnx/__init__.py | 3 + mmcv/mmcv/onnx/onnx_utils/__init__.py | 0 mmcv/mmcv/onnx/onnx_utils/symbolic_helper.py | 312 + mmcv/mmcv/onnx/symbolic.py | 389 + mmcv/mmcv/ops/__init__.py | 42 + mmcv/mmcv/ops/bbox.py | 71 + mmcv/mmcv/ops/carafe.py | 284 + mmcv/mmcv/ops/cc_attention.py | 95 + mmcv/mmcv/ops/corner_pool.py | 125 + .../ops/csrc/bbox_overlaps_cuda_kernel.cuh | 83 + mmcv/mmcv/ops/csrc/carafe_cuda_kernel.cuh | 314 + .../ops/csrc/carafe_naive_cuda_kernel.cuh | 110 + .../ops/csrc/cc_attention_cuda_kernel.cuh | 185 + mmcv/mmcv/ops/csrc/common_cuda_helper.hpp | 110 + .../mmcv/ops/csrc/deform_conv_cuda_kernel.cuh | 362 + .../ops/csrc/deform_roi_pool_cuda_kernel.cuh | 183 + .../ops/csrc/masked_conv2d_cuda_kernel.cuh | 61 + .../modulated_deform_conv_cuda_kernel.cuh | 394 + mmcv/mmcv/ops/csrc/nms_cuda_kernel.cuh | 69 + mmcv/mmcv/ops/csrc/parrots/bbox_overlaps.cpp | 36 + .../ops/csrc/parrots/bbox_overlaps_cuda.cu | 22 + mmcv/mmcv/ops/csrc/parrots/carafe.cpp | 84 + mmcv/mmcv/ops/csrc/parrots/carafe_cuda.cu | 144 + mmcv/mmcv/ops/csrc/parrots/carafe_naive.cpp | 73 + .../ops/csrc/parrots/carafe_naive_cuda.cu | 46 + mmcv/mmcv/ops/csrc/parrots/cc_attention.cpp | 88 + .../csrc/parrots/cc_attention_cuda_kernel.cu | 109 + mmcv/mmcv/ops/csrc/parrots/corner_pool.cpp | 83 + mmcv/mmcv/ops/csrc/parrots/deform_conv.cpp | 181 + .../mmcv/ops/csrc/parrots/deform_conv_cuda.cu | 518 ++ .../mmcv/ops/csrc/parrots/deform_roi_pool.cpp | 93 + .../ops/csrc/parrots/deform_roi_pool_cuda.cu | 48 + mmcv/mmcv/ops/csrc/parrots/focal_loss.cpp | 130 + mmcv/mmcv/ops/csrc/parrots/focal_loss_cuda.cu | 88 + mmcv/mmcv/ops/csrc/parrots/masked_conv2d.cpp | 80 + .../ops/csrc/parrots/masked_conv2d_cuda.cu | 45 + .../csrc/parrots/modulated_deform_conv.cpp | 134 + .../parrots/modulated_deform_conv_cuda.cu | 341 + mmcv/mmcv/ops/csrc/parrots/nms.cpp | 248 + mmcv/mmcv/ops/csrc/parrots/nms_cuda.cu | 55 + .../ops/csrc/parrots/parrots_cpp_helper.cpp | 2 + .../ops/csrc/parrots/parrots_cuda_helper.cu | 3 + mmcv/mmcv/ops/csrc/parrots/psamask.cpp | 304 + mmcv/mmcv/ops/csrc/parrots/psamask_cuda.cu | 48 + mmcv/mmcv/ops/csrc/parrots/roi_align.cpp | 100 + mmcv/mmcv/ops/csrc/parrots/roi_align_cuda.cu | 52 + mmcv/mmcv/ops/csrc/parrots/roi_pool.cpp | 77 + mmcv/mmcv/ops/csrc/parrots/roi_pool_cuda.cu | 45 + mmcv/mmcv/ops/csrc/parrots/sync_bn.cpp | 139 + mmcv/mmcv/ops/csrc/parrots/sync_bn_cuda.cu | 104 + mmcv/mmcv/ops/csrc/parrots/tin_shift.cpp | 42 + mmcv/mmcv/ops/csrc/parrots/tin_shift_cuda.cu | 51 + mmcv/mmcv/ops/csrc/parrots_cpp_helper.hpp | 11 + mmcv/mmcv/ops/csrc/parrots_cuda_helper.hpp | 111 + .../ops/csrc/parrots_cudawarpfunction.cuh | 109 + mmcv/mmcv/ops/csrc/psamask_cuda_kernel.cuh | 140 + mmcv/mmcv/ops/csrc/pytorch/bbox_overlaps.cpp | 29 + .../ops/csrc/pytorch/bbox_overlaps_cuda.cu | 22 + mmcv/mmcv/ops/csrc/pytorch/carafe.cpp | 83 + mmcv/mmcv/ops/csrc/pytorch/carafe_cuda.cu | 179 + mmcv/mmcv/ops/csrc/pytorch/carafe_naive.cpp | 68 + .../ops/csrc/pytorch/carafe_naive_cuda.cu | 51 + mmcv/mmcv/ops/csrc/pytorch/cc_attention.cpp | 98 + .../ops/csrc/pytorch/cc_attention_cuda.cu | 142 + mmcv/mmcv/ops/csrc/pytorch/corner_pool.cpp | 239 + mmcv/mmcv/ops/csrc/pytorch/deform_conv.cpp | 138 + .../mmcv/ops/csrc/pytorch/deform_conv_cuda.cu | 517 ++ .../mmcv/ops/csrc/pytorch/deform_roi_pool.cpp | 81 + .../ops/csrc/pytorch/deform_roi_pool_cuda.cu | 54 + mmcv/mmcv/ops/csrc/pytorch/focal_loss.cpp | 130 + mmcv/mmcv/ops/csrc/pytorch/focal_loss_cuda.cu | 110 + mmcv/mmcv/ops/csrc/pytorch/info.cpp | 49 + mmcv/mmcv/ops/csrc/pytorch/masked_conv2d.cpp | 74 + .../ops/csrc/pytorch/masked_conv2d_cuda.cu | 53 + .../csrc/pytorch/modulated_deform_conv.cpp | 109 + .../pytorch/modulated_deform_conv_cuda.cu | 286 + mmcv/mmcv/ops/csrc/pytorch/nms.cpp | 260 + mmcv/mmcv/ops/csrc/pytorch/nms_cuda.cu | 52 + mmcv/mmcv/ops/csrc/pytorch/psamask.cpp | 255 + mmcv/mmcv/ops/csrc/pytorch/psamask_cuda.cu | 62 + mmcv/mmcv/ops/csrc/pytorch/pybind.cpp | 360 + mmcv/mmcv/ops/csrc/pytorch/roi_align.cpp | 129 + mmcv/mmcv/ops/csrc/pytorch/roi_align_cpu.cpp | 430 ++ mmcv/mmcv/ops/csrc/pytorch/roi_align_cuda.cu | 57 + mmcv/mmcv/ops/csrc/pytorch/roi_pool.cpp | 66 + mmcv/mmcv/ops/csrc/pytorch/roi_pool_cuda.cu | 49 + mmcv/mmcv/ops/csrc/pytorch/sync_bn.cpp | 158 + mmcv/mmcv/ops/csrc/pytorch/sync_bn_cuda.cu | 109 + mmcv/mmcv/ops/csrc/pytorch/tin_shift.cpp | 51 + mmcv/mmcv/ops/csrc/pytorch/tin_shift_cuda.cu | 53 + mmcv/mmcv/ops/csrc/pytorch_cpp_helper.hpp | 22 + mmcv/mmcv/ops/csrc/pytorch_cuda_helper.hpp | 18 + mmcv/mmcv/ops/csrc/roi_align_cuda_kernel.cuh | 205 + mmcv/mmcv/ops/csrc/roi_pool_cuda_kernel.cuh | 92 + .../csrc/sigmoid_focal_loss_cuda_kernel.cuh | 70 + .../csrc/softmax_focal_loss_cuda_kernel.cuh | 71 + mmcv/mmcv/ops/csrc/sync_bn_cuda_kernel.cuh | 330 + mmcv/mmcv/ops/csrc/tin_shift_cuda_kernel.cuh | 60 + mmcv/mmcv/ops/deform_conv.py | 324 + mmcv/mmcv/ops/deform_roi_pool.py | 203 + mmcv/mmcv/ops/deprecated_wrappers.py | 42 + mmcv/mmcv/ops/focal_loss.py | 211 + mmcv/mmcv/ops/info.py | 20 + mmcv/mmcv/ops/masked_conv.py | 110 + mmcv/mmcv/ops/merge_cells.py | 148 + mmcv/mmcv/ops/modulated_deform_conv.py | 273 + mmcv/mmcv/ops/nms.py | 307 + mmcv/mmcv/ops/point_sample.py | 214 + mmcv/mmcv/ops/psa_mask.py | 89 + mmcv/mmcv/ops/roi_align.py | 187 + mmcv/mmcv/ops/roi_pool.py | 86 + mmcv/mmcv/ops/saconv.py | 132 + mmcv/mmcv/ops/sync_bn.py | 195 + mmcv/mmcv/ops/tin_shift.py | 62 + mmcv/mmcv/parallel/__init__.py | 13 + mmcv/mmcv/parallel/_functions.py | 79 + mmcv/mmcv/parallel/collate.py | 84 + mmcv/mmcv/parallel/data_container.py | 89 + mmcv/mmcv/parallel/data_parallel.py | 89 + mmcv/mmcv/parallel/distributed.py | 85 + mmcv/mmcv/parallel/distributed_deprecated.py | 69 + mmcv/mmcv/parallel/registry.py | 7 + mmcv/mmcv/parallel/scatter_gather.py | 59 + mmcv/mmcv/parallel/utils.py | 20 + mmcv/mmcv/runner/__init__.py | 37 + mmcv/mmcv/runner/base_runner.py | 437 ++ mmcv/mmcv/runner/builder.py | 7 + mmcv/mmcv/runner/checkpoint.py | 380 + mmcv/mmcv/runner/dist_utils.py | 167 + mmcv/mmcv/runner/epoch_based_runner.py | 182 + mmcv/mmcv/runner/fp16_utils.py | 350 + mmcv/mmcv/runner/hooks/__init__.py | 22 + mmcv/mmcv/runner/hooks/checkpoint.py | 101 + mmcv/mmcv/runner/hooks/closure.py | 11 + mmcv/mmcv/runner/hooks/ema.py | 88 + mmcv/mmcv/runner/hooks/hook.py | 61 + mmcv/mmcv/runner/hooks/iter_timer.py | 18 + mmcv/mmcv/runner/hooks/logger/__init__.py | 12 + mmcv/mmcv/runner/hooks/logger/base.py | 173 + mmcv/mmcv/runner/hooks/logger/mlflow.py | 77 + mmcv/mmcv/runner/hooks/logger/pavi.py | 90 + mmcv/mmcv/runner/hooks/logger/tensorboard.py | 55 + mmcv/mmcv/runner/hooks/logger/text.py | 168 + mmcv/mmcv/runner/hooks/logger/wandb.py | 46 + mmcv/mmcv/runner/hooks/lr_updater.py | 416 ++ mmcv/mmcv/runner/hooks/memory.py | 25 + mmcv/mmcv/runner/hooks/momentum_updater.py | 199 + mmcv/mmcv/runner/hooks/optimizer.py | 150 + mmcv/mmcv/runner/hooks/sampler_seed.py | 14 + mmcv/mmcv/runner/hooks/sync_buffer.py | 22 + mmcv/mmcv/runner/iter_based_runner.py | 246 + mmcv/mmcv/runner/log_buffer.py | 41 + mmcv/mmcv/runner/optimizer/__init__.py | 8 + mmcv/mmcv/runner/optimizer/builder.py | 43 + .../runner/optimizer/default_constructor.py | 248 + mmcv/mmcv/runner/priority.py | 54 + mmcv/mmcv/runner/utils.py | 81 + mmcv/mmcv/utils/__init__.py | 52 + mmcv/mmcv/utils/config.py | 467 ++ mmcv/mmcv/utils/env.py | 93 + mmcv/mmcv/utils/ext_loader.py | 33 + mmcv/mmcv/utils/logging.py | 93 + mmcv/mmcv/utils/misc.py | 315 + mmcv/mmcv/utils/parrots_wrapper.py | 95 + mmcv/mmcv/utils/path.py | 98 + mmcv/mmcv/utils/progressbar.py | 208 + mmcv/mmcv/utils/registry.py | 171 + mmcv/mmcv/utils/timer.py | 118 + mmcv/mmcv/utils/version_utils.py | 67 + mmcv/mmcv/version.py | 29 + mmcv/mmcv/video/__init__.py | 11 + mmcv/mmcv/video/io.py | 323 + mmcv/mmcv/video/optflow.py | 169 + mmcv/mmcv/video/optflow_warp/__init__.py | 1 + mmcv/mmcv/video/optflow_warp/flow_warp.cpp | 76 + mmcv/mmcv/video/optflow_warp/flow_warp.hpp | 30 + .../video/optflow_warp/flow_warp_module.cpp | 6624 +++++++++++++++++ .../video/optflow_warp/flow_warp_module.pyx | 29 + mmcv/mmcv/video/processing.py | 159 + mmcv/mmcv/visualization/__init__.py | 9 + mmcv/mmcv/visualization/color.py | 51 + mmcv/mmcv/visualization/image.py | 150 + mmcv/mmcv/visualization/optflow.py | 112 + 232 files changed, 34909 insertions(+) create mode 100644 mmcv/mmcv/__init__.py create mode 100644 mmcv/mmcv/arraymisc/__init__.py create mode 100644 mmcv/mmcv/arraymisc/quantization.py create mode 100644 mmcv/mmcv/cnn/__init__.py create mode 100644 mmcv/mmcv/cnn/alexnet.py create mode 100644 mmcv/mmcv/cnn/bricks/__init__.py create mode 100644 mmcv/mmcv/cnn/bricks/activation.py create mode 100644 mmcv/mmcv/cnn/bricks/context_block.py create mode 100644 mmcv/mmcv/cnn/bricks/conv.py create mode 100644 mmcv/mmcv/cnn/bricks/conv2d_adaptive_padding.py create mode 100644 mmcv/mmcv/cnn/bricks/conv_module.py create mode 100644 mmcv/mmcv/cnn/bricks/conv_ws.py create mode 100644 mmcv/mmcv/cnn/bricks/depthwise_separable_conv_module.py create mode 100644 mmcv/mmcv/cnn/bricks/generalized_attention.py create mode 100644 mmcv/mmcv/cnn/bricks/hsigmoid.py create mode 100644 mmcv/mmcv/cnn/bricks/hswish.py create mode 100644 mmcv/mmcv/cnn/bricks/non_local.py create mode 100644 mmcv/mmcv/cnn/bricks/norm.py create mode 100644 mmcv/mmcv/cnn/bricks/padding.py create mode 100644 mmcv/mmcv/cnn/bricks/plugin.py create mode 100644 mmcv/mmcv/cnn/bricks/registry.py create mode 100644 mmcv/mmcv/cnn/bricks/scale.py create mode 100644 mmcv/mmcv/cnn/bricks/swish.py create mode 100644 mmcv/mmcv/cnn/bricks/upsample.py create mode 100644 mmcv/mmcv/cnn/bricks/wrappers.py create mode 100644 mmcv/mmcv/cnn/resnet.py create mode 100644 mmcv/mmcv/cnn/utils/__init__.py create mode 100644 mmcv/mmcv/cnn/utils/flops_counter.py create mode 100644 mmcv/mmcv/cnn/utils/fuse_conv_bn.py create mode 100644 mmcv/mmcv/cnn/utils/weight_init.py create mode 100644 mmcv/mmcv/cnn/vgg.py create mode 100644 mmcv/mmcv/fileio/__init__.py create mode 100644 mmcv/mmcv/fileio/file_client.py create mode 100644 mmcv/mmcv/fileio/handlers/__init__.py create mode 100644 mmcv/mmcv/fileio/handlers/base.py create mode 100644 mmcv/mmcv/fileio/handlers/json_handler.py create mode 100644 mmcv/mmcv/fileio/handlers/pickle_handler.py create mode 100644 mmcv/mmcv/fileio/handlers/yaml_handler.py create mode 100644 mmcv/mmcv/fileio/io.py create mode 100644 mmcv/mmcv/fileio/parse.py create mode 100644 mmcv/mmcv/image/__init__.py create mode 100644 mmcv/mmcv/image/colorspace.py create mode 100644 mmcv/mmcv/image/geometric.py create mode 100644 mmcv/mmcv/image/io.py create mode 100644 mmcv/mmcv/image/misc.py create mode 100644 mmcv/mmcv/image/photometric.py create mode 100644 mmcv/mmcv/model_zoo/deprecated.json create mode 100644 mmcv/mmcv/model_zoo/mmcls.json create mode 100644 mmcv/mmcv/model_zoo/open_mmlab.json create mode 100644 mmcv/mmcv/onnx/__init__.py create mode 100644 mmcv/mmcv/onnx/onnx_utils/__init__.py create mode 100644 mmcv/mmcv/onnx/onnx_utils/symbolic_helper.py create mode 100644 mmcv/mmcv/onnx/symbolic.py create mode 100644 mmcv/mmcv/ops/__init__.py create mode 100644 mmcv/mmcv/ops/bbox.py create mode 100644 mmcv/mmcv/ops/carafe.py create mode 100644 mmcv/mmcv/ops/cc_attention.py create mode 100644 mmcv/mmcv/ops/corner_pool.py create mode 100644 mmcv/mmcv/ops/csrc/bbox_overlaps_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/csrc/carafe_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/csrc/carafe_naive_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/csrc/cc_attention_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/csrc/common_cuda_helper.hpp create mode 100644 mmcv/mmcv/ops/csrc/deform_conv_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/csrc/deform_roi_pool_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/csrc/masked_conv2d_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/csrc/modulated_deform_conv_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/csrc/nms_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/csrc/parrots/bbox_overlaps.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/bbox_overlaps_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots/carafe.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/carafe_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots/carafe_naive.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/carafe_naive_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots/cc_attention.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/cc_attention_cuda_kernel.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots/corner_pool.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/deform_conv.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/deform_conv_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots/deform_roi_pool.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/deform_roi_pool_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots/focal_loss.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/focal_loss_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots/masked_conv2d.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/masked_conv2d_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots/modulated_deform_conv.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/modulated_deform_conv_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots/nms.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/nms_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots/parrots_cpp_helper.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/parrots_cuda_helper.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots/psamask.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/psamask_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots/roi_align.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/roi_align_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots/roi_pool.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/roi_pool_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots/sync_bn.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/sync_bn_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots/tin_shift.cpp create mode 100644 mmcv/mmcv/ops/csrc/parrots/tin_shift_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/parrots_cpp_helper.hpp create mode 100644 mmcv/mmcv/ops/csrc/parrots_cuda_helper.hpp create mode 100644 mmcv/mmcv/ops/csrc/parrots_cudawarpfunction.cuh create mode 100644 mmcv/mmcv/ops/csrc/psamask_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/csrc/pytorch/bbox_overlaps.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/bbox_overlaps_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/pytorch/carafe.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/carafe_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/pytorch/carafe_naive.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/carafe_naive_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/pytorch/cc_attention.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/cc_attention_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/pytorch/corner_pool.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/deform_conv.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/deform_conv_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/pytorch/deform_roi_pool.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/deform_roi_pool_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/pytorch/focal_loss.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/focal_loss_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/pytorch/info.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/masked_conv2d.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/masked_conv2d_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/pytorch/modulated_deform_conv.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/modulated_deform_conv_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/pytorch/nms.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/nms_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/pytorch/psamask.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/psamask_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/pytorch/pybind.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/roi_align.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/roi_align_cpu.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/roi_align_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/pytorch/roi_pool.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/roi_pool_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/pytorch/sync_bn.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/sync_bn_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/pytorch/tin_shift.cpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch/tin_shift_cuda.cu create mode 100644 mmcv/mmcv/ops/csrc/pytorch_cpp_helper.hpp create mode 100644 mmcv/mmcv/ops/csrc/pytorch_cuda_helper.hpp create mode 100644 mmcv/mmcv/ops/csrc/roi_align_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/csrc/roi_pool_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/csrc/sigmoid_focal_loss_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/csrc/softmax_focal_loss_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/csrc/sync_bn_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/csrc/tin_shift_cuda_kernel.cuh create mode 100644 mmcv/mmcv/ops/deform_conv.py create mode 100644 mmcv/mmcv/ops/deform_roi_pool.py create mode 100644 mmcv/mmcv/ops/deprecated_wrappers.py create mode 100644 mmcv/mmcv/ops/focal_loss.py create mode 100644 mmcv/mmcv/ops/info.py create mode 100644 mmcv/mmcv/ops/masked_conv.py create mode 100644 mmcv/mmcv/ops/merge_cells.py create mode 100644 mmcv/mmcv/ops/modulated_deform_conv.py create mode 100644 mmcv/mmcv/ops/nms.py create mode 100644 mmcv/mmcv/ops/point_sample.py create mode 100644 mmcv/mmcv/ops/psa_mask.py create mode 100644 mmcv/mmcv/ops/roi_align.py create mode 100644 mmcv/mmcv/ops/roi_pool.py create mode 100644 mmcv/mmcv/ops/saconv.py create mode 100644 mmcv/mmcv/ops/sync_bn.py create mode 100644 mmcv/mmcv/ops/tin_shift.py create mode 100644 mmcv/mmcv/parallel/__init__.py create mode 100644 mmcv/mmcv/parallel/_functions.py create mode 100644 mmcv/mmcv/parallel/collate.py create mode 100644 mmcv/mmcv/parallel/data_container.py create mode 100644 mmcv/mmcv/parallel/data_parallel.py create mode 100644 mmcv/mmcv/parallel/distributed.py create mode 100644 mmcv/mmcv/parallel/distributed_deprecated.py create mode 100644 mmcv/mmcv/parallel/registry.py create mode 100644 mmcv/mmcv/parallel/scatter_gather.py create mode 100644 mmcv/mmcv/parallel/utils.py create mode 100644 mmcv/mmcv/runner/__init__.py create mode 100644 mmcv/mmcv/runner/base_runner.py create mode 100644 mmcv/mmcv/runner/builder.py create mode 100644 mmcv/mmcv/runner/checkpoint.py create mode 100644 mmcv/mmcv/runner/dist_utils.py create mode 100644 mmcv/mmcv/runner/epoch_based_runner.py create mode 100644 mmcv/mmcv/runner/fp16_utils.py create mode 100644 mmcv/mmcv/runner/hooks/__init__.py create mode 100644 mmcv/mmcv/runner/hooks/checkpoint.py create mode 100644 mmcv/mmcv/runner/hooks/closure.py create mode 100644 mmcv/mmcv/runner/hooks/ema.py create mode 100644 mmcv/mmcv/runner/hooks/hook.py create mode 100644 mmcv/mmcv/runner/hooks/iter_timer.py create mode 100644 mmcv/mmcv/runner/hooks/logger/__init__.py create mode 100644 mmcv/mmcv/runner/hooks/logger/base.py create mode 100644 mmcv/mmcv/runner/hooks/logger/mlflow.py create mode 100644 mmcv/mmcv/runner/hooks/logger/pavi.py create mode 100644 mmcv/mmcv/runner/hooks/logger/tensorboard.py create mode 100644 mmcv/mmcv/runner/hooks/logger/text.py create mode 100644 mmcv/mmcv/runner/hooks/logger/wandb.py create mode 100644 mmcv/mmcv/runner/hooks/lr_updater.py create mode 100644 mmcv/mmcv/runner/hooks/memory.py create mode 100644 mmcv/mmcv/runner/hooks/momentum_updater.py create mode 100644 mmcv/mmcv/runner/hooks/optimizer.py create mode 100644 mmcv/mmcv/runner/hooks/sampler_seed.py create mode 100644 mmcv/mmcv/runner/hooks/sync_buffer.py create mode 100644 mmcv/mmcv/runner/iter_based_runner.py create mode 100644 mmcv/mmcv/runner/log_buffer.py create mode 100644 mmcv/mmcv/runner/optimizer/__init__.py create mode 100644 mmcv/mmcv/runner/optimizer/builder.py create mode 100644 mmcv/mmcv/runner/optimizer/default_constructor.py create mode 100644 mmcv/mmcv/runner/priority.py create mode 100644 mmcv/mmcv/runner/utils.py create mode 100644 mmcv/mmcv/utils/__init__.py create mode 100644 mmcv/mmcv/utils/config.py create mode 100644 mmcv/mmcv/utils/env.py create mode 100644 mmcv/mmcv/utils/ext_loader.py create mode 100644 mmcv/mmcv/utils/logging.py create mode 100644 mmcv/mmcv/utils/misc.py create mode 100644 mmcv/mmcv/utils/parrots_wrapper.py create mode 100644 mmcv/mmcv/utils/path.py create mode 100644 mmcv/mmcv/utils/progressbar.py create mode 100644 mmcv/mmcv/utils/registry.py create mode 100644 mmcv/mmcv/utils/timer.py create mode 100644 mmcv/mmcv/utils/version_utils.py create mode 100644 mmcv/mmcv/version.py create mode 100644 mmcv/mmcv/video/__init__.py create mode 100644 mmcv/mmcv/video/io.py create mode 100644 mmcv/mmcv/video/optflow.py create mode 100644 mmcv/mmcv/video/optflow_warp/__init__.py create mode 100644 mmcv/mmcv/video/optflow_warp/flow_warp.cpp create mode 100644 mmcv/mmcv/video/optflow_warp/flow_warp.hpp create mode 100644 mmcv/mmcv/video/optflow_warp/flow_warp_module.cpp create mode 100644 mmcv/mmcv/video/optflow_warp/flow_warp_module.pyx create mode 100644 mmcv/mmcv/video/processing.py create mode 100644 mmcv/mmcv/visualization/__init__.py create mode 100644 mmcv/mmcv/visualization/color.py create mode 100644 mmcv/mmcv/visualization/image.py create mode 100644 mmcv/mmcv/visualization/optflow.py diff --git a/mmcv/mmcv/__init__.py b/mmcv/mmcv/__init__.py new file mode 100644 index 0000000..74ee044 --- /dev/null +++ b/mmcv/mmcv/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) Open-MMLab. All rights reserved. +# flake8: noqa +from .arraymisc import * +from .fileio import * +from .image import * +from .utils import * +from .version import * +from .video import * +from .visualization import * + +# The following modules are not imported to this level, so mmcv may be used +# without PyTorch. +# - runner +# - parallel +# - op diff --git a/mmcv/mmcv/arraymisc/__init__.py b/mmcv/mmcv/arraymisc/__init__.py new file mode 100644 index 0000000..2e3934c --- /dev/null +++ b/mmcv/mmcv/arraymisc/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from .quantization import dequantize, quantize + +__all__ = ['quantize', 'dequantize'] diff --git a/mmcv/mmcv/arraymisc/quantization.py b/mmcv/mmcv/arraymisc/quantization.py new file mode 100644 index 0000000..47b6fa2 --- /dev/null +++ b/mmcv/mmcv/arraymisc/quantization.py @@ -0,0 +1,55 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import numpy as np + + +def quantize(arr, min_val, max_val, levels, dtype=np.int64): + """Quantize an array of (-inf, inf) to [0, levels-1]. + + Args: + arr (ndarray): Input array. + min_val (scalar): Minimum value to be clipped. + max_val (scalar): Maximum value to be clipped. + levels (int): Quantization levels. + dtype (np.type): The type of the quantized array. + + Returns: + tuple: Quantized array. + """ + if not (isinstance(levels, int) and levels > 1): + raise ValueError( + f'levels must be a positive integer, but got {levels}') + if min_val >= max_val: + raise ValueError( + f'min_val ({min_val}) must be smaller than max_val ({max_val})') + + arr = np.clip(arr, min_val, max_val) - min_val + quantized_arr = np.minimum( + np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1) + + return quantized_arr + + +def dequantize(arr, min_val, max_val, levels, dtype=np.float64): + """Dequantize an array. + + Args: + arr (ndarray): Input array. + min_val (scalar): Minimum value to be clipped. + max_val (scalar): Maximum value to be clipped. + levels (int): Quantization levels. + dtype (np.type): The type of the dequantized array. + + Returns: + tuple: Dequantized array. + """ + if not (isinstance(levels, int) and levels > 1): + raise ValueError( + f'levels must be a positive integer, but got {levels}') + if min_val >= max_val: + raise ValueError( + f'min_val ({min_val}) must be smaller than max_val ({max_val})') + + dequantized_arr = (arr + 0.5).astype(dtype) * (max_val - + min_val) / levels + min_val + + return dequantized_arr diff --git a/mmcv/mmcv/cnn/__init__.py b/mmcv/mmcv/cnn/__init__.py new file mode 100644 index 0000000..3c5fff7 --- /dev/null +++ b/mmcv/mmcv/cnn/__init__.py @@ -0,0 +1,30 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from .alexnet import AlexNet +from .bricks import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, + PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS, + ContextBlock, Conv2d, ConvAWS2d, ConvModule, + ConvTranspose2d, ConvWS2d, DepthwiseSeparableConvModule, + GeneralizedAttention, HSigmoid, HSwish, Linear, MaxPool2d, + NonLocal1d, NonLocal2d, NonLocal3d, Scale, Swish, + build_activation_layer, build_conv_layer, + build_norm_layer, build_padding_layer, build_plugin_layer, + build_upsample_layer, conv_ws_2d, is_norm) +from .resnet import ResNet, make_res_layer +from .utils import (bias_init_with_prob, caffe2_xavier_init, constant_init, + fuse_conv_bn, get_model_complexity_info, kaiming_init, + normal_init, uniform_init, xavier_init) +from .vgg import VGG, make_vgg_layer + +__all__ = [ + 'AlexNet', 'VGG', 'make_vgg_layer', 'ResNet', 'make_res_layer', + 'constant_init', 'xavier_init', 'normal_init', 'uniform_init', + 'kaiming_init', 'caffe2_xavier_init', 'bias_init_with_prob', 'ConvModule', + 'build_activation_layer', 'build_conv_layer', 'build_norm_layer', + 'build_padding_layer', 'build_upsample_layer', 'build_plugin_layer', + 'is_norm', 'NonLocal1d', 'NonLocal2d', 'NonLocal3d', 'ContextBlock', + 'HSigmoid', 'Swish', 'HSwish', 'GeneralizedAttention', 'ACTIVATION_LAYERS', + 'CONV_LAYERS', 'NORM_LAYERS', 'PADDING_LAYERS', 'UPSAMPLE_LAYERS', + 'PLUGIN_LAYERS', 'Scale', 'get_model_complexity_info', 'conv_ws_2d', + 'ConvAWS2d', 'ConvWS2d', 'fuse_conv_bn', 'DepthwiseSeparableConvModule', + 'Linear', 'Conv2d', 'ConvTranspose2d', 'MaxPool2d' +] diff --git a/mmcv/mmcv/cnn/alexnet.py b/mmcv/mmcv/cnn/alexnet.py new file mode 100644 index 0000000..4a5c7fb --- /dev/null +++ b/mmcv/mmcv/cnn/alexnet.py @@ -0,0 +1,62 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import logging + +import torch.nn as nn + +from ..runner import load_checkpoint + + +class AlexNet(nn.Module): + """AlexNet backbone. + + Args: + num_classes (int): number of classes for classification. + """ + + def __init__(self, num_classes=-1): + super(AlexNet, self).__init__() + self.num_classes = num_classes + self.features = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(64, 192, kernel_size=5, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(192, 384, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(384, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + ) + if self.num_classes > 0: + self.classifier = nn.Sequential( + nn.Dropout(), + nn.Linear(256 * 6 * 6, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + nn.Linear(4096, num_classes), + ) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + # use default initializer + pass + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + + x = self.features(x) + if self.num_classes > 0: + x = x.view(x.size(0), 256 * 6 * 6) + x = self.classifier(x) + + return x diff --git a/mmcv/mmcv/cnn/bricks/__init__.py b/mmcv/mmcv/cnn/bricks/__init__.py new file mode 100644 index 0000000..a685a6e --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/__init__.py @@ -0,0 +1,31 @@ +from .activation import build_activation_layer +from .context_block import ContextBlock +from .conv import build_conv_layer +from .conv2d_adaptive_padding import Conv2dAdaptivePadding +from .conv_module import ConvModule +from .conv_ws import ConvAWS2d, ConvWS2d, conv_ws_2d +from .depthwise_separable_conv_module import DepthwiseSeparableConvModule +from .generalized_attention import GeneralizedAttention +from .hsigmoid import HSigmoid +from .hswish import HSwish +from .non_local import NonLocal1d, NonLocal2d, NonLocal3d +from .norm import build_norm_layer, is_norm +from .padding import build_padding_layer +from .plugin import build_plugin_layer +from .registry import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, + PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS) +from .scale import Scale +from .swish import Swish +from .upsample import build_upsample_layer +from .wrappers import Conv2d, ConvTranspose2d, Linear, MaxPool2d + +__all__ = [ + 'ConvModule', 'build_activation_layer', 'build_conv_layer', + 'build_norm_layer', 'build_padding_layer', 'build_upsample_layer', + 'build_plugin_layer', 'is_norm', 'HSigmoid', 'HSwish', 'NonLocal1d', + 'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'GeneralizedAttention', + 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', 'PADDING_LAYERS', + 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', 'ConvAWS2d', 'ConvWS2d', + 'conv_ws_2d', 'DepthwiseSeparableConvModule', 'Swish', 'Linear', + 'Conv2dAdaptivePadding', 'Conv2d', 'ConvTranspose2d', 'MaxPool2d' +] diff --git a/mmcv/mmcv/cnn/bricks/activation.py b/mmcv/mmcv/cnn/bricks/activation.py new file mode 100644 index 0000000..66ffb92 --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/activation.py @@ -0,0 +1,24 @@ +import torch.nn as nn + +from mmcv.utils import build_from_cfg +from .registry import ACTIVATION_LAYERS + +for module in [ + nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.RReLU, nn.ReLU6, nn.ELU, + nn.Sigmoid, nn.Tanh +]: + ACTIVATION_LAYERS.register_module(module=module) + + +def build_activation_layer(cfg): + """Build activation layer. + + Args: + cfg (dict): The activation layer config, which should contain: + - type (str): Layer type. + - layer args: Args needed to instantiate an activation layer. + + Returns: + nn.Module: Created activation layer. + """ + return build_from_cfg(cfg, ACTIVATION_LAYERS) diff --git a/mmcv/mmcv/cnn/bricks/context_block.py b/mmcv/mmcv/cnn/bricks/context_block.py new file mode 100644 index 0000000..cb60abb --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/context_block.py @@ -0,0 +1,124 @@ +import torch +from torch import nn + +from ..utils import constant_init, kaiming_init +from .registry import PLUGIN_LAYERS + + +def last_zero_init(m): + if isinstance(m, nn.Sequential): + constant_init(m[-1], val=0) + else: + constant_init(m, val=0) + + +@PLUGIN_LAYERS.register_module() +class ContextBlock(nn.Module): + """ContextBlock module in GCNet. + + See 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + (https://arxiv.org/abs/1904.11492) for details. + + Args: + in_channels (int): Channels of the input feature map. + ratio (float): Ratio of channels of transform bottleneck + pooling_type (str): Pooling method for context modeling. + Options are 'att' and 'avg', stand for attention pooling and + average pooling respectively. Default: 'att'. + fusion_types (Sequence[str]): Fusion method for feature fusion, + Options are 'channels_add', 'channel_mul', stand for channelwise + addition and multiplication respectively. Default: ('channel_add',) + """ + + _abbr_ = 'context_block' + + def __init__(self, + in_channels, + ratio, + pooling_type='att', + fusion_types=('channel_add', )): + super(ContextBlock, self).__init__() + assert pooling_type in ['avg', 'att'] + assert isinstance(fusion_types, (list, tuple)) + valid_fusion_types = ['channel_add', 'channel_mul'] + assert all([f in valid_fusion_types for f in fusion_types]) + assert len(fusion_types) > 0, 'at least one fusion should be used' + self.in_channels = in_channels + self.ratio = ratio + self.planes = int(in_channels * ratio) + self.pooling_type = pooling_type + self.fusion_types = fusion_types + if pooling_type == 'att': + self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=1) + self.softmax = nn.Softmax(dim=2) + else: + self.avg_pool = nn.AdaptiveAvgPool2d(1) + if 'channel_add' in fusion_types: + self.channel_add_conv = nn.Sequential( + nn.Conv2d(self.in_channels, self.planes, kernel_size=1), + nn.LayerNorm([self.planes, 1, 1]), + nn.ReLU(inplace=True), # yapf: disable + nn.Conv2d(self.planes, self.in_channels, kernel_size=1)) + else: + self.channel_add_conv = None + if 'channel_mul' in fusion_types: + self.channel_mul_conv = nn.Sequential( + nn.Conv2d(self.in_channels, self.planes, kernel_size=1), + nn.LayerNorm([self.planes, 1, 1]), + nn.ReLU(inplace=True), # yapf: disable + nn.Conv2d(self.planes, self.in_channels, kernel_size=1)) + else: + self.channel_mul_conv = None + self.reset_parameters() + + def reset_parameters(self): + if self.pooling_type == 'att': + kaiming_init(self.conv_mask, mode='fan_in') + self.conv_mask.inited = True + + if self.channel_add_conv is not None: + last_zero_init(self.channel_add_conv) + if self.channel_mul_conv is not None: + last_zero_init(self.channel_mul_conv) + + def spatial_pool(self, x): + batch, channel, height, width = x.size() + if self.pooling_type == 'att': + input_x = x + # [N, C, H * W] + input_x = input_x.view(batch, channel, height * width) + # [N, 1, C, H * W] + input_x = input_x.unsqueeze(1) + # [N, 1, H, W] + context_mask = self.conv_mask(x) + # [N, 1, H * W] + context_mask = context_mask.view(batch, 1, height * width) + # [N, 1, H * W] + context_mask = self.softmax(context_mask) + # [N, 1, H * W, 1] + context_mask = context_mask.unsqueeze(-1) + # [N, 1, C, 1] + context = torch.matmul(input_x, context_mask) + # [N, C, 1, 1] + context = context.view(batch, channel, 1, 1) + else: + # [N, C, 1, 1] + context = self.avg_pool(x) + + return context + + def forward(self, x): + # [N, C, 1, 1] + context = self.spatial_pool(x) + + out = x + if self.channel_mul_conv is not None: + # [N, C, 1, 1] + channel_mul_term = torch.sigmoid(self.channel_mul_conv(context)) + out = out * channel_mul_term + if self.channel_add_conv is not None: + # [N, C, 1, 1] + channel_add_term = self.channel_add_conv(context) + out = out + channel_add_term + + return out diff --git a/mmcv/mmcv/cnn/bricks/conv.py b/mmcv/mmcv/cnn/bricks/conv.py new file mode 100644 index 0000000..e84cfed --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/conv.py @@ -0,0 +1,43 @@ +from torch import nn + +from .registry import CONV_LAYERS + +CONV_LAYERS.register_module('Conv1d', module=nn.Conv1d) +CONV_LAYERS.register_module('Conv2d', module=nn.Conv2d) +CONV_LAYERS.register_module('Conv3d', module=nn.Conv3d) +CONV_LAYERS.register_module('Conv', module=nn.Conv2d) + + +def build_conv_layer(cfg, *args, **kwargs): + """Build convolution layer. + + Args: + cfg (None or dict): The conv layer config, which should contain: + - type (str): Layer type. + - layer args: Args needed to instantiate an activation layer. + args (argument list): Arguments passed to the `__init__` + method of the corresponding conv layer. + kwargs (keyword arguments): Keyword arguments passed to the `__init__` + method of the corresponding conv layer. + + Returns: + nn.Module: Created conv layer. + """ + if cfg is None: + cfg_ = dict(type='Conv2d') + else: + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in CONV_LAYERS: + raise KeyError(f'Unrecognized norm type {layer_type}') + else: + conv_layer = CONV_LAYERS.get(layer_type) + + layer = conv_layer(*args, **kwargs, **cfg_) + + return layer diff --git a/mmcv/mmcv/cnn/bricks/conv2d_adaptive_padding.py b/mmcv/mmcv/cnn/bricks/conv2d_adaptive_padding.py new file mode 100644 index 0000000..6b636b0 --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/conv2d_adaptive_padding.py @@ -0,0 +1,61 @@ +import math + +from torch import nn +from torch.nn import functional as F + +from .registry import CONV_LAYERS + + +@CONV_LAYERS.register_module() +class Conv2dAdaptivePadding(nn.Conv2d): + """Implementation of 2D convolution in tensorflow with `padding` as "same", + which applies padding to input (if needed) so that input image gets fully + covered by filter and stride you specified. For stride 1, this will ensure + that output image size is same as input. For stride of 2, output dimensions + will be half, for example. + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. Default: 0 + dilation (int or tuple, optional): Spacing between kernel elements. + Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the + output. Default: ``True`` + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True): + super().__init__(in_channels, out_channels, kernel_size, stride, 0, + dilation, groups, bias) + + def forward(self, x): + img_h, img_w = x.size()[-2:] + kernel_h, kernel_w = self.weight.size()[-2:] + stride_h, stride_w = self.stride + output_h = math.ceil(img_h / stride_h) + output_w = math.ceil(img_w / stride_w) + pad_h = ( + max((output_h - 1) * self.stride[0] + + (kernel_h - 1) * self.dilation[0] + 1 - img_h, 0)) + pad_w = ( + max((output_w - 1) * self.stride[1] + + (kernel_w - 1) * self.dilation[1] + 1 - img_w, 0)) + if pad_h > 0 or pad_w > 0: + x = F.pad(x, [ + pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2 + ]) + return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups) diff --git a/mmcv/mmcv/cnn/bricks/conv_module.py b/mmcv/mmcv/cnn/bricks/conv_module.py new file mode 100644 index 0000000..d30c004 --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/conv_module.py @@ -0,0 +1,190 @@ +import warnings + +import torch.nn as nn + +from ..utils import constant_init, kaiming_init +from .activation import build_activation_layer +from .conv import build_conv_layer +from .norm import build_norm_layer +from .padding import build_padding_layer +from .registry import PLUGIN_LAYERS + + +@PLUGIN_LAYERS.register_module() +class ConvModule(nn.Module): + """A conv block that bundles conv/norm/activation layers. + + This block simplifies the usage of convolution layers, which are commonly + used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU). + It is based upon three build methods: `build_conv_layer()`, + `build_norm_layer()` and `build_activation_layer()`. + + Besides, we add some additional features in this module. + 1. Automatically set `bias` of the conv layer. + 2. Spectral norm is supported. + 3. More padding modes are supported. Before PyTorch 1.5, nn.Conv2d only + supports zero and circular padding, and we add "reflect" padding mode. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int | tuple[int]): Same as nn.Conv2d. + stride (int | tuple[int]): Same as nn.Conv2d. + padding (int | tuple[int]): Same as nn.Conv2d. + dilation (int | tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + bias (bool | str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise + False. Default: "auto". + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU'). + inplace (bool): Whether to use inplace mode for activation. + Default: True. + with_spectral_norm (bool): Whether use spectral norm in conv module. + Default: False. + padding_mode (str): If the `padding_mode` has not been supported by + current `Conv2d` in PyTorch, we will use our own padding layer + instead. Currently, we support ['zeros', 'circular'] with official + implementation and ['reflect'] with our own implementation. + Default: 'zeros'. + order (tuple[str]): The order of conv/norm/activation layers. It is a + sequence of "conv", "norm" and "act". Common examples are + ("conv", "norm", "act") and ("act", "conv", "norm"). + Default: ('conv', 'norm', 'act'). + """ + + _abbr_ = 'conv_block' + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias='auto', + conv_cfg=None, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + inplace=True, + with_spectral_norm=False, + padding_mode='zeros', + order=('conv', 'norm', 'act')): + super(ConvModule, self).__init__() + assert conv_cfg is None or isinstance(conv_cfg, dict) + assert norm_cfg is None or isinstance(norm_cfg, dict) + assert act_cfg is None or isinstance(act_cfg, dict) + official_padding_mode = ['zeros', 'circular'] + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.inplace = inplace + self.with_spectral_norm = with_spectral_norm + self.with_explicit_padding = padding_mode not in official_padding_mode + self.order = order + assert isinstance(self.order, tuple) and len(self.order) == 3 + assert set(order) == set(['conv', 'norm', 'act']) + + self.with_norm = norm_cfg is not None + self.with_activation = act_cfg is not None + # if the conv layer is before a norm layer, bias is unnecessary. + if bias == 'auto': + bias = not self.with_norm + self.with_bias = bias + + if self.with_norm and self.with_bias: + warnings.warn('ConvModule has norm and bias at the same time') + + if self.with_explicit_padding: + pad_cfg = dict(type=padding_mode) + self.padding_layer = build_padding_layer(pad_cfg, padding) + + # reset padding to 0 for conv module + conv_padding = 0 if self.with_explicit_padding else padding + # build convolution layer + self.conv = build_conv_layer( + conv_cfg, + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=conv_padding, + dilation=dilation, + groups=groups, + bias=bias) + # export the attributes of self.conv to a higher level for convenience + self.in_channels = self.conv.in_channels + self.out_channels = self.conv.out_channels + self.kernel_size = self.conv.kernel_size + self.stride = self.conv.stride + self.padding = padding + self.dilation = self.conv.dilation + self.transposed = self.conv.transposed + self.output_padding = self.conv.output_padding + self.groups = self.conv.groups + + if self.with_spectral_norm: + self.conv = nn.utils.spectral_norm(self.conv) + + # build normalization layers + if self.with_norm: + # norm layer is after conv layer + if order.index('norm') > order.index('conv'): + norm_channels = out_channels + else: + norm_channels = in_channels + self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels) + self.add_module(self.norm_name, norm) + + # build activation layer + if self.with_activation: + act_cfg_ = act_cfg.copy() + # nn.Tanh has no 'inplace' argument + if act_cfg_['type'] not in [ + 'Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish' + ]: + act_cfg_.setdefault('inplace', inplace) + self.activate = build_activation_layer(act_cfg_) + + # Use msra init by default + self.init_weights() + + @property + def norm(self): + return getattr(self, self.norm_name) + + def init_weights(self): + # 1. It is mainly for customized conv layers with their own + # initialization manners, and we do not want ConvModule to + # overrides the initialization. + # 2. For customized conv layers without their own initialization + # manners, they will be initialized by this method with default + # `kaiming_init`. + # 3. For PyTorch's conv layers, they will be initialized anyway by + # their own `reset_parameters` methods. + if not hasattr(self.conv, 'init_weights'): + if self.with_activation and self.act_cfg['type'] == 'LeakyReLU': + nonlinearity = 'leaky_relu' + a = self.act_cfg.get('negative_slope', 0.01) + else: + nonlinearity = 'relu' + a = 0 + kaiming_init(self.conv, a=a, nonlinearity=nonlinearity) + if self.with_norm: + constant_init(self.norm, 1, bias=0) + + def forward(self, x, activate=True, norm=True): + for layer in self.order: + if layer == 'conv': + if self.with_explicit_padding: + x = self.padding_layer(x) + x = self.conv(x) + elif layer == 'norm' and norm and self.with_norm: + x = self.norm(x) + elif layer == 'act' and activate and self.with_activation: + x = self.activate(x) + return x diff --git a/mmcv/mmcv/cnn/bricks/conv_ws.py b/mmcv/mmcv/cnn/bricks/conv_ws.py new file mode 100644 index 0000000..5dea231 --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/conv_ws.py @@ -0,0 +1,147 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .registry import CONV_LAYERS + + +def conv_ws_2d(input, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + eps=1e-5): + c_in = weight.size(0) + weight_flat = weight.view(c_in, -1) + mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) + std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) + weight = (weight - mean) / (std + eps) + return F.conv2d(input, weight, bias, stride, padding, dilation, groups) + + +@CONV_LAYERS.register_module('ConvWS') +class ConvWS2d(nn.Conv2d): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + eps=1e-5): + super(ConvWS2d, self).__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias) + self.eps = eps + + def forward(self, x): + return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups, self.eps) + + +@CONV_LAYERS.register_module(name='ConvAWS') +class ConvAWS2d(nn.Conv2d): + """AWS (Adaptive Weight Standardization) + + This is a variant of Weight Standardization + (https://arxiv.org/pdf/1903.10520.pdf) + It is used in DetectoRS to avoid NaN + (https://arxiv.org/pdf/2006.02334.pdf) + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the conv kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. Default: 0 + dilation (int or tuple, optional): Spacing between kernel elements. + Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. Default: 1 + bias (bool, optional): If set True, adds a learnable bias to the + output. Default: True + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True): + super().__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias) + self.register_buffer('weight_gamma', + torch.ones(self.out_channels, 1, 1, 1)) + self.register_buffer('weight_beta', + torch.zeros(self.out_channels, 1, 1, 1)) + + def _get_weight(self, weight): + weight_flat = weight.view(weight.size(0), -1) + mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1) + std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) + weight = (weight - mean) / std + weight = self.weight_gamma * weight + self.weight_beta + return weight + + def forward(self, x): + weight = self._get_weight(self.weight) + return F.conv2d(x, weight, self.bias, self.stride, self.padding, + self.dilation, self.groups) + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + """Override default load function. + + AWS overrides the function _load_from_state_dict to recover + weight_gamma and weight_beta if they are missing. If weight_gamma and + weight_beta are found in the checkpoint, this function will return + after super()._load_from_state_dict. Otherwise, it will compute the + mean and std of the pretrained weights and store them in weight_beta + and weight_gamma. + """ + + self.weight_gamma.data.fill_(-1) + local_missing_keys = [] + super()._load_from_state_dict(state_dict, prefix, local_metadata, + strict, local_missing_keys, + unexpected_keys, error_msgs) + if self.weight_gamma.data.mean() > 0: + for k in local_missing_keys: + missing_keys.append(k) + return + weight = self.weight.data + weight_flat = weight.view(weight.size(0), -1) + mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1) + std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) + self.weight_beta.data.copy_(mean) + self.weight_gamma.data.copy_(std) + missing_gamma_beta = [ + k for k in local_missing_keys + if k.endswith('weight_gamma') or k.endswith('weight_beta') + ] + for k in missing_gamma_beta: + local_missing_keys.remove(k) + for k in local_missing_keys: + missing_keys.append(k) diff --git a/mmcv/mmcv/cnn/bricks/depthwise_separable_conv_module.py b/mmcv/mmcv/cnn/bricks/depthwise_separable_conv_module.py new file mode 100644 index 0000000..4eaf34c --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/depthwise_separable_conv_module.py @@ -0,0 +1,89 @@ +import torch.nn as nn + +from .conv_module import ConvModule + + +class DepthwiseSeparableConvModule(nn.Module): + """Depthwise separable convolution module. + + See https://arxiv.org/pdf/1704.04861.pdf for details. + + This module can replace a ConvModule with the conv block replaced by two + conv block: depthwise conv block and pointwise conv block. The depthwise + conv block contains depthwise-conv/norm/activation layers. The pointwise + conv block contains pointwise-conv/norm/activation layers. It should be + noted that there will be norm/activation layer in the depthwise conv block + if `norm_cfg` and `act_cfg` are specified. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int or tuple[int]): Same as nn.Conv2d. Default: 1. + padding (int or tuple[int]): Same as nn.Conv2d. Default: 0. + dilation (int or tuple[int]): Same as nn.Conv2d. Default: 1. + norm_cfg (dict): Default norm config for both depthwise ConvModule and + pointwise ConvModule. Default: None. + act_cfg (dict): Default activation config for both depthwise ConvModule + and pointwise ConvModule. Default: dict(type='ReLU'). + dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is + 'default', it will be the same as `norm_cfg`. Default: 'default'. + dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is + 'default', it will be the same as `act_cfg`. Default: 'default'. + pw_norm_cfg (dict): Norm config of pointwise ConvModule. If it is + 'default', it will be the same as `norm_cfg`. Default: 'default'. + pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is + 'default', it will be the same as `act_cfg`. Default: 'default'. + kwargs (optional): Other shared arguments for depthwise and pointwise + ConvModule. See ConvModule for ref. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + dw_norm_cfg='default', + dw_act_cfg='default', + pw_norm_cfg='default', + pw_act_cfg='default', + **kwargs): + super(DepthwiseSeparableConvModule, self).__init__() + assert 'groups' not in kwargs, 'groups should not be specified' + + # if norm/activation config of depthwise/pointwise ConvModule is not + # specified, use default config. + dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg + dw_act_cfg = dw_act_cfg if dw_act_cfg != 'default' else act_cfg + pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg + pw_act_cfg = pw_act_cfg if pw_act_cfg != 'default' else act_cfg + + # depthwise convolution + self.depthwise_conv = ConvModule( + in_channels, + in_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=in_channels, + norm_cfg=dw_norm_cfg, + act_cfg=dw_act_cfg, + **kwargs) + + self.pointwise_conv = ConvModule( + in_channels, + out_channels, + 1, + norm_cfg=pw_norm_cfg, + act_cfg=pw_act_cfg, + **kwargs) + + def forward(self, x): + x = self.depthwise_conv(x) + x = self.pointwise_conv(x) + return x diff --git a/mmcv/mmcv/cnn/bricks/generalized_attention.py b/mmcv/mmcv/cnn/bricks/generalized_attention.py new file mode 100644 index 0000000..8a779bf --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/generalized_attention.py @@ -0,0 +1,405 @@ +import math + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..utils import kaiming_init +from .registry import PLUGIN_LAYERS + + +@PLUGIN_LAYERS.register_module() +class GeneralizedAttention(nn.Module): + """GeneralizedAttention module. + + See 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks' + (https://arxiv.org/abs/1711.07971) for details. + + Args: + in_channels (int): Channels of the input feature map. + spatial_range (int): The spatial range. -1 indicates no spatial range + constraint. Default: -1. + num_heads (int): The head number of empirical_attention module. + Default: 9. + position_embedding_dim (int): The position embedding dimension. + Default: -1. + position_magnitude (int): A multiplier acting on coord difference. + Default: 1. + kv_stride (int): The feature stride acting on key/value feature map. + Default: 2. + q_stride (int): The feature stride acting on query feature map. + Default: 1. + attention_type (str): A binary indicator string for indicating which + items in generalized empirical_attention module are used. + Default: '1111'. + + - '1000' indicates 'query and key content' (appr - appr) item, + - '0100' indicates 'query content and relative position' + (appr - position) item, + - '0010' indicates 'key content only' (bias - appr) item, + - '0001' indicates 'relative position only' (bias - position) item. + """ + + _abbr_ = 'gen_attention_block' + + def __init__(self, + in_channels, + spatial_range=-1, + num_heads=9, + position_embedding_dim=-1, + position_magnitude=1, + kv_stride=2, + q_stride=1, + attention_type='1111'): + + super(GeneralizedAttention, self).__init__() + + # hard range means local range for non-local operation + self.position_embedding_dim = ( + position_embedding_dim + if position_embedding_dim > 0 else in_channels) + + self.position_magnitude = position_magnitude + self.num_heads = num_heads + self.in_channels = in_channels + self.spatial_range = spatial_range + self.kv_stride = kv_stride + self.q_stride = q_stride + self.attention_type = [bool(int(_)) for _ in attention_type] + self.qk_embed_dim = in_channels // num_heads + out_c = self.qk_embed_dim * num_heads + + if self.attention_type[0] or self.attention_type[1]: + self.query_conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_c, + kernel_size=1, + bias=False) + self.query_conv.kaiming_init = True + + if self.attention_type[0] or self.attention_type[2]: + self.key_conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_c, + kernel_size=1, + bias=False) + self.key_conv.kaiming_init = True + + self.v_dim = in_channels // num_heads + self.value_conv = nn.Conv2d( + in_channels=in_channels, + out_channels=self.v_dim * num_heads, + kernel_size=1, + bias=False) + self.value_conv.kaiming_init = True + + if self.attention_type[1] or self.attention_type[3]: + self.appr_geom_fc_x = nn.Linear( + self.position_embedding_dim // 2, out_c, bias=False) + self.appr_geom_fc_x.kaiming_init = True + + self.appr_geom_fc_y = nn.Linear( + self.position_embedding_dim // 2, out_c, bias=False) + self.appr_geom_fc_y.kaiming_init = True + + if self.attention_type[2]: + stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2) + appr_bias_value = -2 * stdv * torch.rand(out_c) + stdv + self.appr_bias = nn.Parameter(appr_bias_value) + + if self.attention_type[3]: + stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2) + geom_bias_value = -2 * stdv * torch.rand(out_c) + stdv + self.geom_bias = nn.Parameter(geom_bias_value) + + self.proj_conv = nn.Conv2d( + in_channels=self.v_dim * num_heads, + out_channels=in_channels, + kernel_size=1, + bias=True) + self.proj_conv.kaiming_init = True + self.gamma = nn.Parameter(torch.zeros(1)) + + if self.spatial_range >= 0: + # only works when non local is after 3*3 conv + if in_channels == 256: + max_len = 84 + elif in_channels == 512: + max_len = 42 + + max_len_kv = int((max_len - 1.0) / self.kv_stride + 1) + local_constraint_map = np.ones( + (max_len, max_len, max_len_kv, max_len_kv), dtype=np.int) + for iy in range(max_len): + for ix in range(max_len): + local_constraint_map[ + iy, ix, + max((iy - self.spatial_range) // + self.kv_stride, 0):min((iy + self.spatial_range + + 1) // self.kv_stride + + 1, max_len), + max((ix - self.spatial_range) // + self.kv_stride, 0):min((ix + self.spatial_range + + 1) // self.kv_stride + + 1, max_len)] = 0 + + self.local_constraint_map = nn.Parameter( + torch.from_numpy(local_constraint_map).byte(), + requires_grad=False) + + if self.q_stride > 1: + self.q_downsample = nn.AvgPool2d( + kernel_size=1, stride=self.q_stride) + else: + self.q_downsample = None + + if self.kv_stride > 1: + self.kv_downsample = nn.AvgPool2d( + kernel_size=1, stride=self.kv_stride) + else: + self.kv_downsample = None + + self.init_weights() + + def get_position_embedding(self, + h, + w, + h_kv, + w_kv, + q_stride, + kv_stride, + device, + feat_dim, + wave_length=1000): + h_idxs = torch.linspace(0, h - 1, h).to(device) + h_idxs = h_idxs.view((h, 1)) * q_stride + + w_idxs = torch.linspace(0, w - 1, w).to(device) + w_idxs = w_idxs.view((w, 1)) * q_stride + + h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).to(device) + h_kv_idxs = h_kv_idxs.view((h_kv, 1)) * kv_stride + + w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).to(device) + w_kv_idxs = w_kv_idxs.view((w_kv, 1)) * kv_stride + + # (h, h_kv, 1) + h_diff = h_idxs.unsqueeze(1) - h_kv_idxs.unsqueeze(0) + h_diff *= self.position_magnitude + + # (w, w_kv, 1) + w_diff = w_idxs.unsqueeze(1) - w_kv_idxs.unsqueeze(0) + w_diff *= self.position_magnitude + + feat_range = torch.arange(0, feat_dim / 4).to(device) + + dim_mat = torch.Tensor([wave_length]).to(device) + dim_mat = dim_mat**((4. / feat_dim) * feat_range) + dim_mat = dim_mat.view((1, 1, -1)) + + embedding_x = torch.cat( + ((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2) + + embedding_y = torch.cat( + ((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2) + + return embedding_x, embedding_y + + def forward(self, x_input): + num_heads = self.num_heads + + # use empirical_attention + if self.q_downsample is not None: + x_q = self.q_downsample(x_input) + else: + x_q = x_input + n, _, h, w = x_q.shape + + if self.kv_downsample is not None: + x_kv = self.kv_downsample(x_input) + else: + x_kv = x_input + _, _, h_kv, w_kv = x_kv.shape + + if self.attention_type[0] or self.attention_type[1]: + proj_query = self.query_conv(x_q).view( + (n, num_heads, self.qk_embed_dim, h * w)) + proj_query = proj_query.permute(0, 1, 3, 2) + + if self.attention_type[0] or self.attention_type[2]: + proj_key = self.key_conv(x_kv).view( + (n, num_heads, self.qk_embed_dim, h_kv * w_kv)) + + if self.attention_type[1] or self.attention_type[3]: + position_embed_x, position_embed_y = self.get_position_embedding( + h, w, h_kv, w_kv, self.q_stride, self.kv_stride, + x_input.device, self.position_embedding_dim) + # (n, num_heads, w, w_kv, dim) + position_feat_x = self.appr_geom_fc_x(position_embed_x).\ + view(1, w, w_kv, num_heads, self.qk_embed_dim).\ + permute(0, 3, 1, 2, 4).\ + repeat(n, 1, 1, 1, 1) + + # (n, num_heads, h, h_kv, dim) + position_feat_y = self.appr_geom_fc_y(position_embed_y).\ + view(1, h, h_kv, num_heads, self.qk_embed_dim).\ + permute(0, 3, 1, 2, 4).\ + repeat(n, 1, 1, 1, 1) + + position_feat_x /= math.sqrt(2) + position_feat_y /= math.sqrt(2) + + # accelerate for saliency only + if (np.sum(self.attention_type) == 1) and self.attention_type[2]: + appr_bias = self.appr_bias.\ + view(1, num_heads, 1, self.qk_embed_dim).\ + repeat(n, 1, 1, 1) + + energy = torch.matmul(appr_bias, proj_key).\ + view(n, num_heads, 1, h_kv * w_kv) + + h = 1 + w = 1 + else: + # (n, num_heads, h*w, h_kv*w_kv), query before key, 540mb for + if not self.attention_type[0]: + energy = torch.zeros( + n, + num_heads, + h, + w, + h_kv, + w_kv, + dtype=x_input.dtype, + device=x_input.device) + + # attention_type[0]: appr - appr + # attention_type[1]: appr - position + # attention_type[2]: bias - appr + # attention_type[3]: bias - position + if self.attention_type[0] or self.attention_type[2]: + if self.attention_type[0] and self.attention_type[2]: + appr_bias = self.appr_bias.\ + view(1, num_heads, 1, self.qk_embed_dim) + energy = torch.matmul(proj_query + appr_bias, proj_key).\ + view(n, num_heads, h, w, h_kv, w_kv) + + elif self.attention_type[0]: + energy = torch.matmul(proj_query, proj_key).\ + view(n, num_heads, h, w, h_kv, w_kv) + + elif self.attention_type[2]: + appr_bias = self.appr_bias.\ + view(1, num_heads, 1, self.qk_embed_dim).\ + repeat(n, 1, 1, 1) + + energy += torch.matmul(appr_bias, proj_key).\ + view(n, num_heads, 1, 1, h_kv, w_kv) + + if self.attention_type[1] or self.attention_type[3]: + if self.attention_type[1] and self.attention_type[3]: + geom_bias = self.geom_bias.\ + view(1, num_heads, 1, self.qk_embed_dim) + + proj_query_reshape = (proj_query + geom_bias).\ + view(n, num_heads, h, w, self.qk_embed_dim) + + energy_x = torch.matmul( + proj_query_reshape.permute(0, 1, 3, 2, 4), + position_feat_x.permute(0, 1, 2, 4, 3)) + energy_x = energy_x.\ + permute(0, 1, 3, 2, 4).unsqueeze(4) + + energy_y = torch.matmul( + proj_query_reshape, + position_feat_y.permute(0, 1, 2, 4, 3)) + energy_y = energy_y.unsqueeze(5) + + energy += energy_x + energy_y + + elif self.attention_type[1]: + proj_query_reshape = proj_query.\ + view(n, num_heads, h, w, self.qk_embed_dim) + proj_query_reshape = proj_query_reshape.\ + permute(0, 1, 3, 2, 4) + position_feat_x_reshape = position_feat_x.\ + permute(0, 1, 2, 4, 3) + position_feat_y_reshape = position_feat_y.\ + permute(0, 1, 2, 4, 3) + + energy_x = torch.matmul(proj_query_reshape, + position_feat_x_reshape) + energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4) + + energy_y = torch.matmul(proj_query_reshape, + position_feat_y_reshape) + energy_y = energy_y.unsqueeze(5) + + energy += energy_x + energy_y + + elif self.attention_type[3]: + geom_bias = self.geom_bias.\ + view(1, num_heads, self.qk_embed_dim, 1).\ + repeat(n, 1, 1, 1) + + position_feat_x_reshape = position_feat_x.\ + view(n, num_heads, w*w_kv, self.qk_embed_dim) + + position_feat_y_reshape = position_feat_y.\ + view(n, num_heads, h * h_kv, self.qk_embed_dim) + + energy_x = torch.matmul(position_feat_x_reshape, geom_bias) + energy_x = energy_x.view(n, num_heads, 1, w, 1, w_kv) + + energy_y = torch.matmul(position_feat_y_reshape, geom_bias) + energy_y = energy_y.view(n, num_heads, h, 1, h_kv, 1) + + energy += energy_x + energy_y + + energy = energy.view(n, num_heads, h * w, h_kv * w_kv) + + if self.spatial_range >= 0: + cur_local_constraint_map = \ + self.local_constraint_map[:h, :w, :h_kv, :w_kv].\ + contiguous().\ + view(1, 1, h*w, h_kv*w_kv) + + energy = energy.masked_fill_(cur_local_constraint_map, + float('-inf')) + + attention = F.softmax(energy, 3) + + proj_value = self.value_conv(x_kv) + proj_value_reshape = proj_value.\ + view((n, num_heads, self.v_dim, h_kv * w_kv)).\ + permute(0, 1, 3, 2) + + out = torch.matmul(attention, proj_value_reshape).\ + permute(0, 1, 3, 2).\ + contiguous().\ + view(n, self.v_dim * self.num_heads, h, w) + + out = self.proj_conv(out) + + # output is downsampled, upsample back to input size + if self.q_downsample is not None: + out = F.interpolate( + out, + size=x_input.shape[2:], + mode='bilinear', + align_corners=False) + + out = self.gamma * out + x_input + return out + + def init_weights(self): + for m in self.modules(): + if hasattr(m, 'kaiming_init') and m.kaiming_init: + kaiming_init( + m, + mode='fan_in', + nonlinearity='leaky_relu', + bias=0, + distribution='uniform', + a=1) diff --git a/mmcv/mmcv/cnn/bricks/hsigmoid.py b/mmcv/mmcv/cnn/bricks/hsigmoid.py new file mode 100644 index 0000000..3edc229 --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/hsigmoid.py @@ -0,0 +1,21 @@ +import torch.nn as nn + +from .registry import ACTIVATION_LAYERS + + +@ACTIVATION_LAYERS.register_module() +class HSigmoid(nn.Module): + """Hard Sigmoid Module. Apply the hard sigmoid function: + Hsigmoid(x) = min(max((x + 1) / 2, 0), 1) + + Returns: + Tensor: The output tensor. + """ + + def __init__(self): + super(HSigmoid, self).__init__() + + def forward(self, x): + x = (x + 1) / 2 + + return x.clamp_(0, 1) diff --git a/mmcv/mmcv/cnn/bricks/hswish.py b/mmcv/mmcv/cnn/bricks/hswish.py new file mode 100644 index 0000000..f1a22ad --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/hswish.py @@ -0,0 +1,28 @@ +import torch.nn as nn + +from .registry import ACTIVATION_LAYERS + + +@ACTIVATION_LAYERS.register_module() +class HSwish(nn.Module): + """Hard Swish Module. + + This module applies the hard swish function: + + .. math:: + Hswish(x) = x * ReLU6(x + 3) / 6 + + Args: + inplace (bool): can optionally do the operation in-place. + Default: False. + + Returns: + Tensor: The output tensor. + """ + + def __init__(self, inplace=False): + super(HSwish, self).__init__() + self.act = nn.ReLU6(inplace) + + def forward(self, x): + return x * self.act(x + 3) / 6 diff --git a/mmcv/mmcv/cnn/bricks/non_local.py b/mmcv/mmcv/cnn/bricks/non_local.py new file mode 100644 index 0000000..3ee0656 --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/non_local.py @@ -0,0 +1,305 @@ +from abc import ABCMeta + +import torch +import torch.nn as nn + +from ..utils import constant_init, normal_init +from .conv_module import ConvModule +from .registry import PLUGIN_LAYERS + + +class _NonLocalNd(nn.Module, metaclass=ABCMeta): + """Basic Non-local module. + + This module is proposed in + "Non-local Neural Networks" + Paper reference: https://arxiv.org/abs/1711.07971 + Code reference: https://github.com/AlexHex7/Non-local_pytorch + + Args: + in_channels (int): Channels of the input feature map. + reduction (int): Channel reduction ratio. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + `1/sqrt(inter_channels)` when the mode is `embedded_gaussian`. + Default: True. + conv_cfg (None | dict): The config dict for convolution layers. + If not specified, it will use `nn.Conv2d` for convolution layers. + Default: None. + norm_cfg (None | dict): The config dict for normalization layers. + Default: None. (This parameter is only applicable to conv_out.) + mode (str): Options are `gaussian`, `concatenation`, + `embedded_gaussian` and `dot_product`. Default: embedded_gaussian. + """ + + def __init__(self, + in_channels, + reduction=2, + use_scale=True, + conv_cfg=None, + norm_cfg=None, + mode='embedded_gaussian', + **kwargs): + super(_NonLocalNd, self).__init__() + self.in_channels = in_channels + self.reduction = reduction + self.use_scale = use_scale + self.inter_channels = max(in_channels // reduction, 1) + self.mode = mode + + if mode not in [ + 'gaussian', 'embedded_gaussian', 'dot_product', 'concatenation' + ]: + raise ValueError("Mode should be in 'gaussian', 'concatenation', " + f"'embedded_gaussian' or 'dot_product', but got " + f'{mode} instead.') + + # g, theta, phi are defaulted as `nn.ConvNd`. + # Here we use ConvModule for potential usage. + self.g = ConvModule( + self.in_channels, + self.inter_channels, + kernel_size=1, + conv_cfg=conv_cfg, + act_cfg=None) + self.conv_out = ConvModule( + self.inter_channels, + self.in_channels, + kernel_size=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + + if self.mode != 'gaussian': + self.theta = ConvModule( + self.in_channels, + self.inter_channels, + kernel_size=1, + conv_cfg=conv_cfg, + act_cfg=None) + self.phi = ConvModule( + self.in_channels, + self.inter_channels, + kernel_size=1, + conv_cfg=conv_cfg, + act_cfg=None) + + if self.mode == 'concatenation': + self.concat_project = ConvModule( + self.inter_channels * 2, + 1, + kernel_size=1, + stride=1, + padding=0, + bias=False, + act_cfg=dict(type='ReLU')) + + self.init_weights(**kwargs) + + def init_weights(self, std=0.01, zeros_init=True): + if self.mode != 'gaussian': + for m in [self.g, self.theta, self.phi]: + normal_init(m.conv, std=std) + else: + normal_init(self.g.conv, std=std) + if zeros_init: + if self.conv_out.norm_cfg is None: + constant_init(self.conv_out.conv, 0) + else: + constant_init(self.conv_out.norm, 0) + else: + if self.conv_out.norm_cfg is None: + normal_init(self.conv_out.conv, std=std) + else: + normal_init(self.conv_out.norm, std=std) + + def gaussian(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def embedded_gaussian(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + if self.use_scale: + # theta_x.shape[-1] is `self.inter_channels` + pairwise_weight /= theta_x.shape[-1]**0.5 + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def dot_product(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + pairwise_weight /= pairwise_weight.shape[-1] + return pairwise_weight + + def concatenation(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + h = theta_x.size(2) + w = phi_x.size(3) + theta_x = theta_x.repeat(1, 1, 1, w) + phi_x = phi_x.repeat(1, 1, h, 1) + + concat_feature = torch.cat([theta_x, phi_x], dim=1) + pairwise_weight = self.concat_project(concat_feature) + n, _, h, w = pairwise_weight.size() + pairwise_weight = pairwise_weight.view(n, h, w) + pairwise_weight /= pairwise_weight.shape[-1] + + return pairwise_weight + + def forward(self, x): + # Assume `reduction = 1`, then `inter_channels = C` + # or `inter_channels = C` when `mode="gaussian"` + + # NonLocal1d x: [N, C, H] + # NonLocal2d x: [N, C, H, W] + # NonLocal3d x: [N, C, T, H, W] + n = x.size(0) + + # NonLocal1d g_x: [N, H, C] + # NonLocal2d g_x: [N, HxW, C] + # NonLocal3d g_x: [N, TxHxW, C] + g_x = self.g(x).view(n, self.inter_channels, -1) + g_x = g_x.permute(0, 2, 1) + + # NonLocal1d theta_x: [N, H, C], phi_x: [N, C, H] + # NonLocal2d theta_x: [N, HxW, C], phi_x: [N, C, HxW] + # NonLocal3d theta_x: [N, TxHxW, C], phi_x: [N, C, TxHxW] + if self.mode == 'gaussian': + theta_x = x.view(n, self.in_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + if self.sub_sample: + phi_x = self.phi(x).view(n, self.in_channels, -1) + else: + phi_x = x.view(n, self.in_channels, -1) + elif self.mode == 'concatenation': + theta_x = self.theta(x).view(n, self.inter_channels, -1, 1) + phi_x = self.phi(x).view(n, self.inter_channels, 1, -1) + else: + theta_x = self.theta(x).view(n, self.inter_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + phi_x = self.phi(x).view(n, self.inter_channels, -1) + + pairwise_func = getattr(self, self.mode) + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = pairwise_func(theta_x, phi_x) + + # NonLocal1d y: [N, H, C] + # NonLocal2d y: [N, HxW, C] + # NonLocal3d y: [N, TxHxW, C] + y = torch.matmul(pairwise_weight, g_x) + # NonLocal1d y: [N, C, H] + # NonLocal2d y: [N, C, H, W] + # NonLocal3d y: [N, C, T, H, W] + y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, + *x.size()[2:]) + + output = x + self.conv_out(y) + + return output + + +class NonLocal1d(_NonLocalNd): + """1D Non-local module. + + Args: + in_channels (int): Same as `NonLocalND`. + sub_sample (bool): Whether to apply max pooling after pairwise + function (Note that the `sub_sample` is applied on spatial only). + Default: False. + conv_cfg (None | dict): Same as `NonLocalND`. + Default: dict(type='Conv1d'). + """ + + def __init__(self, + in_channels, + sub_sample=False, + conv_cfg=dict(type='Conv1d'), + **kwargs): + super(NonLocal1d, self).__init__( + in_channels, conv_cfg=conv_cfg, **kwargs) + + self.sub_sample = sub_sample + + if sub_sample: + max_pool_layer = nn.MaxPool1d(kernel_size=2) + self.g = nn.Sequential(self.g, max_pool_layer) + if self.mode != 'gaussian': + self.phi = nn.Sequential(self.phi, max_pool_layer) + else: + self.phi = max_pool_layer + + +@PLUGIN_LAYERS.register_module() +class NonLocal2d(_NonLocalNd): + """2D Non-local module. + + Args: + in_channels (int): Same as `NonLocalND`. + sub_sample (bool): Whether to apply max pooling after pairwise + function (Note that the `sub_sample` is applied on spatial only). + Default: False. + conv_cfg (None | dict): Same as `NonLocalND`. + Default: dict(type='Conv2d'). + """ + + _abbr_ = 'nonlocal_block' + + def __init__(self, + in_channels, + sub_sample=False, + conv_cfg=dict(type='Conv2d'), + **kwargs): + super(NonLocal2d, self).__init__( + in_channels, conv_cfg=conv_cfg, **kwargs) + + self.sub_sample = sub_sample + + if sub_sample: + max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2)) + self.g = nn.Sequential(self.g, max_pool_layer) + if self.mode != 'gaussian': + self.phi = nn.Sequential(self.phi, max_pool_layer) + else: + self.phi = max_pool_layer + + +class NonLocal3d(_NonLocalNd): + """3D Non-local module. + + Args: + in_channels (int): Same as `NonLocalND`. + sub_sample (bool): Whether to apply max pooling after pairwise + function (Note that the `sub_sample` is applied on spatial only). + Default: False. + conv_cfg (None | dict): Same as `NonLocalND`. + Default: dict(type='Conv3d'). + """ + + def __init__(self, + in_channels, + sub_sample=False, + conv_cfg=dict(type='Conv3d'), + **kwargs): + super(NonLocal3d, self).__init__( + in_channels, conv_cfg=conv_cfg, **kwargs) + self.sub_sample = sub_sample + + if sub_sample: + max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2)) + self.g = nn.Sequential(self.g, max_pool_layer) + if self.mode != 'gaussian': + self.phi = nn.Sequential(self.phi, max_pool_layer) + else: + self.phi = max_pool_layer diff --git a/mmcv/mmcv/cnn/bricks/norm.py b/mmcv/mmcv/cnn/bricks/norm.py new file mode 100644 index 0000000..154ba1c --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/norm.py @@ -0,0 +1,143 @@ +import inspect + +import torch.nn as nn + +from mmcv.utils import is_tuple_of +from mmcv.utils.parrots_wrapper import SyncBatchNorm, _BatchNorm, _InstanceNorm +from .registry import NORM_LAYERS + +NORM_LAYERS.register_module('BN', module=nn.BatchNorm2d) +NORM_LAYERS.register_module('BN1d', module=nn.BatchNorm1d) +NORM_LAYERS.register_module('BN2d', module=nn.BatchNorm2d) +NORM_LAYERS.register_module('BN3d', module=nn.BatchNorm3d) +NORM_LAYERS.register_module('SyncBN', module=SyncBatchNorm) +NORM_LAYERS.register_module('GN', module=nn.GroupNorm) +NORM_LAYERS.register_module('LN', module=nn.LayerNorm) +NORM_LAYERS.register_module('IN', module=nn.InstanceNorm2d) +NORM_LAYERS.register_module('IN1d', module=nn.InstanceNorm1d) +NORM_LAYERS.register_module('IN2d', module=nn.InstanceNorm2d) +NORM_LAYERS.register_module('IN3d', module=nn.InstanceNorm3d) + + +def infer_abbr(class_type): + """Infer abbreviation from the class name. + + When we build a norm layer with `build_norm_layer()`, we want to preserve + the norm type in variable names, e.g, self.bn1, self.gn. This method will + infer the abbreviation to map class types to abbreviations. + + Rule 1: If the class has the property "_abbr_", return the property. + Rule 2: If the parent class is _BatchNorm, GroupNorm, LayerNorm or + InstanceNorm, the abbreviation of this layer will be "bn", "gn", "ln" and + "in" respectively. + Rule 3: If the class name contains "batch", "group", "layer" or "instance", + the abbreviation of this layer will be "bn", "gn", "ln" and "in" + respectively. + Rule 4: Otherwise, the abbreviation falls back to "norm". + + Args: + class_type (type): The norm layer type. + + Returns: + str: The inferred abbreviation. + """ + if not inspect.isclass(class_type): + raise TypeError( + f'class_type must be a type, but got {type(class_type)}') + if hasattr(class_type, '_abbr_'): + return class_type._abbr_ + if issubclass(class_type, _InstanceNorm): # IN is a subclass of BN + return 'in' + elif issubclass(class_type, _BatchNorm): + return 'bn' + elif issubclass(class_type, nn.GroupNorm): + return 'gn' + elif issubclass(class_type, nn.LayerNorm): + return 'ln' + else: + class_name = class_type.__name__.lower() + if 'batch' in class_name: + return 'bn' + elif 'group' in class_name: + return 'gn' + elif 'layer' in class_name: + return 'ln' + elif 'instance' in class_name: + return 'in' + else: + return 'norm' + + +def build_norm_layer(cfg, num_features, postfix=''): + """Build normalization layer. + + Args: + cfg (dict): The norm layer config, which should contain: + + - type (str): Layer type. + - layer args: Args needed to instantiate a norm layer. + - requires_grad (bool, optional): Whether stop gradient updates. + num_features (int): Number of input channels. + postfix (int | str): The postfix to be appended into norm abbreviation + to create named layer. + + Returns: + (str, nn.Module): The first element is the layer name consisting of + abbreviation and postfix, e.g., bn1, gn. The second element is the + created norm layer. + """ + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in NORM_LAYERS: + raise KeyError(f'Unrecognized norm type {layer_type}') + + norm_layer = NORM_LAYERS.get(layer_type) + abbr = infer_abbr(norm_layer) + + assert isinstance(postfix, (int, str)) + name = abbr + str(postfix) + + requires_grad = cfg_.pop('requires_grad', True) + cfg_.setdefault('eps', 1e-5) + if layer_type != 'GN': + layer = norm_layer(num_features, **cfg_) + if layer_type == 'SyncBN': + layer._specify_ddp_gpu_num(1) + else: + assert 'num_groups' in cfg_ + layer = norm_layer(num_channels=num_features, **cfg_) + + for param in layer.parameters(): + param.requires_grad = requires_grad + + return name, layer + + +def is_norm(layer, exclude=None): + """Check if a layer is a normalization layer. + + Args: + layer (nn.Module): The layer to be checked. + exclude (type | tuple[type]): Types to be excluded. + + Returns: + bool: Whether the layer is a norm layer. + """ + if exclude is not None: + if not isinstance(exclude, tuple): + exclude = (exclude, ) + if not is_tuple_of(exclude, type): + raise TypeError( + f'"exclude" must be either None or type or a tuple of types, ' + f'but got {type(exclude)}: {exclude}') + + if exclude and isinstance(layer, exclude): + return False + + all_norm_bases = (_BatchNorm, _InstanceNorm, nn.GroupNorm, nn.LayerNorm) + return isinstance(layer, all_norm_bases) diff --git a/mmcv/mmcv/cnn/bricks/padding.py b/mmcv/mmcv/cnn/bricks/padding.py new file mode 100644 index 0000000..b7e8212 --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/padding.py @@ -0,0 +1,35 @@ +import torch.nn as nn + +from .registry import PADDING_LAYERS + +PADDING_LAYERS.register_module('zero', module=nn.ZeroPad2d) +PADDING_LAYERS.register_module('reflect', module=nn.ReflectionPad2d) +PADDING_LAYERS.register_module('replicate', module=nn.ReplicationPad2d) + + +def build_padding_layer(cfg, *args, **kwargs): + """Build padding layer. + + Args: + cfg (None or dict): The padding layer config, which should contain: + - type (str): Layer type. + - layer args: Args needed to instantiate a padding layer. + + Returns: + nn.Module: Created padding layer. + """ + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + + cfg_ = cfg.copy() + padding_type = cfg_.pop('type') + if padding_type not in PADDING_LAYERS: + raise KeyError(f'Unrecognized padding type {padding_type}.') + else: + padding_layer = PADDING_LAYERS.get(padding_type) + + layer = padding_layer(*args, **kwargs, **cfg_) + + return layer diff --git a/mmcv/mmcv/cnn/bricks/plugin.py b/mmcv/mmcv/cnn/bricks/plugin.py new file mode 100644 index 0000000..07c010d --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/plugin.py @@ -0,0 +1,88 @@ +import inspect +import platform + +from .registry import PLUGIN_LAYERS + +if platform.system() == 'Windows': + import regex as re +else: + import re + + +def infer_abbr(class_type): + """Infer abbreviation from the class name. + + This method will infer the abbreviation to map class types to + abbreviations. + + Rule 1: If the class has the property "abbr", return the property. + Rule 2: Otherwise, the abbreviation falls back to snake case of class + name, e.g. the abbreviation of ``FancyBlock`` will be ``fancy_block``. + + Args: + class_type (type): The norm layer type. + + Returns: + str: The inferred abbreviation. + """ + + def camel2snack(word): + """Convert camel case word into snack case. + + Modified from `inflection lib + `_. + + Example:: + + >>> camel2snack("FancyBlock") + 'fancy_block' + """ + + word = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', word) + word = re.sub(r'([a-z\d])([A-Z])', r'\1_\2', word) + word = word.replace('-', '_') + return word.lower() + + if not inspect.isclass(class_type): + raise TypeError( + f'class_type must be a type, but got {type(class_type)}') + if hasattr(class_type, '_abbr_'): + return class_type._abbr_ + else: + return camel2snack(class_type.__name__) + + +def build_plugin_layer(cfg, postfix='', **kwargs): + """Build plugin layer. + + Args: + cfg (None or dict): cfg should contain: + type (str): identify plugin layer type. + layer args: args needed to instantiate a plugin layer. + postfix (int, str): appended into norm abbreviation to + create named layer. Default: ''. + + Returns: + tuple[str, nn.Module]: + name (str): abbreviation + postfix + layer (nn.Module): created plugin layer + """ + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in PLUGIN_LAYERS: + raise KeyError(f'Unrecognized plugin type {layer_type}') + + plugin_layer = PLUGIN_LAYERS.get(layer_type) + abbr = infer_abbr(plugin_layer) + + assert isinstance(postfix, (int, str)) + name = abbr + str(postfix) + + layer = plugin_layer(**kwargs, **cfg_) + + return name, layer diff --git a/mmcv/mmcv/cnn/bricks/registry.py b/mmcv/mmcv/cnn/bricks/registry.py new file mode 100644 index 0000000..368db79 --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/registry.py @@ -0,0 +1,8 @@ +from mmcv.utils import Registry + +CONV_LAYERS = Registry('conv layer') +NORM_LAYERS = Registry('norm layer') +ACTIVATION_LAYERS = Registry('activation layer') +PADDING_LAYERS = Registry('padding layer') +UPSAMPLE_LAYERS = Registry('upsample layer') +PLUGIN_LAYERS = Registry('plugin layer') diff --git a/mmcv/mmcv/cnn/bricks/scale.py b/mmcv/mmcv/cnn/bricks/scale.py new file mode 100644 index 0000000..be7109b --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/scale.py @@ -0,0 +1,20 @@ +import torch +import torch.nn as nn + + +class Scale(nn.Module): + """A learnable scale parameter. + + This layer scales the input by a learnable factor. It multiplies a + learnable scale parameter of shape (1,) with input of any shape. + + Args: + scale (float): Initial value of scale factor. Default: 1.0 + """ + + def __init__(self, scale=1.0): + super(Scale, self).__init__() + self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) + + def forward(self, x): + return x * self.scale diff --git a/mmcv/mmcv/cnn/bricks/swish.py b/mmcv/mmcv/cnn/bricks/swish.py new file mode 100644 index 0000000..f396dc5 --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/swish.py @@ -0,0 +1,24 @@ +import torch +import torch.nn as nn + +from .registry import ACTIVATION_LAYERS + + +@ACTIVATION_LAYERS.register_module() +class Swish(nn.Module): + """Swish Module. + + This module applies the swish function: + + .. math:: + Swish(x) = x * Sigmoid(x) + + Returns: + Tensor: The output tensor. + """ + + def __init__(self): + super(Swish, self).__init__() + + def forward(self, x): + return x * torch.sigmoid(x) diff --git a/mmcv/mmcv/cnn/bricks/upsample.py b/mmcv/mmcv/cnn/bricks/upsample.py new file mode 100644 index 0000000..c1388c3 --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/upsample.py @@ -0,0 +1,83 @@ +import torch.nn as nn +import torch.nn.functional as F + +from ..utils import xavier_init +from .registry import UPSAMPLE_LAYERS + +UPSAMPLE_LAYERS.register_module('nearest', module=nn.Upsample) +UPSAMPLE_LAYERS.register_module('bilinear', module=nn.Upsample) + + +@UPSAMPLE_LAYERS.register_module(name='pixel_shuffle') +class PixelShufflePack(nn.Module): + """Pixel Shuffle upsample layer. + + This module packs `F.pixel_shuffle()` and a nn.Conv2d module together to + achieve a simple upsampling with pixel shuffle. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + scale_factor (int): Upsample ratio. + upsample_kernel (int): Kernel size of the conv layer to expand the + channels. + """ + + def __init__(self, in_channels, out_channels, scale_factor, + upsample_kernel): + super(PixelShufflePack, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.scale_factor = scale_factor + self.upsample_kernel = upsample_kernel + self.upsample_conv = nn.Conv2d( + self.in_channels, + self.out_channels * scale_factor * scale_factor, + self.upsample_kernel, + padding=(self.upsample_kernel - 1) // 2) + self.init_weights() + + def init_weights(self): + xavier_init(self.upsample_conv, distribution='uniform') + + def forward(self, x): + x = self.upsample_conv(x) + x = F.pixel_shuffle(x, self.scale_factor) + return x + + +def build_upsample_layer(cfg, *args, **kwargs): + """Build upsample layer. + + Args: + cfg (dict): The upsample layer config, which should contain: + + - type (str): Layer type. + - scale_factor (int): Upsample ratio, which is not applicable to + deconv. + - layer args: Args needed to instantiate a upsample layer. + args (argument list): Arguments passed to the ``__init__`` + method of the corresponding conv layer. + kwargs (keyword arguments): Keyword arguments passed to the + ``__init__`` method of the corresponding conv layer. + + Returns: + nn.Module: Created upsample layer. + """ + if not isinstance(cfg, dict): + raise TypeError(f'cfg must be a dict, but got {type(cfg)}') + if 'type' not in cfg: + raise KeyError( + f'the cfg dict must contain the key "type", but got {cfg}') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in UPSAMPLE_LAYERS: + raise KeyError(f'Unrecognized upsample type {layer_type}') + else: + upsample = UPSAMPLE_LAYERS.get(layer_type) + + if upsample is nn.Upsample: + cfg_['mode'] = layer_type + layer = upsample(*args, **kwargs, **cfg_) + return layer diff --git a/mmcv/mmcv/cnn/bricks/wrappers.py b/mmcv/mmcv/cnn/bricks/wrappers.py new file mode 100644 index 0000000..899e816 --- /dev/null +++ b/mmcv/mmcv/cnn/bricks/wrappers.py @@ -0,0 +1,116 @@ +r"""Modified from https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/wrappers.py # noqa: E501 + +Wrap some nn modules to support empty tensor input. Currently, these wrappers +are mainly used in mask heads like fcn_mask_head and maskiou_heads since mask +heads are trained on only positive RoIs. +""" +import math + +import torch +import torch.nn as nn +from torch.nn.modules.utils import _pair + +from .registry import CONV_LAYERS, UPSAMPLE_LAYERS + +if torch.__version__ == 'parrots': + TORCH_VERSION = torch.__version__ +else: + # torch.__version__ could be 1.3.1+cu92, we only need the first two + # for comparison + TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2]) + + +def obsolete_torch_version(torch_version, version_threshold): + return torch_version == 'parrots' or torch_version <= version_threshold + + +class NewEmptyTensorOp(torch.autograd.Function): + + @staticmethod + def forward(ctx, x, new_shape): + ctx.shape = x.shape + return x.new_empty(new_shape) + + @staticmethod + def backward(ctx, grad): + shape = ctx.shape + return NewEmptyTensorOp.apply(grad, shape), None + + +@CONV_LAYERS.register_module('Conv', force=True) +class Conv2d(nn.Conv2d): + + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size, + self.padding, self.stride, self.dilation): + o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1 + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +@CONV_LAYERS.register_module() +@CONV_LAYERS.register_module('deconv') +@UPSAMPLE_LAYERS.register_module('deconv', force=True) +class ConvTranspose2d(nn.ConvTranspose2d): + + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d, op in zip(x.shape[-2:], self.kernel_size, + self.padding, self.stride, + self.dilation, self.output_padding): + out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super(ConvTranspose2d, self).forward(x) + + +class MaxPool2d(nn.MaxPool2d): + + def forward(self, x): + # PyTorch 1.7 does not support empty tensor inference yet + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 7)): + out_shape = list(x.shape[:2]) + for i, k, p, s, d in zip(x.shape[-2:], _pair(self.kernel_size), + _pair(self.padding), _pair(self.stride), + _pair(self.dilation)): + o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 + o = math.ceil(o) if self.ceil_mode else math.floor(o) + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + return empty + + return super().forward(x) + + +class Linear(torch.nn.Linear): + + def forward(self, x): + # empty tensor forward of Linear layer is supported in Pytorch 1.6 + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 5)): + out_shape = [x.shape[0], self.out_features] + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) diff --git a/mmcv/mmcv/cnn/resnet.py b/mmcv/mmcv/cnn/resnet.py new file mode 100644 index 0000000..e0129ea --- /dev/null +++ b/mmcv/mmcv/cnn/resnet.py @@ -0,0 +1,316 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import logging + +import torch.nn as nn +import torch.utils.checkpoint as cp + +from ..runner import load_checkpoint +from .utils import constant_init, kaiming_init + + +def conv3x3(in_planes, out_planes, stride=1, dilation=1): + """3x3 convolution with padding.""" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False): + super(BasicBlock, self).__init__() + assert style in ['pytorch', 'caffe'] + self.conv1 = conv3x3(inplanes, planes, stride, dilation) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + assert not with_cp + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False): + """Bottleneck block. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if + it is "caffe", the stride-two layer is the first 1x1 conv layer. + """ + super(Bottleneck, self).__init__() + assert style in ['pytorch', 'caffe'] + if style == 'pytorch': + conv1_stride = 1 + conv2_stride = stride + else: + conv1_stride = stride + conv2_stride = 1 + self.conv1 = nn.Conv2d( + inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False) + self.conv2 = nn.Conv2d( + planes, + planes, + kernel_size=3, + stride=conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + + self.bn1 = nn.BatchNorm2d(planes) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + def forward(self, x): + + def _inner_forward(x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +def make_res_layer(block, + inplanes, + planes, + blocks, + stride=1, + dilation=1, + style='pytorch', + with_cp=False): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append( + block( + inplanes, + planes, + stride, + dilation, + downsample, + style=style, + with_cp=with_cp)) + inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append( + block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp)) + + return nn.Sequential(*layers) + + +class ResNet(nn.Module): + """ResNet backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + num_stages (int): Resnet stages, normally 4. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze + running stats (mean and var). + bn_frozen (bool): Whether to freeze weight and bias of BN layers. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + """ + + arch_settings = { + 18: (BasicBlock, (2, 2, 2, 2)), + 34: (BasicBlock, (3, 4, 6, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, + depth, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + frozen_stages=-1, + bn_eval=True, + bn_frozen=False, + with_cp=False): + super(ResNet, self).__init__() + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for resnet') + assert num_stages >= 1 and num_stages <= 4 + block, stage_blocks = self.arch_settings[depth] + stage_blocks = stage_blocks[:num_stages] + assert len(strides) == len(dilations) == num_stages + assert max(out_indices) < num_stages + + self.out_indices = out_indices + self.style = style + self.frozen_stages = frozen_stages + self.bn_eval = bn_eval + self.bn_frozen = bn_frozen + self.with_cp = with_cp + + self.inplanes = 64 + self.conv1 = nn.Conv2d( + 3, 64, kernel_size=7, stride=2, padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.res_layers = [] + for i, num_blocks in enumerate(stage_blocks): + stride = strides[i] + dilation = dilations[i] + planes = 64 * 2**i + res_layer = make_res_layer( + block, + self.inplanes, + planes, + num_blocks, + stride=stride, + dilation=dilation, + style=self.style, + with_cp=with_cp) + self.inplanes = planes * block.expansion + layer_name = f'layer{i + 1}' + self.add_module(layer_name, res_layer) + self.res_layers.append(layer_name) + + self.feat_dim = block.expansion * 64 * 2**(len(stage_blocks) - 1) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + outs = [] + for i, layer_name in enumerate(self.res_layers): + res_layer = getattr(self, layer_name) + x = res_layer(x) + if i in self.out_indices: + outs.append(x) + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def train(self, mode=True): + super(ResNet, self).train(mode) + if self.bn_eval: + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + if self.bn_frozen: + for params in m.parameters(): + params.requires_grad = False + if mode and self.frozen_stages >= 0: + for param in self.conv1.parameters(): + param.requires_grad = False + for param in self.bn1.parameters(): + param.requires_grad = False + self.bn1.eval() + self.bn1.weight.requires_grad = False + self.bn1.bias.requires_grad = False + for i in range(1, self.frozen_stages + 1): + mod = getattr(self, f'layer{i}') + mod.eval() + for param in mod.parameters(): + param.requires_grad = False diff --git a/mmcv/mmcv/cnn/utils/__init__.py b/mmcv/mmcv/cnn/utils/__init__.py new file mode 100644 index 0000000..fc96dfc --- /dev/null +++ b/mmcv/mmcv/cnn/utils/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from .flops_counter import get_model_complexity_info +from .fuse_conv_bn import fuse_conv_bn +from .weight_init import (bias_init_with_prob, caffe2_xavier_init, + constant_init, kaiming_init, normal_init, + uniform_init, xavier_init) + +__all__ = [ + 'get_model_complexity_info', 'bias_init_with_prob', 'caffe2_xavier_init', + 'constant_init', 'kaiming_init', 'normal_init', 'uniform_init', + 'xavier_init', 'fuse_conv_bn' +] diff --git a/mmcv/mmcv/cnn/utils/flops_counter.py b/mmcv/mmcv/cnn/utils/flops_counter.py new file mode 100644 index 0000000..d783bb4 --- /dev/null +++ b/mmcv/mmcv/cnn/utils/flops_counter.py @@ -0,0 +1,583 @@ +# Modified from flops-counter.pytorch by Vladislav Sovrasov +# original repo: https://github.com/sovrasov/flops-counter.pytorch + +# MIT License + +# Copyright (c) 2018 Vladislav Sovrasov + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import sys +from functools import partial + +import numpy as np +import torch +import torch.nn as nn + + +def get_model_complexity_info(model, + input_shape, + print_per_layer_stat=True, + as_strings=True, + input_constructor=None, + flush=False, + ost=sys.stdout): + """Get complexity information of a model. + + This method can calculate FLOPs and parameter counts of a model with + corresponding input shape. It can also print complexity information for + each layer in a model. + + Supported layers are listed as below: + - Convolutions: ``nn.Conv1d``, ``nn.Conv2d``, ``nn.Conv3d``. + - Activations: ``nn.ReLU``, ``nn.PReLU``, ``nn.ELU``, ``nn.LeakyReLU``, + ``nn.ReLU6``. + - Poolings: ``nn.MaxPool1d``, ``nn.MaxPool2d``, ``nn.MaxPool3d``, + ``nn.AvgPool1d``, ``nn.AvgPool2d``, ``nn.AvgPool3d``, + ``nn.AdaptiveMaxPool1d``, ``nn.AdaptiveMaxPool2d``, + ``nn.AdaptiveMaxPool3d``, ``nn.AdaptiveAvgPool1d``, + ``nn.AdaptiveAvgPool2d``, ``nn.AdaptiveAvgPool3d``. + - BatchNorms: ``nn.BatchNorm1d``, ``nn.BatchNorm2d``, + ``nn.BatchNorm3d``. + - Linear: ``nn.Linear``. + - Deconvolution: ``nn.ConvTranspose2d``. + - Upsample: ``nn.Upsample``. + + Args: + model (nn.Module): The model for complexity calculation. + input_shape (tuple): Input shape used for calculation. + print_per_layer_stat (bool): Whether to print complexity information + for each layer in a model. Default: True. + as_strings (bool): Output FLOPs and params counts in a string form. + Default: True. + input_constructor (None | callable): If specified, it takes a callable + method that generates input. otherwise, it will generate a random + tensor with input shape to calculate FLOPs. Default: None. + flush (bool): same as that in :func:`print`. Default: False. + ost (stream): same as ``file`` param in :func:`print`. + Default: sys.stdout. + + Returns: + tuple[float | str]: If ``as_strings`` is set to True, it will return + FLOPs and parameter counts in a string format. otherwise, it will + return those in a float number format. + """ + assert type(input_shape) is tuple + assert len(input_shape) >= 1 + assert isinstance(model, nn.Module) + flops_model = add_flops_counting_methods(model) + flops_model.eval() + flops_model.start_flops_count() + if input_constructor: + input = input_constructor(input_shape) + _ = flops_model(**input) + else: + try: + batch = torch.ones(()).new_empty( + (1, *input_shape), + dtype=next(flops_model.parameters()).dtype, + device=next(flops_model.parameters()).device) + except StopIteration: + # Avoid StopIteration for models which have no parameters, + # like `nn.Relu()`, `nn.AvgPool2d`, etc. + batch = torch.ones(()).new_empty((1, *input_shape)) + + _ = flops_model(batch) + + flops_count, params_count = flops_model.compute_average_flops_cost() + if print_per_layer_stat: + print_model_with_flops( + flops_model, flops_count, params_count, ost=ost, flush=flush) + flops_model.stop_flops_count() + + if as_strings: + return flops_to_string(flops_count), params_to_string(params_count) + + return flops_count, params_count + + +def flops_to_string(flops, units='GFLOPs', precision=2): + """Convert FLOPs number into a string. + + Note that Here we take a multiply-add counts as one FLOP. + + Args: + flops (float): FLOPs number to be converted. + units (str | None): Converted FLOPs units. Options are None, 'GFLOPs', + 'MFLOPs', 'KFLOPs', 'FLOPs'. If set to None, it will automatically + choose the most suitable unit for FLOPs. Default: 'GFLOPs'. + precision (int): Digit number after the decimal point. Default: 2. + + Returns: + str: The converted FLOPs number with units. + + Examples: + >>> flops_to_string(1e9) + '1.0 GFLOPs' + >>> flops_to_string(2e5, 'MFLOPs') + '0.2 MFLOPs' + >>> flops_to_string(3e-9, None) + '3e-09 FLOPs' + """ + if units is None: + if flops // 10**9 > 0: + return str(round(flops / 10.**9, precision)) + ' GFLOPs' + elif flops // 10**6 > 0: + return str(round(flops / 10.**6, precision)) + ' MFLOPs' + elif flops // 10**3 > 0: + return str(round(flops / 10.**3, precision)) + ' KFLOPs' + else: + return str(flops) + ' FLOPs' + else: + if units == 'GFLOPs': + return str(round(flops / 10.**9, precision)) + ' ' + units + elif units == 'MFLOPs': + return str(round(flops / 10.**6, precision)) + ' ' + units + elif units == 'KFLOPs': + return str(round(flops / 10.**3, precision)) + ' ' + units + else: + return str(flops) + ' FLOPs' + + +def params_to_string(num_params, units=None, precision=2): + """Convert parameter number into a string. + + Args: + num_params (float): Parameter number to be converted. + units (str | None): Converted FLOPs units. Options are None, 'M', + 'K' and ''. If set to None, it will automatically choose the most + suitable unit for Parameter number. Default: None. + precision (int): Digit number after the decimal point. Default: 2. + + Returns: + str: The converted parameter number with units. + + Examples: + >>> params_to_string(1e9) + '1000.0 M' + >>> params_to_string(2e5) + '200.0 k' + >>> params_to_string(3e-9) + '3e-09' + """ + if units is None: + if num_params // 10**6 > 0: + return str(round(num_params / 10**6, precision)) + ' M' + elif num_params // 10**3: + return str(round(num_params / 10**3, precision)) + ' k' + else: + return str(num_params) + else: + if units == 'M': + return str(round(num_params / 10.**6, precision)) + ' ' + units + elif units == 'K': + return str(round(num_params / 10.**3, precision)) + ' ' + units + else: + return str(num_params) + + +def print_model_with_flops(model, + total_flops, + total_params, + units='GFLOPs', + precision=3, + ost=sys.stdout, + flush=False): + """Print a model with FLOPs for each layer. + + Args: + model (nn.Module): The model to be printed. + total_flops (float): Total FLOPs of the model. + total_params (float): Total parameter counts of the model. + units (str | None): Converted FLOPs units. Default: 'GFLOPs'. + precision (int): Digit number after the decimal point. Default: 3. + ost (stream): same as `file` param in :func:`print`. + Default: sys.stdout. + flush (bool): same as that in :func:`print`. Default: False. + + Example: + >>> class ExampleModel(nn.Module): + + >>> def __init__(self): + >>> super().__init__() + >>> self.conv1 = nn.Conv2d(3, 8, 3) + >>> self.conv2 = nn.Conv2d(8, 256, 3) + >>> self.conv3 = nn.Conv2d(256, 8, 3) + >>> self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) + >>> self.flatten = nn.Flatten() + >>> self.fc = nn.Linear(8, 1) + + >>> def forward(self, x): + >>> x = self.conv1(x) + >>> x = self.conv2(x) + >>> x = self.conv3(x) + >>> x = self.avg_pool(x) + >>> x = self.flatten(x) + >>> x = self.fc(x) + >>> return x + + >>> model = ExampleModel() + >>> x = (3, 16, 16) + to print the complexity inforamtion state for each layer, you can use + >>> get_model_complexity_info(model, x) + or directly use + >>> print_model_with_flops(model, 4579784.0, 37361) + ExampleModel( + 0.037 M, 100.000% Params, 0.005 GFLOPs, 100.000% FLOPs, + (conv1): Conv2d(0.0 M, 0.600% Params, 0.0 GFLOPs, 0.959% FLOPs, 3, 8, kernel_size=(3, 3), stride=(1, 1)) # noqa: E501 + (conv2): Conv2d(0.019 M, 50.020% Params, 0.003 GFLOPs, 58.760% FLOPs, 8, 256, kernel_size=(3, 3), stride=(1, 1)) + (conv3): Conv2d(0.018 M, 49.356% Params, 0.002 GFLOPs, 40.264% FLOPs, 256, 8, kernel_size=(3, 3), stride=(1, 1)) + (avg_pool): AdaptiveAvgPool2d(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.017% FLOPs, output_size=(1, 1)) + (flatten): Flatten(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.000% FLOPs, ) + (fc): Linear(0.0 M, 0.024% Params, 0.0 GFLOPs, 0.000% FLOPs, in_features=8, out_features=1, bias=True) + ) + """ + + def accumulate_params(self): + if is_supported_instance(self): + return self.__params__ + else: + sum = 0 + for m in self.children(): + sum += m.accumulate_params() + return sum + + def accumulate_flops(self): + if is_supported_instance(self): + return self.__flops__ / model.__batch_counter__ + else: + sum = 0 + for m in self.children(): + sum += m.accumulate_flops() + return sum + + def flops_repr(self): + accumulated_num_params = self.accumulate_params() + accumulated_flops_cost = self.accumulate_flops() + return ', '.join([ + params_to_string( + accumulated_num_params, units='M', precision=precision), + '{:.3%} Params'.format(accumulated_num_params / total_params), + flops_to_string( + accumulated_flops_cost, units=units, precision=precision), + '{:.3%} FLOPs'.format(accumulated_flops_cost / total_flops), + self.original_extra_repr() + ]) + + def add_extra_repr(m): + m.accumulate_flops = accumulate_flops.__get__(m) + m.accumulate_params = accumulate_params.__get__(m) + flops_extra_repr = flops_repr.__get__(m) + if m.extra_repr != flops_extra_repr: + m.original_extra_repr = m.extra_repr + m.extra_repr = flops_extra_repr + assert m.extra_repr != m.original_extra_repr + + def del_extra_repr(m): + if hasattr(m, 'original_extra_repr'): + m.extra_repr = m.original_extra_repr + del m.original_extra_repr + if hasattr(m, 'accumulate_flops'): + del m.accumulate_flops + + model.apply(add_extra_repr) + print(model, file=ost, flush=flush) + model.apply(del_extra_repr) + + +def get_model_parameters_number(model): + """Calculate parameter number of a model. + + Args: + model (nn.module): The model for parameter number calculation. + + Returns: + float: Parameter number of the model. + """ + num_params = sum(p.numel() for p in model.parameters() if p.requires_grad) + return num_params + + +def add_flops_counting_methods(net_main_module): + # adding additional methods to the existing module object, + # this is done this way so that each function has access to self object + net_main_module.start_flops_count = start_flops_count.__get__( + net_main_module) + net_main_module.stop_flops_count = stop_flops_count.__get__( + net_main_module) + net_main_module.reset_flops_count = reset_flops_count.__get__( + net_main_module) + net_main_module.compute_average_flops_cost = compute_average_flops_cost.__get__( # noqa: E501 + net_main_module) + + net_main_module.reset_flops_count() + + return net_main_module + + +def compute_average_flops_cost(self): + """Compute average FLOPs cost. + + A method to compute average FLOPs cost, which will be available after + `add_flops_counting_methods()` is called on a desired net object. + + Returns: + float: Current mean flops consumption per image. + """ + batches_count = self.__batch_counter__ + flops_sum = 0 + for module in self.modules(): + if is_supported_instance(module): + flops_sum += module.__flops__ + params_sum = get_model_parameters_number(self) + return flops_sum / batches_count, params_sum + + +def start_flops_count(self): + """Activate the computation of mean flops consumption per image. + + A method to activate the computation of mean flops consumption per image. + which will be available after ``add_flops_counting_methods()`` is called on + a desired net object. It should be called before running the network. + """ + add_batch_counter_hook_function(self) + + def add_flops_counter_hook_function(module): + if is_supported_instance(module): + if hasattr(module, '__flops_handle__'): + return + + else: + handle = module.register_forward_hook( + MODULES_MAPPING[type(module)]) + + module.__flops_handle__ = handle + + self.apply(partial(add_flops_counter_hook_function)) + + +def stop_flops_count(self): + """Stop computing the mean flops consumption per image. + + A method to stop computing the mean flops consumption per image, which will + be available after ``add_flops_counting_methods()`` is called on a desired + net object. It can be called to pause the computation whenever. + """ + remove_batch_counter_hook_function(self) + self.apply(remove_flops_counter_hook_function) + + +def reset_flops_count(self): + """Reset statistics computed so far. + + A method to Reset computed statistics, which will be available after + `add_flops_counting_methods()` is called on a desired net object. + """ + add_batch_counter_variables_or_reset(self) + self.apply(add_flops_counter_variable_or_reset) + + +# ---- Internal functions +def empty_flops_counter_hook(module, input, output): + module.__flops__ += 0 + + +def upsample_flops_counter_hook(module, input, output): + output_size = output[0] + batch_size = output_size.shape[0] + output_elements_count = batch_size + for val in output_size.shape[1:]: + output_elements_count *= val + module.__flops__ += int(output_elements_count) + + +def relu_flops_counter_hook(module, input, output): + active_elements_count = output.numel() + module.__flops__ += int(active_elements_count) + + +def linear_flops_counter_hook(module, input, output): + input = input[0] + output_last_dim = output.shape[ + -1] # pytorch checks dimensions, so here we don't care much + module.__flops__ += int(np.prod(input.shape) * output_last_dim) + + +def pool_flops_counter_hook(module, input, output): + input = input[0] + module.__flops__ += int(np.prod(input.shape)) + + +def bn_flops_counter_hook(module, input, output): + input = input[0] + + batch_flops = np.prod(input.shape) + if module.affine: + batch_flops *= 2 + module.__flops__ += int(batch_flops) + + +def deconv_flops_counter_hook(conv_module, input, output): + # Can have multiple inputs, getting the first one + input = input[0] + + batch_size = input.shape[0] + input_height, input_width = input.shape[2:] + + kernel_height, kernel_width = conv_module.kernel_size + in_channels = conv_module.in_channels + out_channels = conv_module.out_channels + groups = conv_module.groups + + filters_per_channel = out_channels // groups + conv_per_position_flops = ( + kernel_height * kernel_width * in_channels * filters_per_channel) + + active_elements_count = batch_size * input_height * input_width + overall_conv_flops = conv_per_position_flops * active_elements_count + bias_flops = 0 + if conv_module.bias is not None: + output_height, output_width = output.shape[2:] + bias_flops = out_channels * batch_size * output_height * output_height + overall_flops = overall_conv_flops + bias_flops + + conv_module.__flops__ += int(overall_flops) + + +def conv_flops_counter_hook(conv_module, input, output): + # Can have multiple inputs, getting the first one + input = input[0] + + batch_size = input.shape[0] + output_dims = list(output.shape[2:]) + + kernel_dims = list(conv_module.kernel_size) + in_channels = conv_module.in_channels + out_channels = conv_module.out_channels + groups = conv_module.groups + + filters_per_channel = out_channels // groups + conv_per_position_flops = int( + np.prod(kernel_dims)) * in_channels * filters_per_channel + + active_elements_count = batch_size * int(np.prod(output_dims)) + + overall_conv_flops = conv_per_position_flops * active_elements_count + + bias_flops = 0 + + if conv_module.bias is not None: + + bias_flops = out_channels * active_elements_count + + overall_flops = overall_conv_flops + bias_flops + + conv_module.__flops__ += int(overall_flops) + + +def batch_counter_hook(module, input, output): + batch_size = 1 + if len(input) > 0: + # Can have multiple inputs, getting the first one + input = input[0] + batch_size = len(input) + else: + pass + print('Warning! No positional inputs found for a module, ' + 'assuming batch size is 1.') + module.__batch_counter__ += batch_size + + +def add_batch_counter_variables_or_reset(module): + + module.__batch_counter__ = 0 + + +def add_batch_counter_hook_function(module): + if hasattr(module, '__batch_counter_handle__'): + return + + handle = module.register_forward_hook(batch_counter_hook) + module.__batch_counter_handle__ = handle + + +def remove_batch_counter_hook_function(module): + if hasattr(module, '__batch_counter_handle__'): + module.__batch_counter_handle__.remove() + del module.__batch_counter_handle__ + + +def add_flops_counter_variable_or_reset(module): + if is_supported_instance(module): + if hasattr(module, '__flops__') or hasattr(module, '__params__'): + print('Warning: variables __flops__ or __params__ are already ' + 'defined for the module' + type(module).__name__ + + ' ptflops can affect your code!') + module.__flops__ = 0 + module.__params__ = get_model_parameters_number(module) + + +def is_supported_instance(module): + if type(module) in MODULES_MAPPING: + return True + return False + + +def remove_flops_counter_hook_function(module): + if is_supported_instance(module): + if hasattr(module, '__flops_handle__'): + module.__flops_handle__.remove() + del module.__flops_handle__ + + +MODULES_MAPPING = { + # convolutions + nn.Conv1d: conv_flops_counter_hook, + nn.Conv2d: conv_flops_counter_hook, + nn.Conv3d: conv_flops_counter_hook, + # activations + nn.ReLU: relu_flops_counter_hook, + nn.PReLU: relu_flops_counter_hook, + nn.ELU: relu_flops_counter_hook, + nn.LeakyReLU: relu_flops_counter_hook, + nn.ReLU6: relu_flops_counter_hook, + # poolings + nn.MaxPool1d: pool_flops_counter_hook, + nn.AvgPool1d: pool_flops_counter_hook, + nn.AvgPool2d: pool_flops_counter_hook, + nn.MaxPool2d: pool_flops_counter_hook, + nn.MaxPool3d: pool_flops_counter_hook, + nn.AvgPool3d: pool_flops_counter_hook, + nn.AdaptiveMaxPool1d: pool_flops_counter_hook, + nn.AdaptiveAvgPool1d: pool_flops_counter_hook, + nn.AdaptiveMaxPool2d: pool_flops_counter_hook, + nn.AdaptiveAvgPool2d: pool_flops_counter_hook, + nn.AdaptiveMaxPool3d: pool_flops_counter_hook, + nn.AdaptiveAvgPool3d: pool_flops_counter_hook, + # BNs + nn.BatchNorm1d: bn_flops_counter_hook, + nn.BatchNorm2d: bn_flops_counter_hook, + nn.BatchNorm3d: bn_flops_counter_hook, + # FC + nn.Linear: linear_flops_counter_hook, + # Upscale + nn.Upsample: upsample_flops_counter_hook, + # Deconvolution + nn.ConvTranspose2d: deconv_flops_counter_hook, +} diff --git a/mmcv/mmcv/cnn/utils/fuse_conv_bn.py b/mmcv/mmcv/cnn/utils/fuse_conv_bn.py new file mode 100644 index 0000000..31578be --- /dev/null +++ b/mmcv/mmcv/cnn/utils/fuse_conv_bn.py @@ -0,0 +1,58 @@ +import torch +import torch.nn as nn + + +def _fuse_conv_bn(conv, bn): + """Fuse conv and bn into one module. + + Args: + conv (nn.Module): Conv to be fused. + bn (nn.Module): BN to be fused. + + Returns: + nn.Module: Fused module. + """ + conv_w = conv.weight + conv_b = conv.bias if conv.bias is not None else torch.zeros_like( + bn.running_mean) + + factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) + conv.weight = nn.Parameter(conv_w * + factor.reshape([conv.out_channels, 1, 1, 1])) + conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) + return conv + + +def fuse_conv_bn(module): + """Recursively fuse conv and bn in a module. + + During inference, the functionary of batch norm layers is turned off + but only the mean and var alone channels are used, which exposes the + chance to fuse it with the preceding conv layers to save computations and + simplify network structures. + + Args: + module (nn.Module): Module to be fused. + + Returns: + nn.Module: Fused module. + """ + last_conv = None + last_conv_name = None + + for name, child in module.named_children(): + if isinstance(child, + (nn.modules.batchnorm._BatchNorm, nn.SyncBatchNorm)): + if last_conv is None: # only fuse BN that is after Conv + continue + fused_conv = _fuse_conv_bn(last_conv, child) + module._modules[last_conv_name] = fused_conv + # To reduce changes, set BN as Identity instead of deleting it. + module._modules[name] = nn.Identity() + last_conv = None + elif isinstance(child, nn.Conv2d): + last_conv = child + last_conv_name = name + else: + fuse_conv_bn(child) + return module diff --git a/mmcv/mmcv/cnn/utils/weight_init.py b/mmcv/mmcv/cnn/utils/weight_init.py new file mode 100644 index 0000000..e6d92e7 --- /dev/null +++ b/mmcv/mmcv/cnn/utils/weight_init.py @@ -0,0 +1,66 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import numpy as np +import torch.nn as nn + + +def constant_init(module, val, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.constant_(module.weight, val) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def xavier_init(module, gain=1, bias=0, distribution='normal'): + assert distribution in ['uniform', 'normal'] + if distribution == 'uniform': + nn.init.xavier_uniform_(module.weight, gain=gain) + else: + nn.init.xavier_normal_(module.weight, gain=gain) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def normal_init(module, mean=0, std=1, bias=0): + nn.init.normal_(module.weight, mean, std) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def uniform_init(module, a=0, b=1, bias=0): + nn.init.uniform_(module.weight, a, b) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def kaiming_init(module, + a=0, + mode='fan_out', + nonlinearity='relu', + bias=0, + distribution='normal'): + assert distribution in ['uniform', 'normal'] + if distribution == 'uniform': + nn.init.kaiming_uniform_( + module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + else: + nn.init.kaiming_normal_( + module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def caffe2_xavier_init(module, bias=0): + # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch + # Acknowledgment to FAIR's internal code + kaiming_init( + module, + a=1, + mode='fan_in', + nonlinearity='leaky_relu', + distribution='uniform') + + +def bias_init_with_prob(prior_prob): + """initialize conv/fc bias value according to giving probablity.""" + bias_init = float(-np.log((1 - prior_prob) / prior_prob)) + return bias_init diff --git a/mmcv/mmcv/cnn/vgg.py b/mmcv/mmcv/cnn/vgg.py new file mode 100644 index 0000000..c20b5d0 --- /dev/null +++ b/mmcv/mmcv/cnn/vgg.py @@ -0,0 +1,175 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import logging + +import torch.nn as nn + +from ..runner import load_checkpoint +from .utils import constant_init, kaiming_init, normal_init + + +def conv3x3(in_planes, out_planes, dilation=1): + """3x3 convolution with padding.""" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + padding=dilation, + dilation=dilation) + + +def make_vgg_layer(inplanes, + planes, + num_blocks, + dilation=1, + with_bn=False, + ceil_mode=False): + layers = [] + for _ in range(num_blocks): + layers.append(conv3x3(inplanes, planes, dilation)) + if with_bn: + layers.append(nn.BatchNorm2d(planes)) + layers.append(nn.ReLU(inplace=True)) + inplanes = planes + layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode)) + + return layers + + +class VGG(nn.Module): + """VGG backbone. + + Args: + depth (int): Depth of vgg, from {11, 13, 16, 19}. + with_bn (bool): Use BatchNorm or not. + num_classes (int): number of classes for classification. + num_stages (int): VGG stages, normally 5. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze + running stats (mean and var). + bn_frozen (bool): Whether to freeze weight and bias of BN layers. + """ + + arch_settings = { + 11: (1, 1, 2, 2, 2), + 13: (2, 2, 2, 2, 2), + 16: (2, 2, 3, 3, 3), + 19: (2, 2, 4, 4, 4) + } + + def __init__(self, + depth, + with_bn=False, + num_classes=-1, + num_stages=5, + dilations=(1, 1, 1, 1, 1), + out_indices=(0, 1, 2, 3, 4), + frozen_stages=-1, + bn_eval=True, + bn_frozen=False, + ceil_mode=False, + with_last_pool=True): + super(VGG, self).__init__() + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for vgg') + assert num_stages >= 1 and num_stages <= 5 + stage_blocks = self.arch_settings[depth] + self.stage_blocks = stage_blocks[:num_stages] + assert len(dilations) == num_stages + assert max(out_indices) <= num_stages + + self.num_classes = num_classes + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.bn_eval = bn_eval + self.bn_frozen = bn_frozen + + self.inplanes = 3 + start_idx = 0 + vgg_layers = [] + self.range_sub_modules = [] + for i, num_blocks in enumerate(self.stage_blocks): + num_modules = num_blocks * (2 + with_bn) + 1 + end_idx = start_idx + num_modules + dilation = dilations[i] + planes = 64 * 2**i if i < 4 else 512 + vgg_layer = make_vgg_layer( + self.inplanes, + planes, + num_blocks, + dilation=dilation, + with_bn=with_bn, + ceil_mode=ceil_mode) + vgg_layers.extend(vgg_layer) + self.inplanes = planes + self.range_sub_modules.append([start_idx, end_idx]) + start_idx = end_idx + if not with_last_pool: + vgg_layers.pop(-1) + self.range_sub_modules[-1][1] -= 1 + self.module_name = 'features' + self.add_module(self.module_name, nn.Sequential(*vgg_layers)) + + if self.num_classes > 0: + self.classifier = nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, num_classes), + ) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + elif isinstance(m, nn.Linear): + normal_init(m, std=0.01) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + outs = [] + vgg_layers = getattr(self, self.module_name) + for i in range(len(self.stage_blocks)): + for j in range(*self.range_sub_modules[i]): + vgg_layer = vgg_layers[j] + x = vgg_layer(x) + if i in self.out_indices: + outs.append(x) + if self.num_classes > 0: + x = x.view(x.size(0), -1) + x = self.classifier(x) + outs.append(x) + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def train(self, mode=True): + super(VGG, self).train(mode) + if self.bn_eval: + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + if self.bn_frozen: + for params in m.parameters(): + params.requires_grad = False + vgg_layers = getattr(self, self.module_name) + if mode and self.frozen_stages >= 0: + for i in range(self.frozen_stages): + for j in range(*self.range_sub_modules[i]): + mod = vgg_layers[j] + mod.eval() + for param in mod.parameters(): + param.requires_grad = False diff --git a/mmcv/mmcv/fileio/__init__.py b/mmcv/mmcv/fileio/__init__.py new file mode 100644 index 0000000..b307027 --- /dev/null +++ b/mmcv/mmcv/fileio/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from .file_client import BaseStorageBackend, FileClient +from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler +from .io import dump, load, register_handler +from .parse import dict_from_file, list_from_file + +__all__ = [ + 'BaseStorageBackend', 'FileClient', 'load', 'dump', 'register_handler', + 'BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler', + 'list_from_file', 'dict_from_file' +] diff --git a/mmcv/mmcv/fileio/file_client.py b/mmcv/mmcv/fileio/file_client.py new file mode 100644 index 0000000..6afae49 --- /dev/null +++ b/mmcv/mmcv/fileio/file_client.py @@ -0,0 +1,297 @@ +import inspect +import warnings +from abc import ABCMeta, abstractmethod + + +class BaseStorageBackend(metaclass=ABCMeta): + """Abstract class of storage backends. + + All backends need to implement two apis: ``get()`` and ``get_text()``. + ``get()`` reads the file as a byte stream and ``get_text()`` reads the file + as texts. + """ + + @abstractmethod + def get(self, filepath): + pass + + @abstractmethod + def get_text(self, filepath): + pass + + +class CephBackend(BaseStorageBackend): + """Ceph storage backend. + + Args: + path_mapping (dict|None): path mapping dict from local path to Petrel + path. When ``path_mapping={'src': 'dst'}``, ``src`` in ``filepath`` + will be replaced by ``dst``. Default: None. + """ + + def __init__(self, path_mapping=None): + try: + import ceph + warnings.warn('Ceph is deprecate in favor of Petrel.') + except ImportError: + raise ImportError('Please install ceph to enable CephBackend.') + + self._client = ceph.S3Client() + assert isinstance(path_mapping, dict) or path_mapping is None + self.path_mapping = path_mapping + + def get(self, filepath): + filepath = str(filepath) + if self.path_mapping is not None: + for k, v in self.path_mapping.items(): + filepath = filepath.replace(k, v) + value = self._client.Get(filepath) + value_buf = memoryview(value) + return value_buf + + def get_text(self, filepath): + raise NotImplementedError + + +class PetrelBackend(BaseStorageBackend): + """Petrel storage backend (for internal use). + + Args: + path_mapping (dict|None): path mapping dict from local path to Petrel + path. When `path_mapping={'src': 'dst'}`, `src` in `filepath` will + be replaced by `dst`. Default: None. + enable_mc (bool): whether to enable memcached support. Default: True. + """ + + def __init__(self, path_mapping=None, enable_mc=True): + try: + from petrel_client import client + except ImportError: + raise ImportError('Please install petrel_client to enable ' + 'PetrelBackend.') + + self._client = client.Client(enable_mc=enable_mc) + assert isinstance(path_mapping, dict) or path_mapping is None + self.path_mapping = path_mapping + + def get(self, filepath): + filepath = str(filepath) + if self.path_mapping is not None: + for k, v in self.path_mapping.items(): + filepath = filepath.replace(k, v) + value = self._client.Get(filepath) + value_buf = memoryview(value) + return value_buf + + def get_text(self, filepath): + raise NotImplementedError + + +class MemcachedBackend(BaseStorageBackend): + """Memcached storage backend. + + Attributes: + server_list_cfg (str): Config file for memcached server list. + client_cfg (str): Config file for memcached client. + sys_path (str | None): Additional path to be appended to `sys.path`. + Default: None. + """ + + def __init__(self, server_list_cfg, client_cfg, sys_path=None): + if sys_path is not None: + import sys + sys.path.append(sys_path) + try: + import mc + except ImportError: + raise ImportError( + 'Please install memcached to enable MemcachedBackend.') + + self.server_list_cfg = server_list_cfg + self.client_cfg = client_cfg + self._client = mc.MemcachedClient.GetInstance(self.server_list_cfg, + self.client_cfg) + # mc.pyvector servers as a point which points to a memory cache + self._mc_buffer = mc.pyvector() + + def get(self, filepath): + filepath = str(filepath) + import mc + self._client.Get(filepath, self._mc_buffer) + value_buf = mc.ConvertBuffer(self._mc_buffer) + return value_buf + + def get_text(self, filepath): + raise NotImplementedError + + +class LmdbBackend(BaseStorageBackend): + """Lmdb storage backend. + + Args: + db_path (str): Lmdb database path. + readonly (bool, optional): Lmdb environment parameter. If True, + disallow any write operations. Default: True. + lock (bool, optional): Lmdb environment parameter. If False, when + concurrent access occurs, do not lock the database. Default: False. + readahead (bool, optional): Lmdb environment parameter. If False, + disable the OS filesystem readahead mechanism, which may improve + random read performance when a database is larger than RAM. + Default: False. + + Attributes: + db_path (str): Lmdb database path. + """ + + def __init__(self, + db_path, + readonly=True, + lock=False, + readahead=False, + **kwargs): + try: + import lmdb + except ImportError: + raise ImportError('Please install lmdb to enable LmdbBackend.') + + self.db_path = str(db_path) + self._client = lmdb.open( + self.db_path, + readonly=readonly, + lock=lock, + readahead=readahead, + **kwargs) + + def get(self, filepath): + """Get values according to the filepath. + + Args: + filepath (str | obj:`Path`): Here, filepath is the lmdb key. + """ + filepath = str(filepath) + with self._client.begin(write=False) as txn: + value_buf = txn.get(filepath.encode('ascii')) + return value_buf + + def get_text(self, filepath): + raise NotImplementedError + + +class HardDiskBackend(BaseStorageBackend): + """Raw hard disks storage backend.""" + + def get(self, filepath): + filepath = str(filepath) + with open(filepath, 'rb') as f: + value_buf = f.read() + return value_buf + + def get_text(self, filepath): + filepath = str(filepath) + with open(filepath, 'r') as f: + value_buf = f.read() + return value_buf + + +class FileClient: + """A general file client to access files in different backend. + + The client loads a file or text in a specified backend from its path + and return it as a binary file. it can also register other backend + accessor with a given name and backend class. + + Attributes: + backend (str): The storage backend type. Options are "disk", "ceph", + "memcached" and "lmdb". + client (:obj:`BaseStorageBackend`): The backend object. + """ + + _backends = { + 'disk': HardDiskBackend, + 'ceph': CephBackend, + 'memcached': MemcachedBackend, + 'lmdb': LmdbBackend, + 'petrel': PetrelBackend, + } + + def __init__(self, backend='disk', **kwargs): + if backend not in self._backends: + raise ValueError( + f'Backend {backend} is not supported. Currently supported ones' + f' are {list(self._backends.keys())}') + self.backend = backend + self.client = self._backends[backend](**kwargs) + + @classmethod + def _register_backend(cls, name, backend, force=False): + if not isinstance(name, str): + raise TypeError('the backend name should be a string, ' + f'but got {type(name)}') + if not inspect.isclass(backend): + raise TypeError( + f'backend should be a class but got {type(backend)}') + if not issubclass(backend, BaseStorageBackend): + raise TypeError( + f'backend {backend} is not a subclass of BaseStorageBackend') + if not force and name in cls._backends: + raise KeyError( + f'{name} is already registered as a storage backend, ' + 'add "force=True" if you want to override it') + + cls._backends[name] = backend + + @classmethod + def register_backend(cls, name, backend=None, force=False): + """Register a backend to FileClient. + + This method can be used as a normal class method or a decorator. + + .. code-block:: python + + class NewBackend(BaseStorageBackend): + + def get(self, filepath): + return filepath + + def get_text(self, filepath): + return filepath + + FileClient.register_backend('new', NewBackend) + + or + + .. code-block:: python + + @FileClient.register_backend('new') + class NewBackend(BaseStorageBackend): + + def get(self, filepath): + return filepath + + def get_text(self, filepath): + return filepath + + Args: + name (str): The name of the registered backend. + backend (class, optional): The backend class to be registered, + which must be a subclass of :class:`BaseStorageBackend`. + When this method is used as a decorator, backend is None. + Defaults to None. + force (bool, optional): Whether to override the backend if the name + has already been registered. Defaults to False. + """ + if backend is not None: + cls._register_backend(name, backend, force=force) + return + + def _register(backend_cls): + cls._register_backend(name, backend_cls, force=force) + return backend_cls + + return _register + + def get(self, filepath): + return self.client.get(filepath) + + def get_text(self, filepath): + return self.client.get_text(filepath) diff --git a/mmcv/mmcv/fileio/handlers/__init__.py b/mmcv/mmcv/fileio/handlers/__init__.py new file mode 100644 index 0000000..2fbc6ec --- /dev/null +++ b/mmcv/mmcv/fileio/handlers/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from .base import BaseFileHandler +from .json_handler import JsonHandler +from .pickle_handler import PickleHandler +from .yaml_handler import YamlHandler + +__all__ = ['BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler'] diff --git a/mmcv/mmcv/fileio/handlers/base.py b/mmcv/mmcv/fileio/handlers/base.py new file mode 100644 index 0000000..91f3fe1 --- /dev/null +++ b/mmcv/mmcv/fileio/handlers/base.py @@ -0,0 +1,25 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from abc import ABCMeta, abstractmethod + + +class BaseFileHandler(metaclass=ABCMeta): + + @abstractmethod + def load_from_fileobj(self, file, **kwargs): + pass + + @abstractmethod + def dump_to_fileobj(self, obj, file, **kwargs): + pass + + @abstractmethod + def dump_to_str(self, obj, **kwargs): + pass + + def load_from_path(self, filepath, mode='r', **kwargs): + with open(filepath, mode) as f: + return self.load_from_fileobj(f, **kwargs) + + def dump_to_path(self, obj, filepath, mode='w', **kwargs): + with open(filepath, mode) as f: + self.dump_to_fileobj(obj, f, **kwargs) diff --git a/mmcv/mmcv/fileio/handlers/json_handler.py b/mmcv/mmcv/fileio/handlers/json_handler.py new file mode 100644 index 0000000..d92c397 --- /dev/null +++ b/mmcv/mmcv/fileio/handlers/json_handler.py @@ -0,0 +1,36 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import json + +import numpy as np + +from .base import BaseFileHandler + + +def set_default(obj): + """Set default json values for non-serializable values. + + It helps convert ``set``, ``range`` and ``np.ndarray`` data types to list. + It also converts ``np.generic`` (including ``np.int32``, ``np.float32``, + etc.) into plain numbers of plain python built-in types. + """ + if isinstance(obj, (set, range)): + return list(obj) + elif isinstance(obj, np.ndarray): + return obj.tolist() + elif isinstance(obj, np.generic): + return obj.item() + raise TypeError(f'{type(obj)} is unsupported for json dump') + + +class JsonHandler(BaseFileHandler): + + def load_from_fileobj(self, file): + return json.load(file) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('default', set_default) + json.dump(obj, file, **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('default', set_default) + return json.dumps(obj, **kwargs) diff --git a/mmcv/mmcv/fileio/handlers/pickle_handler.py b/mmcv/mmcv/fileio/handlers/pickle_handler.py new file mode 100644 index 0000000..b22b1dc --- /dev/null +++ b/mmcv/mmcv/fileio/handlers/pickle_handler.py @@ -0,0 +1,26 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import pickle + +from .base import BaseFileHandler + + +class PickleHandler(BaseFileHandler): + + def load_from_fileobj(self, file, **kwargs): + return pickle.load(file, **kwargs) + + def load_from_path(self, filepath, **kwargs): + return super(PickleHandler, self).load_from_path( + filepath, mode='rb', **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('protocol', 2) + return pickle.dumps(obj, **kwargs) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('protocol', 2) + pickle.dump(obj, file, **kwargs) + + def dump_to_path(self, obj, filepath, **kwargs): + super(PickleHandler, self).dump_to_path( + obj, filepath, mode='wb', **kwargs) diff --git a/mmcv/mmcv/fileio/handlers/yaml_handler.py b/mmcv/mmcv/fileio/handlers/yaml_handler.py new file mode 100644 index 0000000..c93eba8 --- /dev/null +++ b/mmcv/mmcv/fileio/handlers/yaml_handler.py @@ -0,0 +1,24 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import yaml + +try: + from yaml import CLoader as Loader, CDumper as Dumper +except ImportError: + from yaml import Loader, Dumper + +from .base import BaseFileHandler # isort:skip + + +class YamlHandler(BaseFileHandler): + + def load_from_fileobj(self, file, **kwargs): + kwargs.setdefault('Loader', Loader) + return yaml.load(file, **kwargs) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('Dumper', Dumper) + yaml.dump(obj, file, **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('Dumper', Dumper) + return yaml.dump(obj, **kwargs) diff --git a/mmcv/mmcv/fileio/io.py b/mmcv/mmcv/fileio/io.py new file mode 100644 index 0000000..777df97 --- /dev/null +++ b/mmcv/mmcv/fileio/io.py @@ -0,0 +1,112 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from pathlib import Path + +from ..utils import is_list_of, is_str +from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler + +file_handlers = { + 'json': JsonHandler(), + 'yaml': YamlHandler(), + 'yml': YamlHandler(), + 'pickle': PickleHandler(), + 'pkl': PickleHandler() +} + + +def load(file, file_format=None, **kwargs): + """Load data from json/yaml/pickle files. + + This method provides a unified api for loading data from serialized files. + + Args: + file (str or :obj:`Path` or file-like object): Filename or a file-like + object. + file_format (str, optional): If not specified, the file format will be + inferred from the file extension, otherwise use the specified one. + Currently supported formats include "json", "yaml/yml" and + "pickle/pkl". + + Returns: + The content from the file. + """ + if isinstance(file, Path): + file = str(file) + if file_format is None and is_str(file): + file_format = file.split('.')[-1] + if file_format not in file_handlers: + raise TypeError(f'Unsupported format: {file_format}') + + handler = file_handlers[file_format] + if is_str(file): + obj = handler.load_from_path(file, **kwargs) + elif hasattr(file, 'read'): + obj = handler.load_from_fileobj(file, **kwargs) + else: + raise TypeError('"file" must be a filepath str or a file-object') + return obj + + +def dump(obj, file=None, file_format=None, **kwargs): + """Dump data to json/yaml/pickle strings or files. + + This method provides a unified api for dumping data as strings or to files, + and also supports custom arguments for each file format. + + Args: + obj (any): The python object to be dumped. + file (str or :obj:`Path` or file-like object, optional): If not + specified, then the object is dump to a str, otherwise to a file + specified by the filename or file-like object. + file_format (str, optional): Same as :func:`load`. + + Returns: + bool: True for success, False otherwise. + """ + if isinstance(file, Path): + file = str(file) + if file_format is None: + if is_str(file): + file_format = file.split('.')[-1] + elif file is None: + raise ValueError( + 'file_format must be specified since file is None') + if file_format not in file_handlers: + raise TypeError(f'Unsupported format: {file_format}') + + handler = file_handlers[file_format] + if file is None: + return handler.dump_to_str(obj, **kwargs) + elif is_str(file): + handler.dump_to_path(obj, file, **kwargs) + elif hasattr(file, 'write'): + handler.dump_to_fileobj(obj, file, **kwargs) + else: + raise TypeError('"file" must be a filename str or a file-object') + + +def _register_handler(handler, file_formats): + """Register a handler for some file extensions. + + Args: + handler (:obj:`BaseFileHandler`): Handler to be registered. + file_formats (str or list[str]): File formats to be handled by this + handler. + """ + if not isinstance(handler, BaseFileHandler): + raise TypeError( + f'handler must be a child of BaseFileHandler, not {type(handler)}') + if isinstance(file_formats, str): + file_formats = [file_formats] + if not is_list_of(file_formats, str): + raise TypeError('file_formats must be a str or a list of str') + for ext in file_formats: + file_handlers[ext] = handler + + +def register_handler(file_formats, **kwargs): + + def wrap(cls): + _register_handler(cls(**kwargs), file_formats) + return cls + + return wrap diff --git a/mmcv/mmcv/fileio/parse.py b/mmcv/mmcv/fileio/parse.py new file mode 100644 index 0000000..556c4cf --- /dev/null +++ b/mmcv/mmcv/fileio/parse.py @@ -0,0 +1,51 @@ +# Copyright (c) Open-MMLab. All rights reserved. +def list_from_file(filename, prefix='', offset=0, max_num=0): + """Load a text file and parse the content as a list of strings. + + Args: + filename (str): Filename. + prefix (str): The prefix to be inserted to the begining of each item. + offset (int): The offset of lines. + max_num (int): The maximum number of lines to be read, + zeros and negatives mean no limitation. + + Returns: + list[str]: A list of strings. + """ + cnt = 0 + item_list = [] + with open(filename, 'r') as f: + for _ in range(offset): + f.readline() + for line in f: + if max_num > 0 and cnt >= max_num: + break + item_list.append(prefix + line.rstrip('\n')) + cnt += 1 + return item_list + + +def dict_from_file(filename, key_type=str): + """Load a text file and parse the content as a dict. + + Each line of the text file will be two or more columns splited by + whitespaces or tabs. The first column will be parsed as dict keys, and + the following columns will be parsed as dict values. + + Args: + filename(str): Filename. + key_type(type): Type of the dict's keys. str is user by default and + type conversion will be performed if specified. + + Returns: + dict: The parsed contents. + """ + mapping = {} + with open(filename, 'r') as f: + for line in f: + items = line.rstrip('\n').split() + assert len(items) >= 2 + key = key_type(items[0]) + val = items[1:] if len(items) > 2 else items[1] + mapping[key] = val + return mapping diff --git a/mmcv/mmcv/image/__init__.py b/mmcv/mmcv/image/__init__.py new file mode 100644 index 0000000..62e6cf0 --- /dev/null +++ b/mmcv/mmcv/image/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from .colorspace import (bgr2gray, bgr2hls, bgr2hsv, bgr2rgb, bgr2ycbcr, + gray2bgr, gray2rgb, hls2bgr, hsv2bgr, imconvert, + rgb2bgr, rgb2gray, rgb2ycbcr, ycbcr2bgr, ycbcr2rgb) +from .geometric import (imcrop, imflip, imflip_, impad, impad_to_multiple, + imrescale, imresize, imresize_like, imrotate, imshear, + imtranslate, rescale_size) +from .io import imfrombytes, imread, imwrite, supported_backends, use_backend +from .misc import tensor2imgs +from .photometric import (adjust_brightness, adjust_color, adjust_contrast, + imdenormalize, imequalize, iminvert, imnormalize, + imnormalize_, posterize, solarize) + +__all__ = [ + 'bgr2gray', 'bgr2hls', 'bgr2hsv', 'bgr2rgb', 'gray2bgr', 'gray2rgb', + 'hls2bgr', 'hsv2bgr', 'imconvert', 'rgb2bgr', 'rgb2gray', 'imrescale', + 'imresize', 'imresize_like', 'rescale_size', 'imcrop', 'imflip', 'imflip_', + 'impad', 'impad_to_multiple', 'imrotate', 'imfrombytes', 'imread', + 'imwrite', 'supported_backends', 'use_backend', 'imdenormalize', + 'imnormalize', 'imnormalize_', 'iminvert', 'posterize', 'solarize', + 'rgb2ycbcr', 'bgr2ycbcr', 'ycbcr2rgb', 'ycbcr2bgr', 'tensor2imgs', + 'imshear', 'imtranslate', 'adjust_color', 'imequalize', + 'adjust_brightness', 'adjust_contrast' +] diff --git a/mmcv/mmcv/image/colorspace.py b/mmcv/mmcv/image/colorspace.py new file mode 100644 index 0000000..56cfe65 --- /dev/null +++ b/mmcv/mmcv/image/colorspace.py @@ -0,0 +1,306 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import cv2 +import numpy as np + + +def imconvert(img, src, dst): + """Convert an image from the src colorspace to dst colorspace. + + Args: + img (ndarray): The input image. + src (str): The source colorspace, e.g., 'rgb', 'hsv'. + dst (str): The destination colorspace, e.g., 'rgb', 'hsv'. + + Returns: + ndarray: The converted image. + """ + code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}') + out_img = cv2.cvtColor(img, code) + return out_img + + +def bgr2gray(img, keepdim=False): + """Convert a BGR image to grayscale image. + + Args: + img (ndarray): The input image. + keepdim (bool): If False (by default), then return the grayscale image + with 2 dims, otherwise 3 dims. + + Returns: + ndarray: The converted grayscale image. + """ + out_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + if keepdim: + out_img = out_img[..., None] + return out_img + + +def rgb2gray(img, keepdim=False): + """Convert a RGB image to grayscale image. + + Args: + img (ndarray): The input image. + keepdim (bool): If False (by default), then return the grayscale image + with 2 dims, otherwise 3 dims. + + Returns: + ndarray: The converted grayscale image. + """ + out_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) + if keepdim: + out_img = out_img[..., None] + return out_img + + +def gray2bgr(img): + """Convert a grayscale image to BGR image. + + Args: + img (ndarray): The input image. + + Returns: + ndarray: The converted BGR image. + """ + img = img[..., None] if img.ndim == 2 else img + out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + return out_img + + +def gray2rgb(img): + """Convert a grayscale image to RGB image. + + Args: + img (ndarray): The input image. + + Returns: + ndarray: The converted RGB image. + """ + img = img[..., None] if img.ndim == 2 else img + out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) + return out_img + + +def _convert_input_type_range(img): + """Convert the type and range of the input image. + + It converts the input image to np.float32 type and range of [0, 1]. + It is mainly used for pre-processing the input image in colorspace + convertion functions such as rgb2ycbcr and ycbcr2rgb. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + + Returns: + (ndarray): The converted image with type of np.float32 and range of + [0, 1]. + """ + img_type = img.dtype + img = img.astype(np.float32) + if img_type == np.float32: + pass + elif img_type == np.uint8: + img /= 255. + else: + raise TypeError('The img type should be np.float32 or np.uint8, ' + f'but got {img_type}') + return img + + +def _convert_output_type_range(img, dst_type): + """Convert the type and range of the image according to dst_type. + + It converts the image to desired type and range. If `dst_type` is np.uint8, + images will be converted to np.uint8 type with range [0, 255]. If + `dst_type` is np.float32, it converts the image to np.float32 type with + range [0, 1]. + It is mainly used for post-processing images in colorspace convertion + functions such as rgb2ycbcr and ycbcr2rgb. + + Args: + img (ndarray): The image to be converted with np.float32 type and + range [0, 255]. + dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it + converts the image to np.uint8 type with range [0, 255]. If + dst_type is np.float32, it converts the image to np.float32 type + with range [0, 1]. + + Returns: + (ndarray): The converted image with desired type and range. + """ + if dst_type not in (np.uint8, np.float32): + raise TypeError('The dst_type should be np.float32 or np.uint8, ' + f'but got {dst_type}') + if dst_type == np.uint8: + img = img.round() + else: + img /= 255. + return img.astype(dst_type) + + +def rgb2ycbcr(img, y_only=False): + """Convert a RGB image to YCbCr image. + + This function produces the same results as Matlab's `rgb2ycbcr` function. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `RGB <-> YCrCb`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + y_only (bool): Whether to only return Y channel. Default: False. + + Returns: + ndarray: The converted YCbCr image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) + if y_only: + out_img = np.dot(img, [65.481, 128.553, 24.966]) + 16.0 + else: + out_img = np.matmul( + img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786], + [24.966, 112.0, -18.214]]) + [16, 128, 128] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def bgr2ycbcr(img, y_only=False): + """Convert a BGR image to YCbCr image. + + The bgr version of rgb2ycbcr. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + y_only (bool): Whether to only return Y channel. Default: False. + + Returns: + ndarray: The converted YCbCr image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) + if y_only: + out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0 + else: + out_img = np.matmul( + img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786], + [65.481, -37.797, 112.0]]) + [16, 128, 128] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def ycbcr2rgb(img): + """Convert a YCbCr image to RGB image. + + This function produces the same results as Matlab's ycbcr2rgb function. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `YCrCb <-> RGB`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + + Returns: + ndarray: The converted RGB image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) * 255 + out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621], + [0, -0.00153632, 0.00791071], + [0.00625893, -0.00318811, 0]]) * 255.0 + [ + -222.921, 135.576, -276.836 + ] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def ycbcr2bgr(img): + """Convert a YCbCr image to BGR image. + + The bgr version of ycbcr2rgb. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `YCrCb <-> BGR`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + + Returns: + ndarray: The converted BGR image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) * 255 + out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621], + [0.00791071, -0.00153632, 0], + [0, -0.00318811, 0.00625893]]) * 255.0 + [ + -276.836, 135.576, -222.921 + ] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def convert_color_factory(src, dst): + + code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}') + + def convert_color(img): + out_img = cv2.cvtColor(img, code) + return out_img + + convert_color.__doc__ = f"""Convert a {src.upper()} image to {dst.upper()} + image. + + Args: + img (ndarray or str): The input image. + + Returns: + ndarray: The converted {dst.upper()} image. + """ + + return convert_color + + +bgr2rgb = convert_color_factory('bgr', 'rgb') + +rgb2bgr = convert_color_factory('rgb', 'bgr') + +bgr2hsv = convert_color_factory('bgr', 'hsv') + +hsv2bgr = convert_color_factory('hsv', 'bgr') + +bgr2hls = convert_color_factory('bgr', 'hls') + +hls2bgr = convert_color_factory('hls', 'bgr') diff --git a/mmcv/mmcv/image/geometric.py b/mmcv/mmcv/image/geometric.py new file mode 100644 index 0000000..bedfceb --- /dev/null +++ b/mmcv/mmcv/image/geometric.py @@ -0,0 +1,606 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import numbers + +import cv2 +import numpy as np + +from .io import imread_backend + +try: + from PIL import Image +except ImportError: + Image = None + + +def _scale_size(size, scale): + """Rescale a size by a ratio. + + Args: + size (tuple[int]): (w, h). + scale (float): Scaling factor. + + Returns: + tuple[int]: scaled size. + """ + w, h = size + return int(w * float(scale) + 0.5), int(h * float(scale) + 0.5) + + +cv2_interp_codes = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'bicubic': cv2.INTER_CUBIC, + 'area': cv2.INTER_AREA, + 'lanczos': cv2.INTER_LANCZOS4 +} + +if Image is not None: + pillow_interp_codes = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + +def imresize(img, + size, + return_scale=False, + interpolation='bilinear', + out=None, + backend=None): + """Resize image to a given size. + + Args: + img (ndarray): The input image. + size (tuple[int]): Target size (w, h). + return_scale (bool): Whether to return `w_scale` and `h_scale`. + interpolation (str): Interpolation method, accepted values are + "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' + backend, "nearest", "bilinear" for 'pillow' backend. + out (ndarray): The output destination. + backend (str | None): The image resize backend type. Options are `cv2`, + `pillow`, `None`. If backend is None, the global imread_backend + specified by ``mmcv.use_backend()`` will be used. Default: None. + + Returns: + tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or + `resized_img`. + """ + h, w = img.shape[:2] + if backend is None: + backend = imread_backend + if backend not in ['cv2', 'pillow']: + raise ValueError(f'backend: {backend} is not supported for resize.' + f"Supported backends are 'cv2', 'pillow'") + + if backend == 'pillow': + assert img.dtype == np.uint8, 'Pillow backend only support uint8 type' + pil_image = Image.fromarray(img) + pil_image = pil_image.resize(size, pillow_interp_codes[interpolation]) + resized_img = np.array(pil_image) + else: + resized_img = cv2.resize( + img, size, dst=out, interpolation=cv2_interp_codes[interpolation]) + if not return_scale: + return resized_img + else: + w_scale = size[0] / w + h_scale = size[1] / h + return resized_img, w_scale, h_scale + + +def imresize_like(img, + dst_img, + return_scale=False, + interpolation='bilinear', + backend=None): + """Resize image to the same size of a given image. + + Args: + img (ndarray): The input image. + dst_img (ndarray): The target image. + return_scale (bool): Whether to return `w_scale` and `h_scale`. + interpolation (str): Same as :func:`resize`. + backend (str | None): Same as :func:`resize`. + + Returns: + tuple or ndarray: (`resized_img`, `w_scale`, `h_scale`) or + `resized_img`. + """ + h, w = dst_img.shape[:2] + return imresize(img, (w, h), return_scale, interpolation, backend=backend) + + +def rescale_size(old_size, scale, return_scale=False): + """Calculate the new size to be rescaled to. + + Args: + old_size (tuple[int]): The old size (w, h) of image. + scale (float | tuple[int]): The scaling factor or maximum size. + If it is a float number, then the image will be rescaled by this + factor, else if it is a tuple of 2 integers, then the image will + be rescaled as large as possible within the scale. + return_scale (bool): Whether to return the scaling factor besides the + rescaled image size. + + Returns: + tuple[int]: The new rescaled image size. + """ + w, h = old_size + if isinstance(scale, (float, int)): + if scale <= 0: + raise ValueError(f'Invalid scale {scale}, must be positive.') + scale_factor = scale + elif isinstance(scale, tuple): + max_long_edge = max(scale) + max_short_edge = min(scale) + scale_factor = min(max_long_edge / max(h, w), + max_short_edge / min(h, w)) + else: + raise TypeError( + f'Scale must be a number or tuple of int, but got {type(scale)}') + + new_size = _scale_size((w, h), scale_factor) + + if return_scale: + return new_size, scale_factor + else: + return new_size + + +def imrescale(img, + scale, + return_scale=False, + interpolation='bilinear', + backend=None): + """Resize image while keeping the aspect ratio. + + Args: + img (ndarray): The input image. + scale (float | tuple[int]): The scaling factor or maximum size. + If it is a float number, then the image will be rescaled by this + factor, else if it is a tuple of 2 integers, then the image will + be rescaled as large as possible within the scale. + return_scale (bool): Whether to return the scaling factor besides the + rescaled image. + interpolation (str): Same as :func:`resize`. + backend (str | None): Same as :func:`resize`. + + Returns: + ndarray: The rescaled image. + """ + h, w = img.shape[:2] + new_size, scale_factor = rescale_size((w, h), scale, return_scale=True) + rescaled_img = imresize( + img, new_size, interpolation=interpolation, backend=backend) + if return_scale: + return rescaled_img, scale_factor + else: + return rescaled_img + + +def imflip(img, direction='horizontal'): + """Flip an image horizontally or vertically. + + Args: + img (ndarray): Image to be flipped. + direction (str): The flip direction, either "horizontal" or + "vertical" or "diagonal". + + Returns: + ndarray: The flipped image. + """ + assert direction in ['horizontal', 'vertical', 'diagonal'] + if direction == 'horizontal': + return np.flip(img, axis=1) + elif direction == 'vertical': + return np.flip(img, axis=0) + else: + return np.flip(img, axis=(0, 1)) + + +def imflip_(img, direction='horizontal'): + """Inplace flip an image horizontally or vertically. + + Args: + img (ndarray): Image to be flipped. + direction (str): The flip direction, either "horizontal" or + "vertical" or "diagonal". + + Returns: + ndarray: The flipped image (inplace). + """ + assert direction in ['horizontal', 'vertical', 'diagonal'] + if direction == 'horizontal': + return cv2.flip(img, 1, img) + elif direction == 'vertical': + return cv2.flip(img, 0, img) + else: + return cv2.flip(img, -1, img) + + +def imrotate(img, + angle, + center=None, + scale=1.0, + border_value=0, + interpolation='bilinear', + auto_bound=False): + """Rotate an image. + + Args: + img (ndarray): Image to be rotated. + angle (float): Rotation angle in degrees, positive values mean + clockwise rotation. + center (tuple[float], optional): Center point (w, h) of the rotation in + the source image. If not specified, the center of the image will be + used. + scale (float): Isotropic scale factor. + border_value (int): Border value. + interpolation (str): Same as :func:`resize`. + auto_bound (bool): Whether to adjust the image size to cover the whole + rotated image. + + Returns: + ndarray: The rotated image. + """ + if center is not None and auto_bound: + raise ValueError('`auto_bound` conflicts with `center`') + h, w = img.shape[:2] + if center is None: + center = ((w - 1) * 0.5, (h - 1) * 0.5) + assert isinstance(center, tuple) + + matrix = cv2.getRotationMatrix2D(center, -angle, scale) + if auto_bound: + cos = np.abs(matrix[0, 0]) + sin = np.abs(matrix[0, 1]) + new_w = h * sin + w * cos + new_h = h * cos + w * sin + matrix[0, 2] += (new_w - w) * 0.5 + matrix[1, 2] += (new_h - h) * 0.5 + w = int(np.round(new_w)) + h = int(np.round(new_h)) + rotated = cv2.warpAffine( + img, + matrix, (w, h), + flags=cv2_interp_codes[interpolation], + borderValue=border_value) + return rotated + + +def bbox_clip(bboxes, img_shape): + """Clip bboxes to fit the image shape. + + Args: + bboxes (ndarray): Shape (..., 4*k) + img_shape (tuple[int]): (height, width) of the image. + + Returns: + ndarray: Clipped bboxes. + """ + assert bboxes.shape[-1] % 4 == 0 + cmin = np.empty(bboxes.shape[-1], dtype=bboxes.dtype) + cmin[0::2] = img_shape[1] - 1 + cmin[1::2] = img_shape[0] - 1 + clipped_bboxes = np.maximum(np.minimum(bboxes, cmin), 0) + return clipped_bboxes + + +def bbox_scaling(bboxes, scale, clip_shape=None): + """Scaling bboxes w.r.t the box center. + + Args: + bboxes (ndarray): Shape(..., 4). + scale (float): Scaling factor. + clip_shape (tuple[int], optional): If specified, bboxes that exceed the + boundary will be clipped according to the given shape (h, w). + + Returns: + ndarray: Scaled bboxes. + """ + if float(scale) == 1.0: + scaled_bboxes = bboxes.copy() + else: + w = bboxes[..., 2] - bboxes[..., 0] + 1 + h = bboxes[..., 3] - bboxes[..., 1] + 1 + dw = (w * (scale - 1)) * 0.5 + dh = (h * (scale - 1)) * 0.5 + scaled_bboxes = bboxes + np.stack((-dw, -dh, dw, dh), axis=-1) + if clip_shape is not None: + return bbox_clip(scaled_bboxes, clip_shape) + else: + return scaled_bboxes + + +def imcrop(img, bboxes, scale=1.0, pad_fill=None): + """Crop image patches. + + 3 steps: scale the bboxes -> clip bboxes -> crop and pad. + + Args: + img (ndarray): Image to be cropped. + bboxes (ndarray): Shape (k, 4) or (4, ), location of cropped bboxes. + scale (float, optional): Scale ratio of bboxes, the default value + 1.0 means no padding. + pad_fill (Number | list[Number]): Value to be filled for padding. + Default: None, which means no padding. + + Returns: + list[ndarray] | ndarray: The cropped image patches. + """ + chn = 1 if img.ndim == 2 else img.shape[2] + if pad_fill is not None: + if isinstance(pad_fill, (int, float)): + pad_fill = [pad_fill for _ in range(chn)] + assert len(pad_fill) == chn + + _bboxes = bboxes[None, ...] if bboxes.ndim == 1 else bboxes + scaled_bboxes = bbox_scaling(_bboxes, scale).astype(np.int32) + clipped_bbox = bbox_clip(scaled_bboxes, img.shape) + + patches = [] + for i in range(clipped_bbox.shape[0]): + x1, y1, x2, y2 = tuple(clipped_bbox[i, :]) + if pad_fill is None: + patch = img[y1:y2 + 1, x1:x2 + 1, ...] + else: + _x1, _y1, _x2, _y2 = tuple(scaled_bboxes[i, :]) + if chn == 1: + patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1) + else: + patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1, chn) + patch = np.array( + pad_fill, dtype=img.dtype) * np.ones( + patch_shape, dtype=img.dtype) + x_start = 0 if _x1 >= 0 else -_x1 + y_start = 0 if _y1 >= 0 else -_y1 + w = x2 - x1 + 1 + h = y2 - y1 + 1 + patch[y_start:y_start + h, x_start:x_start + w, + ...] = img[y1:y1 + h, x1:x1 + w, ...] + patches.append(patch) + + if bboxes.ndim == 1: + return patches[0] + else: + return patches + + +def impad(img, + *, + shape=None, + padding=None, + pad_val=0, + padding_mode='constant'): + """Pad the given image to a certain shape or pad on all sides with + specified padding mode and padding value. + + Args: + img (ndarray): Image to be padded. + shape (tuple[int]): Expected padding shape (h, w). Default: None. + padding (int or tuple[int]): Padding on each border. If a single int is + provided this is used to pad all borders. If tuple of length 2 is + provided this is the padding on left/right and top/bottom + respectively. If a tuple of length 4 is provided this is the + padding for the left, top, right and bottom borders respectively. + Default: None. Note that `shape` and `padding` can not be both + set. + pad_val (Number | Sequence[Number]): Values to be filled in padding + areas when padding_mode is 'constant'. Default: 0. + padding_mode (str): Type of padding. Should be: constant, edge, + reflect or symmetric. Default: constant. + + - constant: pads with a constant value, this value is specified + with pad_val. + - edge: pads with the last value at the edge of the image. + - reflect: pads with reflection of image without repeating the + last value on the edge. For example, padding [1, 2, 3, 4] + with 2 elements on both sides in reflect mode will result + in [3, 2, 1, 2, 3, 4, 3, 2]. + - symmetric: pads with reflection of image repeating the last + value on the edge. For example, padding [1, 2, 3, 4] with + 2 elements on both sides in symmetric mode will result in + [2, 1, 1, 2, 3, 4, 4, 3] + + Returns: + ndarray: The padded image. + """ + + assert (shape is not None) ^ (padding is not None) + if shape is not None: + padding = (0, 0, shape[1] - img.shape[1], shape[0] - img.shape[0]) + + # check pad_val + if isinstance(pad_val, tuple): + assert len(pad_val) == img.shape[-1] + elif not isinstance(pad_val, numbers.Number): + raise TypeError('pad_val must be a int or a tuple. ' + f'But received {type(pad_val)}') + + # check padding + if isinstance(padding, tuple) and len(padding) in [2, 4]: + if len(padding) == 2: + padding = (padding[0], padding[1], padding[0], padding[1]) + elif isinstance(padding, numbers.Number): + padding = (padding, padding, padding, padding) + else: + raise ValueError('Padding must be a int or a 2, or 4 element tuple.' + f'But received {padding}') + + # check padding mode + assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'] + + border_type = { + 'constant': cv2.BORDER_CONSTANT, + 'edge': cv2.BORDER_REPLICATE, + 'reflect': cv2.BORDER_REFLECT_101, + 'symmetric': cv2.BORDER_REFLECT + } + img = cv2.copyMakeBorder( + img, + padding[1], + padding[3], + padding[0], + padding[2], + border_type[padding_mode], + value=pad_val) + + return img + + +def impad_to_multiple(img, divisor, pad_val=0): + """Pad an image to ensure each edge to be multiple to some number. + + Args: + img (ndarray): Image to be padded. + divisor (int): Padded image edges will be multiple to divisor. + pad_val (Number | Sequence[Number]): Same as :func:`impad`. + + Returns: + ndarray: The padded image. + """ + pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor + pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor + return impad(img, shape=(pad_h, pad_w), pad_val=pad_val) + + +def _get_shear_matrix(magnitude, direction='horizontal'): + """Generate the shear matrix for transformation. + + Args: + magnitude (int | float): The magnitude used for shear. + direction (str): Thie flip direction, either "horizontal" + or "vertical". + + Returns: + ndarray: The shear matrix with dtype float32. + """ + if direction == 'horizontal': + shear_matrix = np.float32([[1, magnitude, 0], [0, 1, 0]]) + elif direction == 'vertical': + shear_matrix = np.float32([[1, 0, 0], [magnitude, 1, 0]]) + return shear_matrix + + +def imshear(img, + magnitude, + direction='horizontal', + border_value=0, + interpolation='bilinear'): + """Shear an image. + + Args: + img (ndarray): Image to be sheared with format (h, w) + or (h, w, c). + magnitude (int | float): The magnitude used for shear. + direction (str): Thie flip direction, either "horizontal" + or "vertical". + border_value (int | tuple[int]): Value used in case of a + constant border. + interpolation (str): Same as :func:`resize`. + + Returns: + ndarray: The sheared image. + """ + assert direction in ['horizontal', + 'vertical'], f'Invalid direction: {direction}' + height, width = img.shape[:2] + if img.ndim == 2: + channels = 1 + elif img.ndim == 3: + channels = img.shape[-1] + if isinstance(border_value, int): + border_value = tuple([border_value] * channels) + elif isinstance(border_value, tuple): + assert len(border_value) == channels, \ + 'Expected the num of elements in tuple equals the channels' \ + 'of input image. Found {} vs {}'.format( + len(border_value), channels) + else: + raise ValueError( + f'Invalid type {type(border_value)} for `border_value`') + shear_matrix = _get_shear_matrix(magnitude, direction) + sheared = cv2.warpAffine( + img, + shear_matrix, + (width, height), + # Note case when the number elements in `border_value` + # greater than 3 (e.g. shearing masks whose channels large + # than 3) will raise TypeError in `cv2.warpAffine`. + # Here simply slice the first 3 values in `border_value`. + borderValue=border_value[:3], + flags=cv2_interp_codes[interpolation]) + return sheared + + +def _get_translate_matrix(offset, direction='horizontal'): + """Generate the translate matrix. + + Args: + offset (int | float): The offset used for translate. + direction (str): The translate direction, either + "horizontal" or "vertical". + + Returns: + ndarray: The translate matrix with dtype float32. + """ + if direction == 'horizontal': + translate_matrix = np.float32([[1, 0, offset], [0, 1, 0]]) + elif direction == 'vertical': + translate_matrix = np.float32([[1, 0, 0], [0, 1, offset]]) + return translate_matrix + + +def imtranslate(img, + offset, + direction='horizontal', + border_value=0, + interpolation='bilinear'): + """Translate an image. + + Args: + img (ndarray): Image to be translated with format + (h, w) or (h, w, c). + offset (int | float): The offset used for translate. + direction (str): The translate direction, either "horizontal" + or "vertical". + border_value (int | tuple[int]): Value used in case of a + constant border. + interpolation (str): Same as :func:`resize`. + + Returns: + ndarray: The translated image. + """ + assert direction in ['horizontal', + 'vertical'], f'Invalid direction: {direction}' + height, width = img.shape[:2] + if img.ndim == 2: + channels = 1 + elif img.ndim == 3: + channels = img.shape[-1] + if isinstance(border_value, int): + border_value = tuple([border_value] * channels) + elif isinstance(border_value, tuple): + assert len(border_value) == channels, \ + 'Expected the num of elements in tuple equals the channels' \ + 'of input image. Found {} vs {}'.format( + len(border_value), channels) + else: + raise ValueError( + f'Invalid type {type(border_value)} for `border_value`.') + translate_matrix = _get_translate_matrix(offset, direction) + translated = cv2.warpAffine( + img, + translate_matrix, + (width, height), + # Note case when the number elements in `border_value` + # greater than 3 (e.g. translating masks whose channels + # large than 3) will raise TypeError in `cv2.warpAffine`. + # Here simply slice the first 3 values in `border_value`. + borderValue=border_value[:3], + flags=cv2_interp_codes[interpolation]) + return translated diff --git a/mmcv/mmcv/image/io.py b/mmcv/mmcv/image/io.py new file mode 100644 index 0000000..ee00dcf --- /dev/null +++ b/mmcv/mmcv/image/io.py @@ -0,0 +1,233 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import io +import os.path as osp +from pathlib import Path + +import cv2 +import numpy as np +from cv2 import IMREAD_COLOR, IMREAD_GRAYSCALE, IMREAD_UNCHANGED + +from mmcv.utils import check_file_exist, is_str, mkdir_or_exist + +try: + from turbojpeg import TJCS_RGB, TJPF_BGR, TJPF_GRAY, TurboJPEG +except ImportError: + TJCS_RGB = TJPF_GRAY = TJPF_BGR = TurboJPEG = None + +try: + from PIL import Image +except ImportError: + Image = None + +jpeg = None +supported_backends = ['cv2', 'turbojpeg', 'pillow'] + +imread_flags = { + 'color': IMREAD_COLOR, + 'grayscale': IMREAD_GRAYSCALE, + 'unchanged': IMREAD_UNCHANGED +} + +imread_backend = 'cv2' + + +def use_backend(backend): + """Select a backend for image decoding. + + Args: + backend (str): The image decoding backend type. Options are `cv2`, + `pillow`, `turbojpeg` (see https://github.com/lilohuang/PyTurboJPEG). + `turbojpeg` is faster but it only supports `.jpeg` file format. + """ + assert backend in supported_backends + global imread_backend + imread_backend = backend + if imread_backend == 'turbojpeg': + if TurboJPEG is None: + raise ImportError('`PyTurboJPEG` is not installed') + global jpeg + if jpeg is None: + jpeg = TurboJPEG() + elif imread_backend == 'pillow': + if Image is None: + raise ImportError('`Pillow` is not installed') + + +def _jpegflag(flag='color', channel_order='bgr'): + channel_order = channel_order.lower() + if channel_order not in ['rgb', 'bgr']: + raise ValueError('channel order must be either "rgb" or "bgr"') + + if flag == 'color': + if channel_order == 'bgr': + return TJPF_BGR + elif channel_order == 'rgb': + return TJCS_RGB + elif flag == 'grayscale': + return TJPF_GRAY + else: + raise ValueError('flag must be "color" or "grayscale"') + + +def _pillow2array(img, flag='color', channel_order='bgr'): + """Convert a pillow image to numpy array. + + Args: + img (:obj:`PIL.Image.Image`): The image loaded using PIL + flag (str): Flags specifying the color type of a loaded image, + candidates are 'color', 'grayscale' and 'unchanged'. + Default to 'color'. + channel_order (str): The channel order of the output image array, + candidates are 'bgr' and 'rgb'. Default to 'bgr'. + + Returns: + np.ndarray: The converted numpy array + """ + channel_order = channel_order.lower() + if channel_order not in ['rgb', 'bgr']: + raise ValueError('channel order must be either "rgb" or "bgr"') + + if flag == 'unchanged': + array = np.array(img) + if array.ndim >= 3 and array.shape[2] >= 3: # color image + array[:, :, :3] = array[:, :, (2, 1, 0)] # RGB to BGR + else: + # If the image mode is not 'RGB', convert it to 'RGB' first. + if img.mode != 'RGB': + if img.mode != 'LA': + # Most formats except 'LA' can be directly converted to RGB + img = img.convert('RGB') + else: + # When the mode is 'LA', the default conversion will fill in + # the canvas with black, which sometimes shadows black objects + # in the foreground. + # + # Therefore, a random color (124, 117, 104) is used for canvas + img_rgba = img.convert('RGBA') + img = Image.new('RGB', img_rgba.size, (124, 117, 104)) + img.paste(img_rgba, mask=img_rgba.split()[3]) # 3 is alpha + if flag == 'color': + array = np.array(img) + if channel_order != 'rgb': + array = array[:, :, ::-1] # RGB to BGR + elif flag == 'grayscale': + img = img.convert('L') + array = np.array(img) + else: + raise ValueError( + 'flag must be "color", "grayscale" or "unchanged", ' + f'but got {flag}') + return array + + +def imread(img_or_path, flag='color', channel_order='bgr', backend=None): + """Read an image. + + Args: + img_or_path (ndarray or str or Path): Either a numpy array or str or + pathlib.Path. If it is a numpy array (loaded image), then + it will be returned as is. + flag (str): Flags specifying the color type of a loaded image, + candidates are `color`, `grayscale` and `unchanged`. + Note that the `turbojpeg` backened does not support `unchanged`. + channel_order (str): Order of channel, candidates are `bgr` and `rgb`. + backend (str | None): The image decoding backend type. Options are + `cv2`, `pillow`, `turbojpeg`, `None`. If backend is None, the + global imread_backend specified by ``mmcv.use_backend()`` will be + used. Default: None. + + Returns: + ndarray: Loaded image array. + """ + + if backend is None: + backend = imread_backend + if backend not in supported_backends: + raise ValueError(f'backend: {backend} is not supported. Supported ' + "backends are 'cv2', 'turbojpeg', 'pillow'") + if isinstance(img_or_path, Path): + img_or_path = str(img_or_path) + + if isinstance(img_or_path, np.ndarray): + return img_or_path + elif is_str(img_or_path): + check_file_exist(img_or_path, + f'img file does not exist: {img_or_path}') + if backend == 'turbojpeg': + with open(img_or_path, 'rb') as in_file: + img = jpeg.decode(in_file.read(), + _jpegflag(flag, channel_order)) + if img.shape[-1] == 1: + img = img[:, :, 0] + return img + elif backend == 'pillow': + img = Image.open(img_or_path) + img = _pillow2array(img, flag, channel_order) + return img + else: + flag = imread_flags[flag] if is_str(flag) else flag + img = cv2.imread(img_or_path, flag) + if flag == IMREAD_COLOR and channel_order == 'rgb': + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) + return img + else: + raise TypeError('"img" must be a numpy array or a str or ' + 'a pathlib.Path object') + + +def imfrombytes(content, flag='color', channel_order='bgr', backend=None): + """Read an image from bytes. + + Args: + content (bytes): Image bytes got from files or other streams. + flag (str): Same as :func:`imread`. + backend (str | None): The image decoding backend type. Options are + `cv2`, `pillow`, `turbojpeg`, `None`. If backend is None, the + global imread_backend specified by ``mmcv.use_backend()`` will be + used. Default: None. + + Returns: + ndarray: Loaded image array. + """ + + if backend is None: + backend = imread_backend + if backend not in supported_backends: + raise ValueError(f'backend: {backend} is not supported. Supported ' + "backends are 'cv2', 'turbojpeg', 'pillow'") + if backend == 'turbojpeg': + img = jpeg.decode(content, _jpegflag(flag, channel_order)) + if img.shape[-1] == 1: + img = img[:, :, 0] + return img + elif backend == 'pillow': + buff = io.BytesIO(content) + img = Image.open(buff) + img = _pillow2array(img, flag, channel_order) + return img + else: + img_np = np.frombuffer(content, np.uint8) + flag = imread_flags[flag] if is_str(flag) else flag + img = cv2.imdecode(img_np, flag) + if flag == IMREAD_COLOR and channel_order == 'rgb': + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) + return img + + +def imwrite(img, file_path, params=None, auto_mkdir=True): + """Write image to file. + + Args: + img (ndarray): Image array to be written. + file_path (str): Image file path. + params (None or list): Same as opencv's :func:`imwrite` interface. + auto_mkdir (bool): If the parent folder of `file_path` does not exist, + whether to create it automatically. + + Returns: + bool: Successful or not. + """ + if auto_mkdir: + dir_name = osp.abspath(osp.dirname(file_path)) + mkdir_or_exist(dir_name) + return cv2.imwrite(file_path, img, params) diff --git a/mmcv/mmcv/image/misc.py b/mmcv/mmcv/image/misc.py new file mode 100644 index 0000000..1e02b95 --- /dev/null +++ b/mmcv/mmcv/image/misc.py @@ -0,0 +1,43 @@ +import numpy as np + +import mmcv + +try: + import torch +except ImportError: + torch = None + + +def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): + """Convert tensor to 3-channel images. + + Args: + tensor (torch.Tensor): Tensor that contains multiple images, shape ( + N, C, H, W). + mean (tuple[float], optional): Mean of images. Defaults to (0, 0, 0). + std (tuple[float], optional): Standard deviation of images. + Defaults to (1, 1, 1). + to_rgb (bool, optional): Whether the tensor was converted to RGB + format in the first place. If so, convert it back to BGR. + Defaults to True. + + Returns: + list[np.ndarray]: A list that contains multiple images. + """ + + if torch is None: + raise RuntimeError('pytorch is not installed') + assert torch.is_tensor(tensor) and tensor.ndim == 4 + assert len(mean) == 3 + assert len(std) == 3 + + num_imgs = tensor.size(0) + mean = np.array(mean, dtype=np.float32) + std = np.array(std, dtype=np.float32) + imgs = [] + for img_id in range(num_imgs): + img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) + img = mmcv.imdenormalize( + img, mean, std, to_bgr=to_rgb).astype(np.uint8) + imgs.append(np.ascontiguousarray(img)) + return imgs diff --git a/mmcv/mmcv/image/photometric.py b/mmcv/mmcv/image/photometric.py new file mode 100644 index 0000000..5323431 --- /dev/null +++ b/mmcv/mmcv/image/photometric.py @@ -0,0 +1,226 @@ +import cv2 +import numpy as np + +from .colorspace import bgr2gray, gray2bgr + + +def imnormalize(img, mean, std, to_rgb=True): + """Normalize an image with mean and std. + + Args: + img (ndarray): Image to be normalized. + mean (ndarray): The mean to be used for normalize. + std (ndarray): The std to be used for normalize. + to_rgb (bool): Whether to convert to rgb. + + Returns: + ndarray: The normalized image. + """ + img = img.copy().astype(np.float32) + return imnormalize_(img, mean, std, to_rgb) + + +def imnormalize_(img, mean, std, to_rgb=True): + """Inplace normalize an image with mean and std. + + Args: + img (ndarray): Image to be normalized. + mean (ndarray): The mean to be used for normalize. + std (ndarray): The std to be used for normalize. + to_rgb (bool): Whether to convert to rgb. + + Returns: + ndarray: The normalized image. + """ + # cv2 inplace normalization does not accept uint8 + assert img.dtype != np.uint8 + mean = np.float64(mean.reshape(1, -1)) + stdinv = 1 / np.float64(std.reshape(1, -1)) + if to_rgb: + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace + cv2.subtract(img, mean, img) # inplace + cv2.multiply(img, stdinv, img) # inplace + return img + + +def imdenormalize(img, mean, std, to_bgr=True): + assert img.dtype != np.uint8 + mean = mean.reshape(1, -1).astype(np.float64) + std = std.reshape(1, -1).astype(np.float64) + img = cv2.multiply(img, std) # make a copy + cv2.add(img, mean, img) # inplace + if to_bgr: + cv2.cvtColor(img, cv2.COLOR_RGB2BGR, img) # inplace + return img + + +def iminvert(img): + """Invert (negate) an image. + + Args: + img (ndarray): Image to be inverted. + + Returns: + ndarray: The inverted image. + """ + return np.full_like(img, 255) - img + + +def solarize(img, thr=128): + """Solarize an image (invert all pixel values above a threshold) + + Args: + img (ndarray): Image to be solarized. + thr (int): Threshold for solarizing (0 - 255). + + Returns: + ndarray: The solarized image. + """ + img = np.where(img < thr, img, 255 - img) + return img + + +def posterize(img, bits): + """Posterize an image (reduce the number of bits for each color channel) + + Args: + img (ndarray): Image to be posterized. + bits (int): Number of bits (1 to 8) to use for posterizing. + + Returns: + ndarray: The posterized image. + """ + shift = 8 - bits + img = np.left_shift(np.right_shift(img, shift), shift) + return img + + +def adjust_color(img, alpha=1, beta=None, gamma=0): + """It blends the source image and its gray image: + + ``output = img * alpha + gray_img * beta + gamma`` + + Args: + img (ndarray): The input source image. + alpha (int | float): Weight for the source image. Default 1. + beta (int | float): Weight for the converted gray image. + If None, it's assigned the value (1 - `alpha`). + gamma (int | float): Scalar added to each sum. + Same as :func:`cv2.addWeighted`. Default 0. + + Returns: + ndarray: Colored image which has the same size and dtype as input. + """ + gray_img = bgr2gray(img) + gray_img = np.tile(gray_img[..., None], [1, 1, 3]) + if beta is None: + beta = 1 - alpha + colored_img = cv2.addWeighted(img, alpha, gray_img, beta, gamma) + if not colored_img.dtype == np.uint8: + # Note when the dtype of `img` is not defaultly `np.uint8` + # (e.g. np.float32), the value in `colored_img` got from cv2 + # is not guaranteed to be in range [0, 255], so here clip + # is needed. + colored_img = np.clip(colored_img, 0, 255) + return colored_img + + +def imequalize(img): + """Equalize the image histogram. + + This function applies a non-linear mapping to the input image, + in order to create a uniform distribution of grayscale values + in the output image. + + Args: + img (ndarray): Image to be equalized. + + Returns: + ndarray: The equalized image. + """ + + def _scale_channel(im, c): + """Scale the data in the corresponding channel.""" + im = im[:, :, c] + # Compute the histogram of the image channel. + histo = np.histogram(im, 256, (0, 255))[0] + # For computing the step, filter out the nonzeros. + nonzero_histo = histo[histo > 0] + step = (np.sum(nonzero_histo) - nonzero_histo[-1]) // 255 + if not step: + lut = np.array(range(256)) + else: + # Compute the cumulative sum, shifted by step // 2 + # and then normalized by step. + lut = (np.cumsum(histo) + (step // 2)) // step + # Shift lut, prepending with 0. + lut = np.concatenate([[0], lut[:-1]], 0) + # If step is zero, return the original image. + # Otherwise, index from lut. + return np.where(np.equal(step, 0), im, lut[im]) + + # Scales each channel independently and then stacks + # the result. + s1 = _scale_channel(img, 0) + s2 = _scale_channel(img, 1) + s3 = _scale_channel(img, 2) + equalized_img = np.stack([s1, s2, s3], axis=-1) + return equalized_img + + +def adjust_brightness(img, factor=1.): + """Adjust image brightness. + + This function controls the brightness of an image. An + enhancement factor of 0.0 gives a black image. + A factor of 1.0 gives the original image. This function + blends the source image and the degenerated black image: + + ``output = img * factor + degenerated * (1 - factor)`` + + Args: + img (ndarray): Image to be brightened. + factor (float): A value controls the enhancement. + Factor 1.0 returns the original image, lower + factors mean less color (brightness, contrast, + etc), and higher values more. Default 1. + + Returns: + ndarray: The brightened image. + """ + degenerated = np.zeros_like(img) + # Note manually convert the dtype to np.float32, to + # achieve as close results as PIL.ImageEnhance.Brightness. + # Set beta=1-factor, and gamma=0 + brightened_img = cv2.addWeighted( + img.astype(np.float32), factor, degenerated.astype(np.float32), + 1 - factor, 0) + return brightened_img.astype(img.dtype) + + +def adjust_contrast(img, factor=1.): + """Adjust image contrast. + + This function controls the contrast of an image. An + enhancement factor of 0.0 gives a solid grey + image. A factor of 1.0 gives the original image. It + blends the source image and the degenerated mean image: + + ``output = img * factor + degenerated * (1 - factor)`` + + Args: + img (ndarray): Image to be contrasted. BGR order. + factor (float): Same as :func:`mmcv.adjust_brightness`. + + Returns: + ndarray: The contrasted image. + """ + gray_img = bgr2gray(img) + hist = np.histogram(gray_img, 256, (0, 255))[0] + mean = round(np.sum(gray_img) / np.sum(hist)) + degenerated = (np.ones_like(img[..., 0]) * mean).astype(img.dtype) + degenerated = gray2bgr(degenerated) + contrasted_img = cv2.addWeighted( + img.astype(np.float32), factor, degenerated.astype(np.float32), + 1 - factor, 0) + return contrasted_img.astype(img.dtype) diff --git a/mmcv/mmcv/model_zoo/deprecated.json b/mmcv/mmcv/model_zoo/deprecated.json new file mode 100644 index 0000000..25cf6f2 --- /dev/null +++ b/mmcv/mmcv/model_zoo/deprecated.json @@ -0,0 +1,6 @@ +{ + "resnet50_caffe": "detectron/resnet50_caffe", + "resnet50_caffe_bgr": "detectron2/resnet50_caffe_bgr", + "resnet101_caffe": "detectron/resnet101_caffe", + "resnet101_caffe_bgr": "detectron2/resnet101_caffe_bgr" +} diff --git a/mmcv/mmcv/model_zoo/mmcls.json b/mmcv/mmcv/model_zoo/mmcls.json new file mode 100644 index 0000000..822b995 --- /dev/null +++ b/mmcv/mmcv/model_zoo/mmcls.json @@ -0,0 +1,13 @@ +{ + "resnet50_v1d": "https://download.openmmlab.com/mmclassification/v0/imagenet/resnetv1d50_batch256_20200708-1ad0ce94.pth", + "resnet101_v1d": "https://download.openmmlab.com/mmclassification/v0/imagenet/resnetv1d101_batch256_20200708-9cb302ef.pth", + "resnet152_v1d": "https://download.openmmlab.com/mmclassification/v0/imagenet/resnetv1d152_batch256_20200708-e79cb6a2.pth", + "resnext50": "https://download.openmmlab.com/mmclassification/v0/imagenet/resnext50_32x4d_batch256_20200708-c07adbb7.pth", + "resnext101": "https://download.openmmlab.com/mmclassification/v0/imagenet/resnext101_32x8d_batch256_20200708-1ec34aa7.pth", + "resnext152": "https://download.openmmlab.com/mmclassification/v0/imagenet/resnext152_32x4d_batch256_20200708-aab5034c.pth", + "se-resnet50": "https://download.openmmlab.com/mmclassification/v0/imagenet/se-resnet50_batch256_20200804-ae206104.pth", + "se-resnet101": "https://download.openmmlab.com/mmclassification/v0/imagenet/se-resnet101_batch256_20200804-ba5b51d4.pth", + "shufflenet_v1": "https://download.openmmlab.com/mmclassification/v0/imagenet/shufflenet_v1_batch1024_20200804-5d6cec73.pth", + "shufflenet_v2": "https://download.openmmlab.com/mmclassification/v0/imagenet/shufflenet_v2_batch1024_20200812-5bf4721e.pth", + "mobilenet_v2": "https://download.openmmlab.com/mmclassification/v0/imagenet/mobilenet_v2_batch256_20200708-3b2dc3af.pth" +} diff --git a/mmcv/mmcv/model_zoo/open_mmlab.json b/mmcv/mmcv/model_zoo/open_mmlab.json new file mode 100644 index 0000000..c0e975d --- /dev/null +++ b/mmcv/mmcv/model_zoo/open_mmlab.json @@ -0,0 +1,46 @@ +{ + "vgg16_caffe": "https://download.openmmlab.com/pretrain/third_party/vgg16_caffe-292e1171.pth", + "detectron/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_caffe-788b5fa3.pth", + "detectron2/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_msra-5891d200.pth", + "detectron/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_caffe-3ad79236.pth", + "detectron2/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_msra-6cc46731.pth", + "detectron2/resnext101_32x8d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x8d-1516f1aa.pth", + "resnext50_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext50-32x4d-0ab1a123.pth", + "resnext101_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d-a5af3160.pth", + "resnext101_64x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_64x4d-ee2c6f71.pth", + "contrib/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_thangvubk-ad1730dd.pth", + "detectron/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn-9186a21c.pth", + "detectron/resnet101_gn": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn-cac0ab98.pth", + "jhu/resnet50_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_ws-15beedd8.pth", + "jhu/resnet101_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn_ws-3e3c308c.pth", + "jhu/resnext50_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn_ws-0d87ac85.pth", + "jhu/resnext101_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn_ws-34ac1a9e.pth", + "jhu/resnext50_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn-c7e8b754.pth", + "jhu/resnext101_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn-ac3bb84e.pth", + "msra/hrnetv2_w18_small": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18_small-b5a04e21.pth", + "msra/hrnetv2_w18": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18-00eb2006.pth", + "msra/hrnetv2_w32": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w32-dc9eeb4f.pth", + "msra/hrnetv2_w40": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w40-ed0b031c.pth", + "msra/hrnetv2_w48": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w48-d2186c55.pth", + "bninception_caffe": "https://download.openmmlab.com/pretrain/third_party/bn_inception_caffe-ed2e8665.pth", + "kin400/i3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/i3d_r50_f32s2_k400-2c57e077.pth", + "kin400/nl3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/nl3d_r50_f32s2_k400-fa7e7caa.pth", + "res2net101_v1d_26w_4s": "https://download.openmmlab.com/pretrain/third_party/res2net101_v1d_26w_4s_mmdetv2-f0a600f9.pth", + "regnetx_400mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_400mf-a5b10d96.pth", + "regnetx_800mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_800mf-1f4be4c7.pth", + "regnetx_1.6gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_1.6gf-5791c176.pth", + "regnetx_3.2gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_3.2gf-c2599b0f.pth", + "regnetx_4.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_4.0gf-a88f671e.pth", + "regnetx_6.4gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_6.4gf-006af45d.pth", + "regnetx_8.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_8.0gf-3c68abe7.pth", + "regnetx_12gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_12gf-4c2a3350.pth", + "resnet50_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet50_v1c-2cccc1ad.pth", + "resnet101_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet101_v1c-e67eebb6.pth", + "mmedit/vgg16": "https://download.openmmlab.com/mmediting/third_party/vgg_state_dict.pth", + "mmedit/res34_en_nomixup": "https://download.openmmlab.com/mmediting/third_party/model_best_resnet34_En_nomixup.pth", + "mmedit/mobilenet_v2": "https://download.openmmlab.com/mmediting/third_party/mobilenet_v2.pth", + "resnest50": "https://download.openmmlab.com/pretrain/third_party/resnest50_d2-7497a55b.pth", + "resnest101": "https://download.openmmlab.com/pretrain/third_party/resnest101_d2-f3b931b2.pth", + "resnest200": "https://download.openmmlab.com/pretrain/third_party/resnest200_d2-ca88e41f.pth", + "darknet53": "https://download.openmmlab.com/pretrain/third_party/darknet53-a628ea1b.pth" +} diff --git a/mmcv/mmcv/onnx/__init__.py b/mmcv/mmcv/onnx/__init__.py new file mode 100644 index 0000000..1122c5f --- /dev/null +++ b/mmcv/mmcv/onnx/__init__.py @@ -0,0 +1,3 @@ +from .symbolic import register_extra_symbolics + +__all__ = ['register_extra_symbolics'] diff --git a/mmcv/mmcv/onnx/onnx_utils/__init__.py b/mmcv/mmcv/onnx/onnx_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mmcv/mmcv/onnx/onnx_utils/symbolic_helper.py b/mmcv/mmcv/onnx/onnx_utils/symbolic_helper.py new file mode 100644 index 0000000..4e76b66 --- /dev/null +++ b/mmcv/mmcv/onnx/onnx_utils/symbolic_helper.py @@ -0,0 +1,312 @@ +"""Modified from https://github.com/pytorch/pytorch.""" +import warnings +from functools import wraps +from sys import maxsize + +import torch +import torch.onnx +# This import monkey-patches graph manipulation methods on Graph, used for the +# ONNX symbolics +import torch.onnx.utils +from torch._C import ListType + +# --------------------------------------------------------------------------------- +# Helper functions +# --------------------------------------------------------------------------------- + +# Save some builtins as locals, because we'll shadown them below +_sum = sum + + +def _parse_arg(value, desc): + if desc == 'none': + return value + if desc == 'v' or not _is_value(value): + return value + if value.node().mustBeNone(): + return None + if value.node().kind() == 'onnx::Constant': + tval = value.node()['value'] + if desc == 'i': + return int(tval) + elif desc == 'f': + return float(tval) + elif desc == 'b': + return bool(tval) + elif desc == 's': + return str(tval) + elif desc == 't': + return tval + elif desc == 'is': + return [int(v) for v in tval] + else: + raise RuntimeError( + "ONNX symbolic doesn't know to interpret Constant node") + elif value.node().kind() == 'prim::ListConstruct': + if desc == 'is': + for v in value.node().inputs(): + if v.node().kind() != 'onnx::Constant': + raise RuntimeError( + "Failed to export an ONNX attribute '" + + v.node().kind() + + "', since it's not constant, please try to make " + 'things (e.g., kernel size) static if possible') + return [int(v.node()['value']) for v in value.node().inputs()] + else: + raise RuntimeError( + "ONNX symbolic doesn't know to interpret ListConstruct node") + + raise RuntimeError('Unexpected node type: {}'.format(value.node().kind())) + + +def _maybe_get_const(value, desc): + if _is_value(value) and value.node().kind() == 'onnx::Constant': + return _parse_arg(value, desc) + return value + + +def _maybe_get_scalar(value): + value_t = _maybe_get_const(value, 't') + if isinstance(value_t, torch.Tensor) and value_t.shape == (): + return value_t + return value + + +def _get_const(value, desc, arg_name): + if _is_value(value) and value.node().kind() not in ('onnx::Constant', + 'prim::Constant'): + raise RuntimeError('ONNX symbolic expected a constant' + ' value of the {} argument, got `{}`'.format( + arg_name, value)) + return _parse_arg(value, desc) + + +def _unpack_list(list_value): + list_node = list_value.node() + assert list_node.kind() == 'prim::ListConstruct' + return list(list_node.inputs()) + + +# Check if list_value is output from prim::ListConstruct +# This is usually called before _unpack_list to ensure the list can be +# unpacked. +def _is_packed_list(list_value): + return _is_value( + list_value) and list_value.node().kind() == 'prim::ListConstruct' + + +def parse_args(*arg_descriptors): + + def decorator(fn): + fn._arg_descriptors = arg_descriptors + + def wrapper(g, *args): + # some args may be optional, so the length may be smaller + assert len(arg_descriptors) >= len(args) + args = [ + _parse_arg(arg, arg_desc) + for arg, arg_desc in zip(args, arg_descriptors) + ] + return fn(g, *args) + + # In Python 2 functools.wraps chokes on partially applied functions, so + # we need this as a workaround + try: + wrapper = wraps(fn)(wrapper) + except Exception: + pass + return wrapper + + return decorator + + +def _scalar(x): + """Convert a scalar tensor into a Python value.""" + assert x.numel() == 1 + return x.item() + + +def _if_scalar_type_as(g, self, tensor): + """Convert self into the same type of tensor, as necessary.""" + if isinstance(self, torch._C.Value): + return self + + scalar_type = tensor.type().scalarType() + if scalar_type: + ty = scalar_type.lower() + return getattr(self, ty)() + + return self + + +def _is_none(x): + return x.node().mustBeNone() + + +def _is_value(x): + return isinstance(x, torch._C.Value) + + +def _is_tensor_list(x): + return x.type().isSubtypeOf(ListType.ofTensors()) + + +def _unimplemented(op, msg): + warnings.warn('ONNX export failed on ' + op + ' because ' + msg + + ' not supported') + + +def _try_get_scalar_type(*args): + for arg in args: + try: + return arg.type().scalarType() + except RuntimeError: + pass + return None + + +def _topk_helper(g, input, k, dim, largest=True, sorted=False, out=None): + if out is not None: + _unimplemented('TopK', 'Out parameter is not supported') + if not _is_value(k): + k = g.op('Constant', value_t=torch.tensor([k], dtype=torch.int64)) + else: + k = g.op('Reshape', k, g.op('Constant', value_t=torch.tensor([1]))) + return g.op( + 'TopK', + input, + k, + axis_i=dim, + largest_i=largest, + sorted_i=sorted, + outputs=2) + + +def _slice_helper(g, + input, + axes, + starts, + ends, + steps=None, + dynamic_slice=False): + # TODO(ruobing): add support for opset<10 + from torch.onnx.symbolic_opset10 import _slice + return _slice(g, input, axes, starts, ends, steps, dynamic_slice) + + +def _unsqueeze_helper(g, input, dim): + from torch.onnx.symbolic_opset9 import unsqueeze + return unsqueeze(g, input, dim) + + +def _interpolate_size_to_scales(g, input, output_size, dim): + output_size = _maybe_get_const(output_size, 'is') + if _is_value(output_size): + offset = 2 + offsets = g.op( + 'Constant', value_t=torch.ones(offset, dtype=torch.float32)) + dividend = g.op( + 'Cast', output_size, to_i=cast_pytorch_to_onnx['Float']) + divisor = _slice_helper( + g, g.op('Shape', input), axes=[0], ends=[maxsize], starts=[offset]) + divisor = g.op('Cast', divisor, to_i=cast_pytorch_to_onnx['Float']) + scale_dims = g.op('Div', dividend, divisor) + scales = g.op('Concat', offsets, scale_dims, axis_i=0) + else: + scales_constant = [ + 1. if i < 2 else float(output_size[-(dim - i)]) / + float(input.type().sizes()[-(dim - i)]) for i in range(0, dim) + ] + scales = g.op( + 'Constant', + value_t=torch.tensor(scales_constant, dtype=torch.float32)) + return scales + + +def _interpolate_get_scales_if_available(g, scales): + if len(scales) == 0: + return None + available_scales = _maybe_get_const(scales[0], 'f') != -1 and not _is_none( + scales[0]) + + if not available_scales: + return None + + scales_list = [] + for scale in scales: + unsqueezed_scale = _unsqueeze_helper(g, scale, 0) + # ONNX only supports float for the scales. double -> float. + unsqueezed_scale = g.op( + 'Cast', unsqueezed_scale, to_i=cast_pytorch_to_onnx['Float']) + scales_list.append(unsqueezed_scale) + offsets = g.op('Constant', value_t=torch.ones(2, dtype=torch.float32)) + scales = g.op('Concat', offsets, *scales_list, axis_i=0) + return scales + + +def _get_interpolate_attributes(g, mode, args): + if mode == 'nearest': + align_corners = None + scales = args[0:] + else: + align_corners = args[0] + scales = args[1:] + scales = _interpolate_get_scales_if_available(g, scales) + return scales, align_corners + + +def _interpolate_get_scales(g, scale_factor, dim): + offsets = g.op('Constant', value_t=torch.ones(2, dtype=torch.float32)) + if isinstance(scale_factor.type(), torch._C.ListType): + return g.op('Concat', offsets, scale_factor, axis_i=0) + else: + scale_factor = _unsqueeze_helper(g, scale_factor, 0) + scale_factor = g.op( + 'Cast', scale_factor, to_i=cast_pytorch_to_onnx['Float']) + scales = [scale_factor for i in range(dim - 2)] + scale_factor = g.op('Concat', offsets, *scales, axis_i=0) + return scale_factor + + +def _size_helper(g, self, dim): + full_shape = g.op('Shape', self) + from torch.onnx.symbolic_opset9 import select + return select(g, full_shape, g.op('Constant', value_t=torch.tensor([0])), + dim) + + +def _avgpool_helper(tuple_fn, padding, kernel_size, stride, divisor_override, + name): + if divisor_override and divisor_override.node().kind() != 'prim::Constant': + return _unimplemented(name, 'divisor_override') + if not stride: + stride = kernel_size + padding = tuple(tuple_fn(padding)) + return padding + + +# Metaprogram symbolics for each ATen native specialized cast operator. +# For e.g. we specify a function named `_cast_uint8_t` that instantiates an +# ONNX cast node with `to` attribute 'UINT8' +# +# TODO: remove these once we support Type's in the JIT IR and we can once again +# use the unified toType operator +cast_pytorch_to_onnx = { + 'Byte': torch.onnx.TensorProtoDataType.UINT8, + 'Char': torch.onnx.TensorProtoDataType.INT8, + 'Double': torch.onnx.TensorProtoDataType.DOUBLE, + 'Float': torch.onnx.TensorProtoDataType.FLOAT, + 'Half': torch.onnx.TensorProtoDataType.FLOAT16, + 'Int': torch.onnx.TensorProtoDataType.INT32, + 'Long': torch.onnx.TensorProtoDataType.INT64, + 'Short': torch.onnx.TensorProtoDataType.INT16, + 'Bool': torch.onnx.TensorProtoDataType.BOOL, + 'ComplexFloat': torch.onnx.TensorProtoDataType.COMPLEX64, + 'ComplexDouble': torch.onnx.TensorProtoDataType.COMPLEX128, + 'Undefined': torch.onnx.TensorProtoDataType.UNDEFINED, +} + +# Global set to store the list of quantized operators in the network. +# This is currently only used in the conversion of quantized ops from PT +# -> C2 via ONNX. +_quantized_ops = set() diff --git a/mmcv/mmcv/onnx/symbolic.py b/mmcv/mmcv/onnx/symbolic.py new file mode 100644 index 0000000..f6b61d9 --- /dev/null +++ b/mmcv/mmcv/onnx/symbolic.py @@ -0,0 +1,389 @@ +"""Modified from https://github.com/pytorch/pytorch.""" +import os + +import numpy as np +import torch +from torch.nn.modules.utils import _pair, _single, _triple +from torch.onnx.symbolic_helper import parse_args +from torch.onnx.symbolic_registry import register_op + +from .onnx_utils import symbolic_helper as sym_help + + +def _interpolate(name, dim, interpolate_mode): + + def symbolic_fn(g, input, output_size, *args): + scales, align_corners = sym_help._get_interpolate_attributes( + g, interpolate_mode, args) + align_corners = sym_help._maybe_get_scalar(align_corners) + transformation_mode = 'asymmetric' \ + if interpolate_mode == 'nearest' \ + else 'align_corners' if align_corners else 'pytorch_half_pixel' + empty_tensor = g.op( + 'Constant', value_t=torch.tensor([], dtype=torch.float32)) + + if scales is None: + if 'ONNX_BACKEND' in os.environ and os.environ[ + 'ONNX_BACKEND'] == 'TensorRT': + input_size = input.type().sizes() + # slice the first two dim + input_size = input_size[:2] + # convert output_size to int type + output_size = sym_help._maybe_get_const(output_size, 'is') + input_size.extend(output_size) + output_size = g.op( + 'Constant', + value_t=torch.tensor(input_size, dtype=torch.int64)) + else: + input_size = g.op('Shape', input) + input_size_beg = sym_help._slice_helper( + g, input_size, axes=[0], ends=[2], starts=[0]) + output_size = g.op( + 'Cast', + output_size, + to_i=sym_help.cast_pytorch_to_onnx['Long']) + output_size = g.op( + 'Concat', input_size_beg, output_size, axis_i=0) + scales = g.op( + 'Constant', value_t=torch.tensor([], dtype=torch.float32)) + return g.op( + 'Resize', + input, + empty_tensor, + # roi only takes effect whith + # coordinate_transformation_mode="tf_crop_and_resize" + scales, # scales is not needed since we are sending out_size + output_size, + coordinate_transformation_mode_s=transformation_mode, + cubic_coeff_a_f=-0.75, # only valid when mode="cubic" + mode_s=interpolate_mode, # nearest, linear, or cubic + nearest_mode_s='floor') # only valid when mode="nearest" + else: + return g.op( + 'Resize', + input, + empty_tensor, + # roi only takes effect with + # coordinate_transformation_mode="tf_crop_and_resize" + scales, # scales is not needed since we are sending out_size + coordinate_transformation_mode_s=transformation_mode, + cubic_coeff_a_f=-0.75, # only valid when mode="cubic" + mode_s=interpolate_mode, # nearest, linear, or cubic + nearest_mode_s='floor') # only valid when mode="nearest" + + return symbolic_fn + + +upsample_nearest1d = _interpolate('upsample_nearest1d', 3, 'nearest') +upsample_nearest2d = _interpolate('upsample_nearest2d', 4, 'nearest') +upsample_nearest3d = _interpolate('upsample_nearest3d', 5, 'nearest') +upsample_linear1d = _interpolate('upsample_linear1d', 3, 'linear') +upsample_bilinear2d = _interpolate('upsample_bilinear2d', 4, 'linear') +upsample_trilinear3d = _interpolate('upsample_trilinear3d', 5, 'linear') +upsample_bicubic2d = _interpolate('upsample_bicubic2d', 4, 'cubic') + + +@parse_args('v', 'v', 'i', 'i', 'i', 'none') +def topk(g, self, k, dim, largest, sorted, out=None): + return sym_help._topk_helper( + g, self, k, dim, largest=largest, sorted=sorted, out=out) + + +def masked_select(g, self, mask): + from torch.onnx.symbolic_opset9 import nonzero, expand_as + index = nonzero(g, expand_as(g, mask, self)) + return g.op('GatherND', self, index) + + +def _prepare_onnx_paddings(g, dim, pad): + pad_len = torch.onnx.symbolic_opset9.size( + g, pad, g.op('Constant', value_t=torch.tensor([0]))) + # Set extension = [0] * (dim * 2 - len(pad)) + extension = g.op( + 'Sub', + g.op('Mul', + g.op('Constant', value_t=torch.tensor(dim, dtype=torch.int64)), + g.op('Constant', value_t=torch.tensor(2, dtype=torch.int64))), + pad_len) + pad = g.op('Cast', pad, to_i=sym_help.cast_pytorch_to_onnx['Long']) + paddings = g.op( + 'Concat', + pad, + g.op( + 'ConstantOfShape', + extension, + value_t=torch.tensor([0], dtype=torch.int64)), + axis_i=0) + paddings = g.op('Reshape', paddings, + g.op('Constant', value_t=torch.tensor([-1, 2]))) + paddings = g.op( + 'Transpose', + torch.onnx.symbolic_opset10.flip(g, paddings, [0]), + perm_i=[1, 0]) + paddings = g.op('Reshape', paddings, + g.op('Constant', value_t=torch.tensor([-1]))) + padding_c = g.op( + 'Cast', paddings, to_i=sym_help.cast_pytorch_to_onnx['Long']) + return padding_c + + +def constant_pad_nd(g, input, padding, value=None): + mode = 'constant' + value = sym_help._maybe_get_scalar(value) + value = sym_help._if_scalar_type_as(g, value, input) + pad = _prepare_onnx_paddings(g, input.type().dim(), padding) + return g.op('Pad', input, pad, value, mode_s=mode) + + +def reflection_pad(g, input, padding): + mode = 'reflect' + paddings = _prepare_onnx_paddings(g, input.type().dim(), padding) + return g.op('Pad', input, paddings, mode_s=mode) + + +reflection_pad1d = reflection_pad +reflection_pad2d = reflection_pad +reflection_pad3d = reflection_pad + + +def _avg_pool(name, tuple_fn): + + @parse_args('v', 'is', 'is', 'is', 'i', 'i', 'none') + def symbolic_fn(g, + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + divisor_override=None): + padding = sym_help._avgpool_helper(tuple_fn, padding, kernel_size, + stride, divisor_override, name) + if not stride: + stride = kernel_size + if count_include_pad: + input = g.op( + 'Pad', + input, + g.op( + 'Constant', + value_t=torch.tensor(((0, ) * 2 + padding) * 2)), + mode_s='constant') + padding = (0, ) * len(padding) + output = g.op( + 'AveragePool', + input, + kernel_shape_i=tuple_fn(kernel_size), + strides_i=tuple_fn(stride), + pads_i=padding * 2, + ceil_mode_i=ceil_mode) + return output + + return symbolic_fn + + +avg_pool1d = _avg_pool('avg_pool1d', _single) +avg_pool2d = _avg_pool('avg_pool2d', _pair) +avg_pool3d = _avg_pool('avg_pool3d', _triple) + + +def _get_im2col_indices_along_dim(g, input_d, kernel_size_d, dilation_d, + padding_d, stride_d): + # Input is always 4-D (N, C, H, W) + # Calculate indices of sliding blocks along spatial dimension + # Slide kernel over input each dim d: + # each dimension d ranges from 0 to + # input[d]+2xpadding[d]-dilation[d]x(kernel_size[d]-1) + # with steps = stride + + blocks_d = g.op('Add', input_d, + g.op('Constant', value_t=torch.tensor(padding_d * 2))) + blocks_d = g.op( + 'Sub', blocks_d, + g.op( + 'Constant', + value_t=torch.tensor(dilation_d * (kernel_size_d - 1)))) + + # Stride kernel over input and find starting indices along dim d + blocks_d_indices = g.op('Range', g.op('Constant', value_t=torch.tensor(0)), + blocks_d, + g.op('Constant', value_t=torch.tensor(stride_d))) + + # Apply dilation on kernel and find its indices along dim d + kernel_grid = np.arange(0, kernel_size_d * dilation_d, dilation_d) + kernel_grid = g.op('Constant', value_t=torch.tensor([kernel_grid])) + + # Broadcast and add kernel staring positions (indices) with + # kernel_grid along dim d, to get block indices along dim d + blocks_d_indices = g.op( + 'Unsqueeze', blocks_d_indices, axes_i=[0]) # Reshape to [1, -1] + kernel_mask = g.op('Reshape', kernel_grid, + g.op('Constant', value_t=torch.tensor([-1, 1]))) + block_mask = g.op('Add', blocks_d_indices, kernel_mask) + + return block_mask + + +def _get_im2col_padded_input(g, input, padding_h, padding_w): + # Input is always 4-D tensor (N, C, H, W) + # Padding tensor has the following format: (padding_h, padding_w) + # Reshape the padding to follow ONNX format: + # (dim1_begin, dim2_begin,...,dim1_end, dim2_end,...) + pad = g.op( + 'Constant', value_t=torch.LongTensor([0, 0, padding_h, padding_w] * 2)) + return g.op('Pad', input, pad) + + +def _get_im2col_output_shape(g, input, kernel_h, kernel_w): + batch_dim = size(g, input, g.op('Constant', value_t=torch.tensor(0))) + channel_dim = size(g, input, g.op('Constant', value_t=torch.tensor(1))) + channel_unfolded = g.op( + 'Mul', channel_dim, + g.op('Constant', value_t=torch.tensor(kernel_h * kernel_w))) + + return g.op( + 'Concat', + g.op('Unsqueeze', batch_dim, axes_i=[0]), + g.op('Unsqueeze', channel_unfolded, axes_i=[0]), + g.op('Constant', value_t=torch.tensor([-1])), + axis_i=0) + + +def size(g, self, dim=None): + if dim is None: + return g.op('Shape', self) + return sym_help._size_helper(g, self, dim) + + +@parse_args('v', 'is', 'is', 'is', 'is') +def im2col(g, input, kernel_size, dilation, padding, stride): + # Input is always 4-D tensor (N, C, H, W) + # All other args are int[2] + + input_h = size(g, input, g.op('Constant', value_t=torch.tensor(2))) + input_w = size(g, input, g.op('Constant', value_t=torch.tensor(3))) + + stride_h, stride_w = stride[0], stride[1] + padding_h, padding_w = padding[0], padding[1] + dilation_h, dilation_w = dilation[0], dilation[1] + kernel_h, kernel_w = kernel_size[0], kernel_size[1] + + blocks_row_indices = _get_im2col_indices_along_dim(g, input_h, kernel_h, + dilation_h, padding_h, + stride_h) + blocks_col_indices = _get_im2col_indices_along_dim(g, input_w, kernel_w, + dilation_w, padding_w, + stride_w) + + output_shape = _get_im2col_output_shape(g, input, kernel_h, kernel_w) + padded_input = _get_im2col_padded_input(g, input, padding_h, padding_w) + + output = g.op('Gather', padded_input, blocks_row_indices, axis_i=2) + output = g.op('Gather', output, blocks_col_indices, axis_i=4) + output = g.op('Transpose', output, perm_i=[0, 1, 2, 4, 3, 5]) + return g.op('Reshape', output, output_shape) + + +@parse_args('v', 'i') +def one_hot(g, self, num_classes): + values = g.op('Constant', value_t=torch.LongTensor([0, 1])) + depth = g.op('Constant', value_t=torch.LongTensor([num_classes])) + return g.op('OneHot', self, depth, values, axis_i=-1) + + +@parse_args('v', 'i', 'none') +def softmax(g, input, dim, dtype=None): + input_dim = input.type().dim() + if input_dim: + # TODO: remove this as onnx opset 11 spec allows negative axes + if dim < 0: + dim = input_dim + dim + if input_dim == dim + 1: + softmax = g.op('Softmax', input, axis_i=dim) + if dtype and dtype.node().kind() != 'prim::Constant': + parsed_dtype = sym_help._get_const(dtype, 'i', 'dtype') + softmax = g.op( + 'Cast', + softmax, + to_i=sym_help.scalar_type_to_onnx[parsed_dtype]) + return softmax + + max_value = g.op('ReduceMax', input, axes_i=[dim], keepdims_i=1) + input = g.op('Sub', input, max_value) + exp = g.op('Exp', input) + sum = g.op('ReduceSum', exp, axes_i=[dim]) + softmax = g.op('Div', exp, sum) + if dtype and dtype.node().kind() != 'prim::Constant': + parsed_dtype = sym_help._get_const(dtype, 'i', 'dtype') + softmax = g.op( + 'Cast', softmax, to_i=sym_help.scalar_type_to_onnx[parsed_dtype]) + return softmax + + +def _adaptive_pool(name, type, tuple_fn, fn=None): + + @parse_args('v', 'is') + def symbolic_fn(g, input, output_size): + if output_size == [1] * len(output_size) and type == 'AveragePool': + return g.op('GlobalAveragePool', input) + if not input.isCompleteTensor(): + if output_size == [1] * len(output_size): + return g.op('GlobalMaxPool', input), None + raise NotImplementedError( + '[Adaptive pool]:input size not accessible') + dim = input.type().sizes()[2:] + if output_size == [1] * len(output_size) and type == 'MaxPool': + return g.op('GlobalMaxPool', input), None + + # compute stride = floor(input_size / output_size) + s = [int(dim[i] / output_size[i]) for i in range(0, len(dim))] + + # compute kernel_size = input_size - (output_size - 1) * stride + k = [dim[i] - (output_size[i] - 1) * s[i] for i in range(0, len(dim))] + + # call max_poolxd_with_indices to get indices in the output + if type == 'MaxPool': + return fn(g, input, k, k, (0, ) * len(dim), (1, ) * len(dim), + False) + output = g.op( + type, + input, + kernel_shape_i=tuple_fn(k), + strides_i=tuple_fn(s), + ceil_mode_i=False) + return output + + return symbolic_fn + + +adaptive_avg_pool1d = _adaptive_pool('adaptive_avg_pool1d', 'AveragePool', + _single) +adaptive_avg_pool2d = _adaptive_pool('adaptive_avg_pool2d', 'AveragePool', + _pair) +adaptive_avg_pool3d = _adaptive_pool('adaptive_avg_pool3d', 'AveragePool', + _triple) + + +def register_extra_symbolics(opset=11): + register_op('one_hot', one_hot, '', opset) + register_op('im2col', im2col, '', opset) + register_op('topk', topk, '', opset) + register_op('softmax', softmax, '', opset) + register_op('constant_pad_nd', constant_pad_nd, '', opset) + register_op('reflection_pad1d', reflection_pad1d, '', opset) + register_op('reflection_pad2d', reflection_pad2d, '', opset) + register_op('reflection_pad3d', reflection_pad3d, '', opset) + register_op('avg_pool1d', avg_pool1d, '', opset) + register_op('avg_pool2d', avg_pool2d, '', opset) + register_op('avg_pool3d', avg_pool3d, '', opset) + register_op('adaptive_avg_pool1d', adaptive_avg_pool1d, '', opset) + register_op('adaptive_avg_pool2d', adaptive_avg_pool2d, '', opset) + register_op('adaptive_avg_pool3d', adaptive_avg_pool3d, '', opset) + register_op('masked_select', masked_select, '', opset) + register_op('upsample_nearest1d', upsample_nearest1d, '', opset) + register_op('upsample_nearest2d', upsample_nearest2d, '', opset) + register_op('upsample_nearest3d', upsample_nearest3d, '', opset) + register_op('upsample_linear1d', upsample_linear1d, '', opset) + register_op('upsample_bilinear2d', upsample_bilinear2d, '', opset) + register_op('upsample_trilinear3d', upsample_trilinear3d, '', opset) + register_op('upsample_bicubic2d', upsample_bicubic2d, '', opset) diff --git a/mmcv/mmcv/ops/__init__.py b/mmcv/mmcv/ops/__init__.py new file mode 100644 index 0000000..2b83dba --- /dev/null +++ b/mmcv/mmcv/ops/__init__.py @@ -0,0 +1,42 @@ +from .bbox import bbox_overlaps +from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive +from .cc_attention import CrissCrossAttention +from .corner_pool import CornerPool +from .deform_conv import DeformConv2d, DeformConv2dPack, deform_conv2d +from .deform_roi_pool import (DeformRoIPool, DeformRoIPoolPack, + ModulatedDeformRoIPoolPack, deform_roi_pool) +from .deprecated_wrappers import Conv2d_deprecated as Conv2d +from .deprecated_wrappers import ConvTranspose2d_deprecated as ConvTranspose2d +from .deprecated_wrappers import Linear_deprecated as Linear +from .deprecated_wrappers import MaxPool2d_deprecated as MaxPool2d +from .focal_loss import (SigmoidFocalLoss, SoftmaxFocalLoss, + sigmoid_focal_loss, softmax_focal_loss) +from .info import get_compiler_version, get_compiling_cuda_version +from .masked_conv import MaskedConv2d, masked_conv2d +from .modulated_deform_conv import (ModulatedDeformConv2d, + ModulatedDeformConv2dPack, + modulated_deform_conv2d) +from .nms import batched_nms, nms, nms_match, soft_nms +from .point_sample import (SimpleRoIAlign, point_sample, + rel_roi_point_to_rel_img_point) +from .psa_mask import PSAMask +from .roi_align import RoIAlign, roi_align +from .roi_pool import RoIPool, roi_pool +from .saconv import SAConv2d +from .sync_bn import SyncBatchNorm +from .tin_shift import TINShift, tin_shift + +__all__ = [ + 'bbox_overlaps', 'CARAFE', 'CARAFENaive', 'CARAFEPack', 'carafe', + 'carafe_naive', 'CornerPool', 'DeformConv2d', 'DeformConv2dPack', + 'deform_conv2d', 'DeformRoIPool', 'DeformRoIPoolPack', + 'ModulatedDeformRoIPoolPack', 'deform_roi_pool', 'SigmoidFocalLoss', + 'SoftmaxFocalLoss', 'sigmoid_focal_loss', 'softmax_focal_loss', + 'get_compiler_version', 'get_compiling_cuda_version', 'MaskedConv2d', + 'masked_conv2d', 'ModulatedDeformConv2d', 'ModulatedDeformConv2dPack', + 'modulated_deform_conv2d', 'batched_nms', 'nms', 'soft_nms', 'nms_match', + 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 'SyncBatchNorm', 'Conv2d', + 'ConvTranspose2d', 'Linear', 'MaxPool2d', 'CrissCrossAttention', 'PSAMask', + 'point_sample', 'rel_roi_point_to_rel_img_point', 'SimpleRoIAlign', + 'SAConv2d', 'TINShift', 'tin_shift' +] diff --git a/mmcv/mmcv/ops/bbox.py b/mmcv/mmcv/ops/bbox.py new file mode 100644 index 0000000..06bd10e --- /dev/null +++ b/mmcv/mmcv/ops/bbox.py @@ -0,0 +1,71 @@ +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['bbox_overlaps']) + + +def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0): + """Calculate overlap between two set of bboxes. + + If ``aligned`` is ``False``, then calculate the ious between each bbox + of bboxes1 and bboxes2, otherwise the ious between each aligned pair of + bboxes1 and bboxes2. + + Args: + bboxes1 (Tensor): shape (m, 4) in format or empty. + bboxes2 (Tensor): shape (n, 4) in format or empty. + If aligned is ``True``, then m and n must be equal. + mode (str): "iou" (intersection over union) or iof (intersection over + foreground). + + Returns: + ious(Tensor): shape (m, n) if aligned == False else shape (m, 1) + + Example: + >>> bboxes1 = torch.FloatTensor([ + >>> [0, 0, 10, 10], + >>> [10, 10, 20, 20], + >>> [32, 32, 38, 42], + >>> ]) + >>> bboxes2 = torch.FloatTensor([ + >>> [0, 0, 10, 20], + >>> [0, 10, 10, 19], + >>> [10, 10, 20, 20], + >>> ]) + >>> bbox_overlaps(bboxes1, bboxes2) + tensor([[0.5000, 0.0000, 0.0000], + [0.0000, 0.0000, 1.0000], + [0.0000, 0.0000, 0.0000]]) + + Example: + >>> empty = torch.FloatTensor([]) + >>> nonempty = torch.FloatTensor([ + >>> [0, 0, 10, 9], + >>> ]) + >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1) + >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0) + >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0) + """ + + mode_dict = {'iou': 0, 'iof': 1} + assert mode in mode_dict.keys() + mode_flag = mode_dict[mode] + # Either the boxes are empty or the length of boxes's last dimenstion is 4 + assert (bboxes1.size(-1) == 4 or bboxes1.size(0) == 0) + assert (bboxes2.size(-1) == 4 or bboxes2.size(0) == 0) + assert offset == 1 or offset == 0 + + rows = bboxes1.size(0) + cols = bboxes2.size(0) + if aligned: + assert rows == cols + + if rows * cols == 0: + return bboxes1.new(rows, 1) if aligned else bboxes1.new(rows, cols) + + if aligned: + ious = bboxes1.new_zeros(rows) + else: + ious = bboxes1.new_zeros((rows, cols)) + ext_module.bbox_overlaps( + bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset) + return ious diff --git a/mmcv/mmcv/ops/carafe.py b/mmcv/mmcv/ops/carafe.py new file mode 100644 index 0000000..35befb4 --- /dev/null +++ b/mmcv/mmcv/ops/carafe.py @@ -0,0 +1,284 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Function +from torch.nn.modules.module import Module + +from ..cnn import UPSAMPLE_LAYERS, normal_init, xavier_init +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'carafe_naive_forward', 'carafe_naive_backward', 'carafe_forward', + 'carafe_backward' +]) + + +class CARAFENaiveFunction(Function): + + @staticmethod + def symbolic(g, features, masks, kernel_size, group_size, scale_factor): + return g.op( + 'MMCVCARAFENaive', + features, + masks, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + + @staticmethod + def forward(ctx, features, masks, kernel_size, group_size, scale_factor): + assert scale_factor >= 1 + assert masks.size(1) == kernel_size * kernel_size * group_size + assert masks.size(-1) == features.size(-1) * scale_factor + assert masks.size(-2) == features.size(-2) * scale_factor + assert features.size(1) % group_size == 0 + assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1 + ctx.kernel_size = kernel_size + ctx.group_size = group_size + ctx.scale_factor = scale_factor + ctx.feature_size = features.size() + ctx.mask_size = masks.size() + + n, c, h, w = features.size() + output = features.new_zeros((n, c, h * scale_factor, w * scale_factor)) + ext_module.carafe_naive_forward( + features, + masks, + output, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + + if features.requires_grad or masks.requires_grad: + ctx.save_for_backward(features, masks) + return output + + @staticmethod + def backward(ctx, grad_output): + assert grad_output.is_cuda + + features, masks = ctx.saved_tensors + kernel_size = ctx.kernel_size + group_size = ctx.group_size + scale_factor = ctx.scale_factor + + grad_input = torch.zeros_like(features) + grad_masks = torch.zeros_like(masks) + ext_module.carafe_naive_backward( + grad_output.contiguous(), + features, + masks, + grad_input, + grad_masks, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + + return grad_input, grad_masks, None, None, None + + +carafe_naive = CARAFENaiveFunction.apply + + +class CARAFENaive(Module): + + def __init__(self, kernel_size, group_size, scale_factor): + super(CARAFENaive, self).__init__() + + assert isinstance(kernel_size, int) and isinstance( + group_size, int) and isinstance(scale_factor, int) + self.kernel_size = kernel_size + self.group_size = group_size + self.scale_factor = scale_factor + + def forward(self, features, masks): + return carafe_naive(features, masks, self.kernel_size, self.group_size, + self.scale_factor) + + +class CARAFEFunction(Function): + + @staticmethod + def symbolic(g, features, masks, kernel_size, group_size, scale_factor): + return g.op( + 'MMCVCARAFE', + features, + masks, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + + @staticmethod + def forward(ctx, features, masks, kernel_size, group_size, scale_factor): + assert scale_factor >= 1 + assert masks.size(1) == kernel_size * kernel_size * group_size + assert masks.size(-1) == features.size(-1) * scale_factor + assert masks.size(-2) == features.size(-2) * scale_factor + assert features.size(1) % group_size == 0 + assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1 + ctx.kernel_size = kernel_size + ctx.group_size = group_size + ctx.scale_factor = scale_factor + ctx.feature_size = features.size() + ctx.mask_size = masks.size() + + n, c, h, w = features.size() + output = features.new_zeros((n, c, h * scale_factor, w * scale_factor)) + routput = features.new_zeros(output.size(), requires_grad=False) + rfeatures = features.new_zeros(features.size(), requires_grad=False) + rmasks = masks.new_zeros(masks.size(), requires_grad=False) + ext_module.carafe_forward( + features, + masks, + rfeatures, + routput, + rmasks, + output, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + + if features.requires_grad or masks.requires_grad: + ctx.save_for_backward(features, masks, rfeatures) + return output + + @staticmethod + def backward(ctx, grad_output): + assert grad_output.is_cuda + + features, masks, rfeatures = ctx.saved_tensors + kernel_size = ctx.kernel_size + group_size = ctx.group_size + scale_factor = ctx.scale_factor + + rgrad_output = torch.zeros_like(grad_output, requires_grad=False) + rgrad_input_hs = torch.zeros_like(grad_output, requires_grad=False) + rgrad_input = torch.zeros_like(features, requires_grad=False) + rgrad_masks = torch.zeros_like(masks, requires_grad=False) + grad_input = torch.zeros_like(features, requires_grad=False) + grad_masks = torch.zeros_like(masks, requires_grad=False) + ext_module.carafe_backward( + grad_output.contiguous(), + rfeatures, + masks, + rgrad_output, + rgrad_input_hs, + rgrad_input, + rgrad_masks, + grad_input, + grad_masks, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + return grad_input, grad_masks, None, None, None + + +carafe = CARAFEFunction.apply + + +class CARAFE(Module): + """ CARAFE: Content-Aware ReAssembly of FEatures + + Please refer to https://arxiv.org/abs/1905.02188 for more details. + + Args: + kernel_size (int): reassemble kernel size + group_size (int): reassemble group size + scale_factor (int): upsample ratio + + Returns: + upsampled feature map + """ + + def __init__(self, kernel_size, group_size, scale_factor): + super(CARAFE, self).__init__() + + assert isinstance(kernel_size, int) and isinstance( + group_size, int) and isinstance(scale_factor, int) + self.kernel_size = kernel_size + self.group_size = group_size + self.scale_factor = scale_factor + + def forward(self, features, masks): + return carafe(features, masks, self.kernel_size, self.group_size, + self.scale_factor) + + +@UPSAMPLE_LAYERS.register_module(name='carafe') +class CARAFEPack(nn.Module): + """A unified package of CARAFE upsampler that contains: 1) channel + compressor 2) content encoder 3) CARAFE op. + + Official implementation of ICCV 2019 paper + CARAFE: Content-Aware ReAssembly of FEatures + Please refer to https://arxiv.org/abs/1905.02188 for more details. + + Args: + channels (int): input feature channels + scale_factor (int): upsample ratio + up_kernel (int): kernel size of CARAFE op + up_group (int): group size of CARAFE op + encoder_kernel (int): kernel size of content encoder + encoder_dilation (int): dilation of content encoder + compressed_channels (int): output channels of channels compressor + + Returns: + upsampled feature map + """ + + def __init__(self, + channels, + scale_factor, + up_kernel=5, + up_group=1, + encoder_kernel=3, + encoder_dilation=1, + compressed_channels=64): + super(CARAFEPack, self).__init__() + self.channels = channels + self.scale_factor = scale_factor + self.up_kernel = up_kernel + self.up_group = up_group + self.encoder_kernel = encoder_kernel + self.encoder_dilation = encoder_dilation + self.compressed_channels = compressed_channels + self.channel_compressor = nn.Conv2d(channels, self.compressed_channels, + 1) + self.content_encoder = nn.Conv2d( + self.compressed_channels, + self.up_kernel * self.up_kernel * self.up_group * + self.scale_factor * self.scale_factor, + self.encoder_kernel, + padding=int((self.encoder_kernel - 1) * self.encoder_dilation / 2), + dilation=self.encoder_dilation, + groups=1) + self.init_weights() + + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + xavier_init(m, distribution='uniform') + normal_init(self.content_encoder, std=0.001) + + def kernel_normalizer(self, mask): + mask = F.pixel_shuffle(mask, self.scale_factor) + n, mask_c, h, w = mask.size() + mask_channel = int(mask_c / (self.up_kernel * self.up_kernel)) + mask = mask.view(n, mask_channel, -1, h, w) + + mask = F.softmax(mask, dim=2) + mask = mask.view(n, mask_c, h, w).contiguous() + + return mask + + def feature_reassemble(self, x, mask): + x = carafe(x, mask, self.up_kernel, self.up_group, self.scale_factor) + return x + + def forward(self, x): + compressed_x = self.channel_compressor(x) + mask = self.content_encoder(compressed_x) + mask = self.kernel_normalizer(mask) + + x = self.feature_reassemble(x, mask) + return x diff --git a/mmcv/mmcv/ops/cc_attention.py b/mmcv/mmcv/ops/cc_attention.py new file mode 100644 index 0000000..6f59d29 --- /dev/null +++ b/mmcv/mmcv/ops/cc_attention.py @@ -0,0 +1,95 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd.function import once_differentiable + +from mmcv.cnn import Scale +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['ca_forward', 'ca_backward', 'ca_map_forward', 'ca_map_backward']) + + +class CAWeightFunction(torch.autograd.Function): + + @staticmethod + def symbolic(g, t, f): + return g.op('MMCVCAWeight', t, f) + + @staticmethod + def forward(ctx, t, f): + n, c, h, w = t.size() + weight = torch.zeros(n, h + w - 1, h, w).to(t.device) + ext_module.ca_forward(t, f, weight) + + ctx.save_for_backward(t, f) + + return weight + + @staticmethod + @once_differentiable + def backward(ctx, dw): + t, f = ctx.saved_tensors + dt = torch.zeros_like(t) + df = torch.zeros_like(f) + ext_module.ca_backward(dw, t, f, dt, df) + return dt, df + + +class CAMapFunction(torch.autograd.Function): + + @staticmethod + def symbolic(g, weight, v): + return g.op('MMCVCAMap', weight, v) + + @staticmethod + def forward(ctx, weight, v): + out = torch.zeros_like(v) + ext_module.ca_map_forward(weight, v, out) + + ctx.save_for_backward(weight, v) + + return out + + @staticmethod + @once_differentiable + def backward(ctx, dout): + weight, v = ctx.saved_tensors + dw = torch.zeros_like(weight) + dv = torch.zeros_like(v) + ext_module.ca_map_backward(dout, weight, v, dw, dv) + + return dw, dv + + +ca_weight = CAWeightFunction.apply +ca_map = CAMapFunction.apply + + +class CrissCrossAttention(nn.Module): + """Criss-Cross Attention Module.""" + + def __init__(self, in_channels): + super(CrissCrossAttention, self).__init__() + self.query_conv = nn.Conv2d(in_channels, in_channels // 8, 1) + self.key_conv = nn.Conv2d(in_channels, in_channels // 8, 1) + self.value_conv = nn.Conv2d(in_channels, in_channels, 1) + self.gamma = Scale(0.) + self.in_channels = in_channels + + def forward(self, x): + proj_query = self.query_conv(x) + proj_key = self.key_conv(x) + proj_value = self.value_conv(x) + + energy = ca_weight(proj_query, proj_key) + attention = F.softmax(energy, 1) + out = ca_map(attention, proj_value) + out = self.gamma(out) + x + + return out + + def __repr__(self): + s = self.__class__.__name__ + s += f'(in_channels={self.in_channels})' + return s diff --git a/mmcv/mmcv/ops/corner_pool.py b/mmcv/mmcv/ops/corner_pool.py new file mode 100644 index 0000000..6b0d871 --- /dev/null +++ b/mmcv/mmcv/ops/corner_pool.py @@ -0,0 +1,125 @@ +import torch +from torch import nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'top_pool_forward', 'top_pool_backward', 'bottom_pool_forward', + 'bottom_pool_backward', 'left_pool_forward', 'left_pool_backward', + 'right_pool_forward', 'right_pool_backward' +]) + + +class TopPoolFunction(Function): + + @staticmethod + def forward(ctx, input): + output = ext_module.top_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.top_pool_backward(input, grad_output) + return output + + +class BottomPoolFunction(Function): + + @staticmethod + def forward(ctx, input): + output = ext_module.bottom_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.bottom_pool_backward(input, grad_output) + return output + + +class LeftPoolFunction(Function): + + @staticmethod + def forward(ctx, input): + output = ext_module.left_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.left_pool_backward(input, grad_output) + return output + + +class RightPoolFunction(Function): + + @staticmethod + def forward(ctx, input): + output = ext_module.right_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.right_pool_backward(input, grad_output) + return output + + +class CornerPool(nn.Module): + """Corner Pooling. + + Corner Pooling is a new type of pooling layer that helps a + convolutional network better localize corners of bounding boxes. + + Please refer to https://arxiv.org/abs/1808.01244 for more details. + Code is modified from https://github.com/princeton-vl/CornerNet-Lite. + + Args: + mode(str): Pooling orientation for the pooling layer + + - 'bottom': Bottom Pooling + - 'left': Left Pooling + - 'right': Right Pooling + - 'top': Top Pooling + + Returns: + Feature map after pooling. + """ + + pool_functions = { + 'bottom': BottomPoolFunction, + 'left': LeftPoolFunction, + 'right': RightPoolFunction, + 'top': TopPoolFunction, + } + + cummax_dim_flip = { + 'bottom': (2, False), + 'left': (3, True), + 'right': (3, False), + 'top': (2, True), + } + + def __init__(self, mode): + super(CornerPool, self).__init__() + assert mode in self.pool_functions + self.mode = mode + self.corner_pool = self.pool_functions[mode] + + def forward(self, x): + if torch.__version__ != 'parrots' and torch.__version__ >= '1.5.0': + dim, flip = self.cummax_dim_flip[self.mode] + if flip: + x = x.flip(dim) + pool_tensor, _ = torch.cummax(x, dim=dim) + if flip: + pool_tensor = pool_tensor.flip(dim) + return pool_tensor + else: + return self.corner_pool.apply(x) diff --git a/mmcv/mmcv/ops/csrc/bbox_overlaps_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/bbox_overlaps_cuda_kernel.cuh new file mode 100644 index 0000000..e5fccab --- /dev/null +++ b/mmcv/mmcv/ops/csrc/bbox_overlaps_cuda_kernel.cuh @@ -0,0 +1,83 @@ +#ifndef BBOX_OVERLAPS_CUDA_KERNEL_CUH +#define BBOX_OVERLAPS_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +template +__global__ void bbox_overlaps_cuda_kernel(const T* bbox1, const T* bbox2, + T* ious, const int num_bbox1, + const int num_bbox2, const int mode, + const bool aligned, + const int offset) { + if (aligned) { + CUDA_1D_KERNEL_LOOP(index, num_bbox1) { + int b1 = index; + int b2 = index; + + int base1 = b1 * 4; + T b1_x1 = bbox1[base1]; + T b1_y1 = bbox1[base1 + 1]; + T b1_x2 = bbox1[base1 + 2]; + T b1_y2 = bbox1[base1 + 3]; + T b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset); + + int base2 = b2 * 4; + T b2_x1 = bbox2[base2]; + T b2_y1 = bbox2[base2 + 1]; + T b2_x2 = bbox2[base2 + 2]; + T b2_y2 = bbox2[base2 + 3]; + T b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset); + + T left = fmaxf(b1_x1, b2_x1), right = fminf(b1_x2, b2_x2); + T top = fmaxf(b1_y1, b2_y1), bottom = fminf(b1_y2, b2_y2); + T width = fmaxf(right - left + offset, 0.f); + T height = fmaxf(bottom - top + offset, 0.f); + T interS = width * height; + T baseS = 1.0; + if (mode == 0) { + baseS = fmaxf(b1_area + b2_area - interS, T(offset)); + } else if (mode == 1) { + baseS = fmaxf(b1_area, T(offset)); + } + ious[index] = interS / baseS; + } + } else { + CUDA_1D_KERNEL_LOOP(index, num_bbox1 * num_bbox2) { + int b1 = index / num_bbox2; + int b2 = index % num_bbox2; + + int base1 = b1 * 4; + T b1_x1 = bbox1[base1]; + T b1_y1 = bbox1[base1 + 1]; + T b1_x2 = bbox1[base1 + 2]; + T b1_y2 = bbox1[base1 + 3]; + T b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset); + + int base2 = b2 * 4; + T b2_x1 = bbox2[base2]; + T b2_y1 = bbox2[base2 + 1]; + T b2_x2 = bbox2[base2 + 2]; + T b2_y2 = bbox2[base2 + 3]; + T b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset); + + T left = fmaxf(b1_x1, b2_x1), right = fminf(b1_x2, b2_x2); + T top = fmaxf(b1_y1, b2_y1), bottom = fminf(b1_y2, b2_y2); + T width = fmaxf(right - left + offset, 0.f); + T height = fmaxf(bottom - top + offset, 0.f); + T interS = width * height; + T baseS = 1.0; + if (mode == 0) { + baseS = fmaxf(b1_area + b2_area - interS, T(offset)); + } else if (mode == 1) { + baseS = fmaxf(b1_area, T(offset)); + } + ious[index] = interS / baseS; + } + } +} + +#endif // BBOX_OVERLAPS_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/csrc/carafe_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/carafe_cuda_kernel.cuh new file mode 100644 index 0000000..e9b569d --- /dev/null +++ b/mmcv/mmcv/ops/csrc/carafe_cuda_kernel.cuh @@ -0,0 +1,314 @@ +#ifndef CARAFE_CUDA_KERNEL_CUH +#define CARAFE_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +#define WARP_SIZE 32 +#define THREADS_PER_PIXEL 32 +#define MAX_SHARED_MEMORY 49152 +#define MAX_SHARED_SCALAR_T 6144 // 49152 / 8 = 6144 +#define MAXIMIZE_KERNEL_SIZE true +#define kTileDim 32 +#define kBlockRows 8 +#define FULL_MASK 0xffffffff + +inline int divideUP(const int x, const int y) { return (((x) + (y)-1) / (y)); } + +__device__ inline int Loc2Index(const int n, const int c, const int h, + const int w, const int channel_num, + const int height, const int width) { + int index = w + (h + (c + n * channel_num) * height) * width; + return index; +} +/* TODO: move this to a common place */ +template +__device__ inline scalar_t min(scalar_t a, scalar_t b) { + return a < b ? a : b; +} + +template +__device__ inline scalar_t max(scalar_t a, scalar_t b) { + return a > b ? a : b; +} + +template +__device__ __forceinline__ scalar_t warpReduceSum(scalar_t val) { + for (int offset = 16; offset > 0; offset /= 2) + val += __shfl_down_sync(FULL_MASK, val, offset); + return val; +} + +template <> +__device__ __forceinline__ phalf warpReduceSum(phalf val) { + for (int offset = 16; offset > 0; offset /= 2) + __PHALF(val) += + __shfl_down_sync(FULL_MASK, static_cast<__half>(__PHALF(val)), offset); + return val; +} + +// Splits the original matrix into submatrices with size 32 * 32. +// Each block transposes one submatrix by loading it into shared memory. +// Reference https://devblogs.nvidia.com/efficient-matrix-transpose-cuda-cc/ +template +__global__ void BatchTranspose2DCUDAKernel(const int N, const int H, + const int W, const int dh, + const int dw, + const scalar_t *__restrict__ X, + scalar_t *__restrict__ Y) { + __shared__ scalar_t tile[kTileDim][kTileDim + 1]; + const int n = blockIdx.x / (dh * dw); + const int k = blockIdx.x % (dh * dw); + const int r = k / dw; + const int c = k % dw; + const int offset = n * H * W; + int x = c * kTileDim + threadIdx.x; + int y = r * kTileDim + threadIdx.y; + if (x < W) { + for (int i = 0; threadIdx.y + i < kTileDim && y + i < H; i += kBlockRows) { + tile[threadIdx.y + i][threadIdx.x] = X[offset + (y + i) * W + x]; + } + } + __syncthreads(); + x = r * kTileDim + threadIdx.x; + y = c * kTileDim + threadIdx.y; + if (x < H) { + for (int i = 0; threadIdx.y + i < kTileDim && y + i < W; i += kBlockRows) { + Y[offset + (y + i) * H + x] = tile[threadIdx.x][threadIdx.y + i]; + } + } +} +template +__global__ void CARAFEForward( + const int num_kernels, const scalar_t *__restrict__ bottom_data, + const scalar_t *__restrict__ bottom_masks, const int kernel_size, + const int group_size, const int scale_factor, const int channels, + const int down_height, const int down_width, const int height, + const int width, const int mask_channels, scalar_t *__restrict__ top_data) { +#if MAXIMIZE_KERNEL_SIZE + __shared__ float shared_mask[MAX_SHARED_SCALAR_T * 2]; +#else + __shared__ scalar_t shared_mask[MAX_SHARED_SCALAR_T]; +#endif + + int index = threadIdx.x + blockIdx.x * blockDim.x; + if (index > num_kernels - 1) { + return; + } + const int pixel_id = threadIdx.x / THREADS_PER_PIXEL; + const int split_id = threadIdx.x % THREADS_PER_PIXEL; + index = index / THREADS_PER_PIXEL; + const int pw = index % width; + const int ph = (index / width) % height; + const int n = index / width / height; + + const int down_pw = pw / scale_factor; + const int down_ph = ph / scale_factor; + + const int start_w = down_pw - (kernel_size - 1) / 2; + const int end_w = down_pw + (kernel_size - 1) / 2 + 1; + const int start_h = down_ph - (kernel_size - 1) / 2; + const int end_h = down_ph + (kernel_size - 1) / 2 + 1; + for (int c = split_id; c < mask_channels; c += THREADS_PER_PIXEL) { + int mask_index = Loc2Index(n, ph, pw, c, height, width, mask_channels); + shared_mask[c * WARP_SIZE + pixel_id] = bottom_masks[mask_index]; + } + __syncthreads(); + + const int channels_per_group = ceilf(channels / (float)group_size); +#pragma unroll + for (int c = split_id; c < channels; c += THREADS_PER_PIXEL) { + int mask_group = c / channels_per_group; + scalar_t output_val = 0; +#pragma unroll + for (int iy = start_h; iy < end_h; iy++) { +#pragma unroll + for (int ix = start_w; ix < end_w; ix++) { + if (iy < 0 || iy > down_height - 1 || ix < 0 || ix > down_width - 1) { + continue; + } + int mask_iy = iy - down_ph + (kernel_size - 1) / 2; + int mask_ix = ix - down_pw + (kernel_size - 1) / 2; + int mask_c = + (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix; + int feat_index = + Loc2Index(n, iy, ix, c, down_height, down_width, channels); + + output_val += bottom_data[feat_index] * + shared_mask[mask_c * WARP_SIZE + pixel_id]; + } + } + + int top_index = Loc2Index(n, ph, pw, c, height, width, channels); + top_data[top_index] = output_val; + } +} + +template +__global__ void CARAFEBackward_Feature( + const int num_kernels, const scalar_t *__restrict__ top_diff, + const scalar_t *__restrict__ bottom_masks, const int kernel_size, + const int group_size, const int scale_factor, const int channels, + const int down_height, const int down_width, const int height, + const int width, const int mask_channels, + scalar_t *__restrict__ bottom_diff) { +#if MAXIMIZE_KERNEL_SIZE + __shared__ float shared_mask[MAX_SHARED_SCALAR_T * 2]; +#else + __shared__ scalar_t shared_mask[MAX_SHARED_SCALAR_T]; +#endif + + int index = threadIdx.x + blockIdx.x * blockDim.x; + if (index > num_kernels - 1) { + return; + } + + const int pixel_id = threadIdx.x / THREADS_PER_PIXEL; + const int split_id = threadIdx.x % THREADS_PER_PIXEL; + // (n, c, ph, pw) is an element in the bottom_data + index = index / THREADS_PER_PIXEL; + const int pw = index % width; + const int ph = (index / width) % height; + const int n = index / width / height; + + const int start_w = pw - (kernel_size - 1) * scale_factor / 2; + const int end_w = pw + (kernel_size - 1) * scale_factor / 2 + 1; + const int start_h = ph - (kernel_size - 1) * scale_factor / 2; + const int end_h = ph + (kernel_size - 1) * scale_factor / 2 + 1; + for (int c = split_id; c < mask_channels; c += THREADS_PER_PIXEL) { + const int mask_w = (c % kernel_size) * scale_factor; + const int mask_h = (c / kernel_size % kernel_size) * scale_factor; + const int mask_x = start_w + mask_w; + const int mask_y = start_h + mask_h; + if (mask_y < 0 || mask_y > height - 1 || mask_x < 0 || mask_x > width - 1) { + shared_mask[c * WARP_SIZE + pixel_id] = 0; + continue; + } + const int mask_group = c / (kernel_size * kernel_size); + const int mask_c = (2 * mask_group + 1) * kernel_size * kernel_size - c - 1; + int mask_index = + Loc2Index(n, mask_c, mask_y, mask_x, mask_channels, height, width); + shared_mask[c * WARP_SIZE + pixel_id] = bottom_masks[mask_index]; + } + __syncthreads(); + const int channels_per_group = ceilf(channels / (float)group_size); +#pragma unroll + for (int c = split_id; c < channels; c += THREADS_PER_PIXEL) { + int mask_group = c / channels_per_group; + int top_index = Loc2Index(n, ph, pw, c, height, width, channels); + scalar_t output_val = 0; +#pragma unroll + for (int iy = start_h; iy < end_h; iy += scale_factor) { +#pragma unroll + for (int ix = start_w; ix < end_w; ix += scale_factor) { + if (iy < 0 || iy > height - 1 || ix < 0 || ix > width - 1) { + continue; + } + int mask_iy = + (iy - ph + (kernel_size - 1) * scale_factor / 2) / scale_factor; + int mask_ix = + (ix - pw + (kernel_size - 1) * scale_factor / 2) / scale_factor; + int mask_c = + (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix; + int feat_index = Loc2Index(n, iy, ix, c, height, width, channels); + output_val += + shared_mask[mask_c * WARP_SIZE + pixel_id] * top_diff[feat_index]; + } + } + bottom_diff[top_index] = output_val; + } +} + +template +__global__ void FeatureSum(const int num_kernels, + const scalar_t *__restrict__ input_data, + const int scale_factor, const int channels, + const int height, const int width, + scalar_t *__restrict__ output_data) { + int index = threadIdx.x + blockIdx.x * blockDim.x; + if (index > num_kernels - 1) { + return; + } + const int split_id = threadIdx.x % THREADS_PER_PIXEL; + index = index / THREADS_PER_PIXEL; + const int pw = index % width; + const int ph = (index / width) % height; + const int n = index / width / height; + for (int c = split_id; c < channels; c += THREADS_PER_PIXEL) { + scalar_t output_val = 0; + for (int iy = ph * scale_factor; iy < (ph + 1) * scale_factor; iy++) { + for (int ix = pw * scale_factor; ix < (pw + 1) * scale_factor; ix++) { + int input_id = Loc2Index(n, iy, ix, c, height * scale_factor, + width * scale_factor, channels); + output_val += input_data[input_id]; + } + } + const int output_id = Loc2Index(n, ph, pw, c, height, width, channels); + output_data[output_id] = output_val; + } +} + +template +__global__ void CARAFEBackward_Mask(const int num_kernels, + const scalar_t *__restrict__ top_diff, + const scalar_t *__restrict__ bottom_data, + const int kernel_size, const int group_size, + const int scale_factor, const int channels, + const int down_height, const int down_width, + const int height, const int width, + const int mask_channels, + scalar_t *__restrict__ mask_diff) { + int index = threadIdx.x + blockIdx.x * blockDim.x; + if (index > num_kernels - 1) { + return; + } + + const int lane_id = index % WARP_SIZE; + index = index / WARP_SIZE; + const int mask_c = index % mask_channels; + // (n, c, ph, pw) is an element in the bottom_data + index = index / mask_channels; + const int pw = index % width; + const int ph = (index / width) % height; + const int n = index / width / height; + + const int down_pw = pw / scale_factor; + const int down_ph = ph / scale_factor; + + const int mask_group = mask_c / (kernel_size * kernel_size); + const int mask_loc = mask_c % (kernel_size * kernel_size); + + const int offset_x = mask_loc % kernel_size - (kernel_size - 1) / 2; + const int offset_y = + mask_loc / kernel_size % kernel_size - (kernel_size - 1) / 2; + + const int down_x = down_pw + offset_x; + const int down_y = down_ph + offset_y; + + scalar_t output_val = 0; + + if (down_y >= 0 && down_y <= down_height - 1 && down_x >= 0 && + down_x <= down_width - 1) { + const int channels_per_mask = ceilf(channels / (float)group_size); + const int start = channels_per_mask * mask_group; + const int end = min(channels_per_mask * (mask_group + 1), channels); + for (int c = start + lane_id; c < end; c += WARP_SIZE) { + int bottom_id = + Loc2Index(n, down_y, down_x, c, down_height, down_width, channels); + int top_id = Loc2Index(n, ph, pw, c, height, width, channels); + output_val += top_diff[top_id] * bottom_data[bottom_id]; + } + } + __syncwarp(); + output_val = warpReduceSum(output_val); + if (lane_id == 0) { + const int mask_id = + Loc2Index(n, ph, pw, mask_c, height, width, mask_channels); + mask_diff[mask_id] = output_val; + } +} + +#endif // CARAFE_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/csrc/carafe_naive_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/carafe_naive_cuda_kernel.cuh new file mode 100644 index 0000000..6f37516 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/carafe_naive_cuda_kernel.cuh @@ -0,0 +1,110 @@ +#ifndef CARAFE_NAIVE_CUDA_KERNEL_CUH +#define CARAFE_NAIVE_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +__device__ inline int Loc2Index(const int n, const int c, const int h, + const int w, const int channel_num, + const int height, const int width) { + int index = w + (h + (c + n * channel_num) * height) * width; + return index; +} + +template +__global__ void carafe_naive_forward_cuda_kernel( + const int nthreads, const scalar_t *bottom_data, + const scalar_t *bottom_masks, scalar_t *top_data, const int kernel_size, + const int group_size, const int scale_factor, const int channels, + const int height, const int width) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the bottom_data + int pw = index % width; + int ph = (index / width) % height; + int c = (index / width / height) % channels; + int n = index / width / height / channels; + + int mask_channels = kernel_size * kernel_size * group_size; + int mask_group = c / (channels / group_size); + + int down_pw = pw / scale_factor; + int down_ph = ph / scale_factor; + int down_width = width / scale_factor; + int down_height = height / scale_factor; + int start_w = down_pw - (kernel_size - 1) / 2; + int end_w = down_pw + (kernel_size - 1) / 2 + 1; + int start_h = down_ph - (kernel_size - 1) / 2; + int end_h = down_ph + (kernel_size - 1) / 2 + 1; + + scalar_t output_val = 0; + for (int iy = start_h; iy < end_h; iy++) { + for (int ix = start_w; ix < end_w; ix++) { + if (iy < 0 || iy > down_height - 1 || ix < 0 || ix > down_width - 1) { + continue; + } + int mask_iy = iy - down_ph + (kernel_size - 1) / 2; + int mask_ix = ix - down_pw + (kernel_size - 1) / 2; + int mask_c = + (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix; + int feat_index = + Loc2Index(n, c, iy, ix, channels, down_height, down_width); + int mask_index = + Loc2Index(n, mask_c, ph, pw, mask_channels, height, width); + output_val += bottom_data[feat_index] * bottom_masks[mask_index]; + } + } + top_data[index] = output_val; + } +} + +template +__global__ void carafe_naive_backward_cuda_kernel( + const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_data, + const scalar_t *bottom_masks, scalar_t *bottom_diff, scalar_t *mask_diff, + const int kernel_size, const int group_size, const int scale_factor, + const int channels, const int height, const int width) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the bottom_data + int pw = index % width; + int ph = (index / width) % height; + int c = (index / width / height) % channels; + int n = index / width / height / channels; + + int mask_channels = kernel_size * kernel_size * group_size; + int mask_group = c / (channels / group_size); + + int down_pw = pw / scale_factor; + int down_ph = ph / scale_factor; + int down_width = width / scale_factor; + int down_height = height / scale_factor; + int start_w = down_pw - (kernel_size - 1) / 2; + int end_w = down_pw + (kernel_size - 1) / 2 + 1; + int start_h = down_ph - (kernel_size - 1) / 2; + int end_h = down_ph + (kernel_size - 1) / 2 + 1; + + for (int iy = start_h; iy < end_h; iy++) { + for (int ix = start_w; ix < end_w; ix++) { + if (iy < 0 || iy > down_height - 1 || ix < 0 || ix > down_width - 1) { + continue; + } + int mask_iy = iy - down_ph + (kernel_size - 1) / 2; + int mask_ix = ix - down_pw + (kernel_size - 1) / 2; + int mask_c = + (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix; + int feat_index = + Loc2Index(n, c, iy, ix, channels, down_height, down_width); + int mask_index = + Loc2Index(n, mask_c, ph, pw, mask_channels, height, width); + atomicAdd(bottom_diff + feat_index, + bottom_masks[mask_index] * top_diff[index]); + atomicAdd(mask_diff + mask_index, + bottom_data[feat_index] * top_diff[index]); + } + } + } +} + +#endif // CARAFE_NAIVE_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/csrc/cc_attention_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/cc_attention_cuda_kernel.cuh new file mode 100644 index 0000000..0dd9c33 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/cc_attention_cuda_kernel.cuh @@ -0,0 +1,185 @@ +#ifndef CC_ATTENTION_CUDA_KERNEL_CUH +#define CC_ATTENTION_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +template +__global__ void ca_forward_kernel(const T *t, const T *f, T *weight, int num, + int chn, int height, int width) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int sp = height * width; + int len = height + width - 1; + int z = blockIdx.z; + + if (x < width && y < height && z < height + width - 1) { + for (int batch = 0; batch < num; ++batch) { + for (int plane = 0; plane < chn; ++plane) { + T _t = t[(batch * chn + plane) * sp + y * width + x]; + + if (z < width) { + int i = z; + T _f = f[(batch * chn + plane) * sp + y * width + i]; + weight[(batch * len + i) * sp + y * width + x] += _t * _f; + } else { + int i = z - width; + int j = i < y ? i : i + 1; + + T _f = f[(batch * chn + plane) * sp + j * width + x]; + weight[(batch * len + width + i) * sp + y * width + x] += _t * _f; + } + } + } + } +} + +template +__global__ void ca_backward_kernel_t(const T *dw, const T *t, const T *f, T *dt, + int num, int chn, int height, int width) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int sp = height * width; + int len = height + width - 1; + int plane = blockIdx.z; + + if (x < width && y < height && plane < chn) { + for (int batch = 0; batch < num; ++batch) { + for (int i = 0; i < width; ++i) { + T _dw = dw[(batch * len + i) * sp + y * width + x]; + T _f = f[(batch * chn + plane) * sp + y * width + i]; + dt[(batch * chn + plane) * sp + y * width + x] += _dw * _f; + } + for (int i = 0; i < height; ++i) { + if (i == y) continue; + int j = i < y ? i : i - 1; + + T _dw = dw[(batch * len + width + j) * sp + y * width + x]; + T _f = f[(batch * chn + plane) * sp + i * width + x]; + dt[(batch * chn + plane) * sp + y * width + x] += _dw * _f; + } + } + } +} + +template +__global__ void ca_backward_kernel_f(const T *dw, const T *t, const T *f, T *df, + int num, int chn, int height, int width) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int sp = height * width; + int len = height + width - 1; + int plane = blockIdx.z; + + if (x < width && y < height && plane < chn) { + for (int batch = 0; batch < num; ++batch) { + for (int i = 0; i < width; ++i) { + T _dw = dw[(batch * len + x) * sp + y * width + i]; + T _t = t[(batch * chn + plane) * sp + y * width + i]; + df[(batch * chn + plane) * sp + y * width + x] += _dw * _t; + } + for (int i = 0; i < height; ++i) { + if (i == y) continue; + int j = i > y ? y : y - 1; + + T _dw = dw[(batch * len + width + j) * sp + i * width + x]; + T _t = t[(batch * chn + plane) * sp + i * width + x]; + df[(batch * chn + plane) * sp + y * width + x] += _dw * _t; + } + } + } +} + +template +__global__ void ca_map_forward_kernel(const T *weight, const T *g, T *out, + int num, int chn, int height, int width) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int sp = height * width; + int len = height + width - 1; + int plane = blockIdx.z; + + if (x < width && y < height && plane < chn) { + for (int batch = 0; batch < num; ++batch) { + for (int i = 0; i < width; ++i) { + T _g = g[(batch * chn + plane) * sp + y * width + i]; + T _w = weight[(batch * len + i) * sp + y * width + x]; + out[(batch * chn + plane) * sp + y * width + x] += _g * _w; + } + for (int i = 0; i < height; ++i) { + if (i == y) continue; + + int j = i < y ? i : i - 1; + + T _g = g[(batch * chn + plane) * sp + i * width + x]; + T _w = weight[(batch * len + width + j) * sp + y * width + x]; + out[(batch * chn + plane) * sp + y * width + x] += _g * _w; + } + } + } +} + +template +__global__ void ca_map_backward_kernel_w(const T *dout, const T *weight, + const T *g, T *dw, int num, int chn, + int height, int width) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int sp = height * width; + int len = height + width - 1; + int z = blockIdx.z; + + if (x < width && y < height && z < height + width - 1) { + for (int batch = 0; batch < num; ++batch) { + for (int plane = 0; plane < chn; ++plane) { + T _dout = dout[(batch * chn + plane) * sp + y * width + x]; + + if (z < width) { + int i = z; + T _g = g[(batch * chn + plane) * sp + y * width + i]; + dw[(batch * len + i) * sp + y * width + x] += _dout * _g; + } else { + int i = z - width; + int j = i < y ? i : i + 1; + + T _g = g[(batch * chn + plane) * sp + j * width + x]; + dw[(batch * len + width + i) * sp + y * width + x] += _dout * _g; + } + } + } + } +} + +template +__global__ void ca_map_backward_kernel_g(const T *dout, const T *weight, + const T *g, T *dg, int num, int chn, + int height, int width) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int sp = height * width; + int len = height + width - 1; + int plane = blockIdx.z; + + if (x < width && y < height && plane < chn) { + for (int batch = 0; batch < num; ++batch) { + for (int i = 0; i < width; ++i) { + T _dout = dout[(batch * chn + plane) * sp + y * width + i]; + T _w = weight[(batch * len + x) * sp + y * width + i]; + dg[(batch * chn + plane) * sp + y * width + x] += _dout * _w; + } + for (int i = 0; i < height; ++i) { + if (i == y) continue; + int j = i > y ? y : y - 1; + + T _dout = dout[(batch * chn + plane) * sp + i * width + x]; + T _w = weight[(batch * len + width + j) * sp + i * width + x]; + dg[(batch * chn + plane) * sp + y * width + x] += _dout * _w; + } + } + } +} + +#endif // CC_ATTENTION_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/csrc/common_cuda_helper.hpp b/mmcv/mmcv/ops/csrc/common_cuda_helper.hpp new file mode 100644 index 0000000..a9ab6e8 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/common_cuda_helper.hpp @@ -0,0 +1,110 @@ +#ifndef COMMON_CUDA_HELPER +#define COMMON_CUDA_HELPER + +#include + +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ + i += blockDim.x * gridDim.x) + +#define THREADS_PER_BLOCK 512 + +inline int GET_BLOCKS(const int N) { + int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + int max_block_num = 4096; + return min(optimal_block_num, max_block_num); +} + +template +__device__ T bilinear_interpolate(const T* input, const int height, + const int width, T y, T x, + const int index /* index for debug only*/) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) return 0; + + if (y <= 0) y = 0; + if (x <= 0) x = 0; + + int y_low = (int)y; + int x_low = (int)x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. - lx; + // do bilinear interpolation + T v1 = input[y_low * width + x_low]; + T v2 = input[y_low * width + x_high]; + T v3 = input[y_high * width + x_low]; + T v4 = input[y_high * width + x_high]; + T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + return val; +} + +template +__device__ void bilinear_interpolate_gradient( + const int height, const int width, T y, T x, T& w1, T& w2, T& w3, T& w4, + int& x_low, int& x_high, int& y_low, int& y_high, + const int index /* index for debug only*/) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + w1 = w2 = w3 = w4 = 0.; + x_low = x_high = y_low = y_high = -1; + return; + } + + if (y <= 0) y = 0; + if (x <= 0) x = 0; + + y_low = (int)y; + x_low = (int)x; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. - lx; + + // reference in forward + // T v1 = input[y_low * width + x_low]; + // T v2 = input[y_low * width + x_high]; + // T v3 = input[y_high * width + x_low]; + // T v4 = input[y_high * width + x_high]; + // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + return; +} +#endif // COMMON_CUDA_HELPER diff --git a/mmcv/mmcv/ops/csrc/deform_conv_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/deform_conv_cuda_kernel.cuh new file mode 100644 index 0000000..b6ddf34 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/deform_conv_cuda_kernel.cuh @@ -0,0 +1,362 @@ +/*! + ******************* BEGIN Caffe Copyright Notice and Disclaimer + ***************** + * + * COPYRIGHT + * + * All contributions by the University of California: + * Copyright (c) 2014-2017 The Regents of the University of California (Regents) + * All rights reserved. + * + * All other contributions: + * Copyright (c) 2014-2017, the respective contributors + * All rights reserved. + * + * Caffe uses a shared copyright model: each contributor holds copyright over + * their contributions to Caffe. The project versioning records all such + * contribution and copyright details. If a contributor wants to further mark + * their specific copyright on a particular contribution, they should indicate + * their copyright solely in the commit message of the change when it is + * committed. + * + * LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + *this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * CONTRIBUTION AGREEMENT + * + * By contributing to the BVLC/caffe repository through pull-request, comment, + * or otherwise, the contributor releases their content to the + * license and copyright terms herein. + * + ***************** END Caffe Copyright Notice and Disclaimer + ********************* + * + * Copyright (c) 2018 Microsoft + * Licensed under The MIT License [see LICENSE for details] + * \file modulated_deformable_im2col.cuh + * \brief Function definitions of converting an image to + * column matrix based on kernel, padding, dilation, and offset. + * These functions are mainly used in deformable convolution operators. + * \ref: https://arxiv.org/abs/1703.06211 + * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng + */ + +// modified from +// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu + +#ifndef DEFORM_CONV_CUDA_KERNEL_CUH +#define DEFORM_CONV_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +template +__device__ T deformable_im2col_bilinear(const T *input, const int data_width, + const int height, const int width, T h, + T w) { + if (h <= -1 || height <= h || w <= -1 || width <= w) { + return 0; + } + + int h_low = floor(h); + int w_low = floor(w); + int h_high = h_low + 1; + int w_high = w_low + 1; + + T lh = h - h_low; + T lw = w - w_low; + T hh = 1 - lh, hw = 1 - lw; + + T v1 = 0; + if (h_low >= 0 && w_low >= 0) v1 = input[h_low * data_width + w_low]; + T v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + v2 = input[h_low * data_width + w_high]; + T v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + v3 = input[h_high * data_width + w_low]; + T v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + v4 = input[h_high * data_width + w_high]; + + T w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + + T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + return val; +} + +template +__device__ T get_gradient_weight(T argmax_h, T argmax_w, const int h, + const int w, const int height, + const int width) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + T weight = 0; + if (h == argmax_h_low && w == argmax_w_low) + weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); + if (h == argmax_h_low && w == argmax_w_high) + weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); + if (h == argmax_h_high && w == argmax_w_low) + weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); + if (h == argmax_h_high && w == argmax_w_high) + weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); + return weight; +} + +template +__device__ T get_coordinate_weight(T argmax_h, T argmax_w, const int height, + const int width, const T *im_data, + const int data_width, const int bp_dir) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + T weight = 0; + + if (bp_dir == 0) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_w_low + 1 - argmax_w) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += -1 * (argmax_w - argmax_w_low) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += (argmax_w_low + 1 - argmax_w) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_w - argmax_w_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } else if (bp_dir == 1) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += -1 * (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } + + return weight; +} + +template +__global__ void deformable_im2col_gpu_kernel( + const int n, const T *data_im, const T *data_offset, const int height, + const int width, const int kernel_h, const int kernel_w, const int pad_h, + const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, const int batch_size, + const int num_channels, const int deformable_group, const int height_col, + const int width_col, T *data_col) { + CUDA_1D_KERNEL_LOOP(index, n) { + // index index of output matrix + const int w_col = index % width_col; + const int h_col = (index / width_col) % height_col; + const int b_col = (index / width_col / height_col) % batch_size; + const int c_im = (index / width_col / height_col) / batch_size; + const int c_col = c_im * kernel_h * kernel_w; + + // compute deformable group index + const int deformable_group_index = c_im / channel_per_deformable_group; + + const int h_in = h_col * stride_h - pad_h; + const int w_in = w_col * stride_w - pad_w; + T *data_col_ptr = + data_col + + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; + const T *data_im_ptr = + data_im + (b_col * num_channels + c_im) * height * width; + const T *data_offset_ptr = + data_offset + (b_col * deformable_group + deformable_group_index) * 2 * + kernel_h * kernel_w * height_col * width_col; + + for (int i = 0; i < kernel_h; ++i) { + for (int j = 0; j < kernel_w; ++j) { + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + + w_col; + const T offset_h = data_offset_ptr[data_offset_h_ptr]; + const T offset_w = data_offset_ptr[data_offset_w_ptr]; + T val = static_cast(0); + const T h_im = h_in + i * dilation_h + offset_h; + const T w_im = w_in + j * dilation_w + offset_w; + if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) + val = deformable_im2col_bilinear(data_im_ptr, width, height, width, + h_im, w_im); + *data_col_ptr = val; + data_col_ptr += batch_size * height_col * width_col; + } + } + } +} + +template +__global__ void deformable_col2im_gpu_kernel( + const int n, const T *data_col, const T *data_offset, const int channels, + const int height, const int width, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, const int batch_size, + const int deformable_group, const int height_col, const int width_col, + T *grad_im) { + CUDA_1D_KERNEL_LOOP(index, n) { + const int j = (index / width_col / height_col / batch_size) % kernel_w; + const int i = + (index / width_col / height_col / batch_size / kernel_w) % kernel_h; + const int c = + index / width_col / height_col / batch_size / kernel_w / kernel_h; + // compute the start and end of the output + + const int deformable_group_index = c / channel_per_deformable_group; + + int w_out = index % width_col; + int h_out = (index / width_col) % height_col; + int b = (index / width_col / height_col) % batch_size; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + + const T *data_offset_ptr = + data_offset + (b * deformable_group + deformable_group_index) * 2 * + kernel_h * kernel_w * height_col * width_col; + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; + const T offset_h = data_offset_ptr[data_offset_h_ptr]; + const T offset_w = data_offset_ptr[data_offset_w_ptr]; + const T cur_inv_h_data = h_in + i * dilation_h + offset_h; + const T cur_inv_w_data = w_in + j * dilation_w + offset_w; + + const T cur_top_grad = data_col[index]; + const int cur_h = (int)cur_inv_h_data; + const int cur_w = (int)cur_inv_w_data; + for (int dy = -2; dy <= 2; dy++) { + for (int dx = -2; dx <= 2; dx++) { + if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && + cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && + abs(cur_inv_w_data - (cur_w + dx)) < 1) { + int cur_bottom_grad_pos = + ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; + T weight = get_gradient_weight(cur_inv_h_data, cur_inv_w_data, + cur_h + dy, cur_w + dx, height, width); + atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); + } + } + } + } +} + +template +__global__ void deformable_col2im_coord_gpu_kernel( + const int n, const T *data_col, const T *data_im, const T *data_offset, + const int channels, const int height, const int width, const int kernel_h, + const int kernel_w, const int pad_h, const int pad_w, const int stride_h, + const int stride_w, const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, const int batch_size, + const int offset_channels, const int deformable_group, const int height_col, + const int width_col, T *grad_offset) { + CUDA_1D_KERNEL_LOOP(index, n) { + T val = 0; + int w = index % width_col; + int h = (index / width_col) % height_col; + int c = (index / width_col / height_col) % offset_channels; + int b = (index / width_col / height_col) / offset_channels; + // compute the start and end of the output + + const int deformable_group_index = c / (2 * kernel_h * kernel_w); + const int col_step = kernel_h * kernel_w; + int cnt = 0; + const T *data_col_ptr = data_col + deformable_group_index * + channel_per_deformable_group * + batch_size * width_col * height_col; + const T *data_im_ptr = + data_im + (b * deformable_group + deformable_group_index) * + channel_per_deformable_group / kernel_h / kernel_w * + height * width; + const T *data_offset_ptr = + data_offset + (b * deformable_group + deformable_group_index) * 2 * + kernel_h * kernel_w * height_col * width_col; + + const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; + + for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; + col_c += col_step) { + const int col_pos = + (((col_c * batch_size + b) * height_col) + h) * width_col + w; + const int bp_dir = offset_c % 2; + + int j = (col_pos / width_col / height_col / batch_size) % kernel_w; + int i = + (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; + int w_out = col_pos % width_col; + int h_out = (col_pos / width_col) % height_col; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + const int data_offset_h_ptr = + (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); + const int data_offset_w_ptr = + (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + + w_out); + const T offset_h = data_offset_ptr[data_offset_h_ptr]; + const T offset_w = data_offset_ptr[data_offset_w_ptr]; + T inv_h = h_in + i * dilation_h + offset_h; + T inv_w = w_in + j * dilation_w + offset_w; + if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) + inv_h = inv_w = -2; + const T weight = get_coordinate_weight(inv_h, inv_w, height, width, + data_im_ptr + cnt * height * width, + width, bp_dir); + val += weight * data_col_ptr[col_pos]; + cnt += 1; + } + + grad_offset[index] = val; + } +} + +#endif // DEFORM_CONV_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/csrc/deform_roi_pool_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/deform_roi_pool_cuda_kernel.cuh new file mode 100644 index 0000000..e6b59b3 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/deform_roi_pool_cuda_kernel.cuh @@ -0,0 +1,183 @@ +#ifndef DEFORM_ROI_POOL_CUDA_KERNEL_CUH +#define DEFORM_ROI_POOL_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +template +__global__ void deform_roi_pool_forward_cuda_kernel( + const int nthreads, const T* input, const T* rois, const T* offset, + T* output, const int pooled_height, const int pooled_width, + const T spatial_scale, const int sampling_ratio, const T gamma, + const int channels, const int height, const int width) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* offset_rois = rois + n * 5; + int roi_batch_ind = offset_rois[0]; + + // Do not using rounding; this implementation detail is critical + T roi_start_w = offset_rois[1] * spatial_scale - 0.5; + T roi_start_h = offset_rois[2] * spatial_scale - 0.5; + T roi_end_w = offset_rois[3] * spatial_scale - 0.5; + T roi_end_h = offset_rois[4] * spatial_scale - 0.5; + + T roi_width = roi_end_w - roi_start_w; + T roi_height = roi_end_h - roi_start_h; + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + const T* offset_input = + input + (roi_batch_ind * channels + c) * height * width; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = + (sampling_ratio > 0) + ? sampling_ratio + : static_cast(ceil(roi_height / pooled_height)); + int roi_bin_grid_w = (sampling_ratio > 0) + ? sampling_ratio + : static_cast(ceil(roi_width / pooled_width)); + + // Compute roi offset + if (offset != NULL) { + const T* offset_cur_w = offset + n * pooled_width * pooled_height * 2 + + ph * pooled_width + pw; + T offset_roi_w = gamma * roi_width * offset_cur_w[0]; + T offset_roi_h = + gamma * roi_height * offset_cur_w[pooled_width * pooled_height]; + roi_start_w += offset_roi_w; + roi_start_h += offset_roi_h; + } + + // We do average pooling inside a bin + const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1); + T output_val = 0.; + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + const T y = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T x = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + T val = bilinear_interpolate(offset_input, height, width, y, x, index); + output_val += val; + } + } + output[index] = output_val / count; + } +} + +template +__global__ void deform_roi_pool_backward_cuda_kernel( + const int nthreads, const T* grad_output, const T* input, const T* rois, + const T* offset, T* grad_input, T* grad_offset, const int pooled_height, + const int pooled_width, const T spatial_scale, const int sampling_ratio, + const T gamma, const int channels, const int height, const int width) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* offset_rois = rois + n * 5; + int roi_batch_ind = offset_rois[0]; + const T* offset_input = + input + ((roi_batch_ind * channels + c) * height * width); + T* offset_grad_input = + grad_input + ((roi_batch_ind * channels + c) * height * width); + + // Do not using rounding; this implementation detail is critical + T roi_start_w = offset_rois[1] * spatial_scale - 0.5; + T roi_start_h = offset_rois[2] * spatial_scale - 0.5; + T roi_end_w = offset_rois[3] * spatial_scale - 0.5; + T roi_end_h = offset_rois[4] * spatial_scale - 0.5; + + T roi_width = roi_end_w - roi_start_w; + T roi_height = roi_end_h - roi_start_h; + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = + (sampling_ratio > 0) + ? sampling_ratio + : static_cast(ceil(roi_height / pooled_height)); + int roi_bin_grid_w = (sampling_ratio > 0) + ? sampling_ratio + : static_cast(ceil(roi_width / pooled_width)); + + // Compute roi offset + if (offset != NULL) { + const T* offset_cur_w = offset + n * pooled_width * pooled_height * 2 + + ph * pooled_width + pw; + T offset_roi_w = gamma * roi_width * offset_cur_w[0]; + T offset_roi_h = + gamma * roi_height * offset_cur_w[pooled_width * pooled_height]; + roi_start_w += offset_roi_w; + roi_start_h += offset_roi_h; + } + + // We do average (integral) pooling inside a bin + const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 + const T grad_output_this_bin = grad_output[index] / count; + + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + const T y = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T x = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + T w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, + x_low, x_high, y_low, y_high, index); + + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + atomicAdd(offset_grad_input + y_low * width + x_low, + grad_output_this_bin * w1); + atomicAdd(offset_grad_input + y_low * width + x_high, + grad_output_this_bin * w2); + atomicAdd(offset_grad_input + y_high * width + x_low, + grad_output_this_bin * w3); + atomicAdd(offset_grad_input + y_high * width + x_high, + grad_output_this_bin * w4); + if (offset != NULL) { + T input_00 = offset_input[y_low * width + x_low]; + T input_10 = offset_input[y_low * width + x_high]; + T input_01 = offset_input[y_high * width + x_low]; + T input_11 = offset_input[y_high * width + x_high]; + T ogx = gamma * roi_width * grad_output_this_bin * + (input_11 * (y - y_low) + input_10 * (y_high - y) + + input_01 * (y_low - y) + input_00 * (y - y_high)); + T ogy = gamma * roi_height * grad_output_this_bin * + (input_11 * (x - x_low) + input_01 * (x_high - x) + + input_10 * (x_low - x) + input_00 * (x - x_high)); + atomicAdd(grad_offset + n * pooled_width * pooled_height * 2 + + ph * pooled_width + pw, + ogx); + atomicAdd(grad_offset + n * pooled_width * pooled_height * 2 + + pooled_width * pooled_height + ph * pooled_width + pw, + ogy); + } + } + } + } + } +} + +#endif // DEFORM_ROI_POOL_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/csrc/masked_conv2d_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/masked_conv2d_cuda_kernel.cuh new file mode 100644 index 0000000..4be8329 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/masked_conv2d_cuda_kernel.cuh @@ -0,0 +1,61 @@ +#ifndef MASKED_CONV2D_CUDA_KERNEL_CUH +#define MASKED_CONV2D_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +template +__global__ void MaskedIm2colForward(const int n, const scalar_t *data_im, + const int height, const int width, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int64_t *mask_h_idx, + const int64_t *mask_w_idx, + const int mask_cnt, scalar_t *data_col) { + // mask_cnt * channels + CUDA_1D_KERNEL_LOOP(index, n) { + const int m_index = index % mask_cnt; + const int h_col = mask_h_idx[m_index]; + const int w_col = mask_w_idx[m_index]; + const int c_im = index / mask_cnt; + const int c_col = c_im * kernel_h * kernel_w; + const int h_offset = h_col - pad_h; + const int w_offset = w_col - pad_w; + scalar_t *data_col_ptr = data_col + c_col * mask_cnt + m_index; + for (int i = 0; i < kernel_h; ++i) { + int h_im = h_offset + i; + for (int j = 0; j < kernel_w; ++j) { + int w_im = w_offset + j; + if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) { + *data_col_ptr = + (scalar_t)data_im[(c_im * height + h_im) * width + w_im]; + } else { + *data_col_ptr = 0.0; + } + data_col_ptr += mask_cnt; + } + } + } +} + +template +__global__ void MaskedCol2imForward(const int n, const scalar_t *data_col, + const int height, const int width, + const int channels, + const int64_t *mask_h_idx, + const int64_t *mask_w_idx, + const int mask_cnt, scalar_t *data_im) { + CUDA_1D_KERNEL_LOOP(index, n) { + const int m_index = index % mask_cnt; + const int h_im = mask_h_idx[m_index]; + const int w_im = mask_w_idx[m_index]; + const int c_im = index / mask_cnt; + // compute the start and end of the output + data_im[(c_im * height + h_im) * width + w_im] = data_col[index]; + } +} + +#endif // MASKED_CONV2D_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/csrc/modulated_deform_conv_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/modulated_deform_conv_cuda_kernel.cuh new file mode 100644 index 0000000..9953a09 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/modulated_deform_conv_cuda_kernel.cuh @@ -0,0 +1,394 @@ +/*! + ******************* BEGIN Caffe Copyright Notice and Disclaimer + ***************** + * + * COPYRIGHT + * + * All contributions by the University of California: + * Copyright (c) 2014-2017 The Regents of the University of California (Regents) + * All rights reserved. + * + * All other contributions: + * Copyright (c) 2014-2017, the respective contributors + * All rights reserved. + * + * Caffe uses a shared copyright model: each contributor holds copyright over + * their contributions to Caffe. The project versioning records all such + * contribution and copyright details. If a contributor wants to further mark + * their specific copyright on a particular contribution, they should indicate + * their copyright solely in the commit message of the change when it is + * committed. + * + * LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + *this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * CONTRIBUTION AGREEMENT + * + * By contributing to the BVLC/caffe repository through pull-request, comment, + * or otherwise, the contributor releases their content to the + * license and copyright terms herein. + * + ***************** END Caffe Copyright Notice and Disclaimer + ********************* + * + * Copyright (c) 2018 Microsoft + * Licensed under The MIT License [see LICENSE for details] + * \file modulated_deformable_im2col.cuh + * \brief Function definitions of converting an image to + * column matrix based on kernel, padding, dilation, and offset. + * These functions are mainly used in deformable convolution operators. + * \ref: https://arxiv.org/abs/1703.06211 + * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng + */ + +// modified from +// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu + +#ifndef MODULATED_DEFORM_CONV_CUDA_KERNEL_CUH +#define MODULATED_DEFORM_CONV_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +template +__device__ T dmcn_im2col_bilinear(const T *input, const int data_width, + const int height, const int width, T h, T w) { + int h_low = floor(h); + int w_low = floor(w); + int h_high = h_low + 1; + int w_high = w_low + 1; + + T lh = h - h_low; + T lw = w - w_low; + T hh = 1 - lh, hw = 1 - lw; + + T v1 = 0; + if (h_low >= 0 && w_low >= 0) v1 = input[h_low * data_width + w_low]; + T v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + v2 = input[h_low * data_width + w_high]; + T v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + v3 = input[h_high * data_width + w_low]; + T v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + v4 = input[h_high * data_width + w_high]; + + T w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + + T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + return val; +} + +template +__device__ T dmcn_get_gradient_weight(T argmax_h, T argmax_w, const int h, + const int w, const int height, + const int width) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + T weight = 0; + if (h == argmax_h_low && w == argmax_w_low) + weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); + if (h == argmax_h_low && w == argmax_w_high) + weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); + if (h == argmax_h_high && w == argmax_w_low) + weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); + if (h == argmax_h_high && w == argmax_w_high) + weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); + return weight; +} + +template +__device__ T dmcn_get_coordinate_weight(T argmax_h, T argmax_w, + const int height, const int width, + const T *im_data, const int data_width, + const int bp_dir) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + T weight = 0; + + if (bp_dir == 0) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_w_low + 1 - argmax_w) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += -1 * (argmax_w - argmax_w_low) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += (argmax_w_low + 1 - argmax_w) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_w - argmax_w_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } else if (bp_dir == 1) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += -1 * (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } + + return weight; +} + +template +__global__ void modulated_deformable_im2col_gpu_kernel( + const int n, const T *data_im, const T *data_offset, const T *data_mask, + const int height, const int width, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, const int batch_size, + const int num_channels, const int deformable_group, const int height_col, + const int width_col, T *data_col) { + CUDA_1D_KERNEL_LOOP(index, n) { + // index index of output matrix + const int w_col = index % width_col; + const int h_col = (index / width_col) % height_col; + const int b_col = (index / width_col / height_col) % batch_size; + const int c_im = (index / width_col / height_col) / batch_size; + const int c_col = c_im * kernel_h * kernel_w; + + // compute deformable group index + const int deformable_group_index = c_im / channel_per_deformable_group; + + const int h_in = h_col * stride_h - pad_h; + const int w_in = w_col * stride_w - pad_w; + + T *data_col_ptr = + data_col + + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; + const T *data_im_ptr = + data_im + (b_col * num_channels + c_im) * height * width; + const T *data_offset_ptr = + data_offset + (b_col * deformable_group + deformable_group_index) * 2 * + kernel_h * kernel_w * height_col * width_col; + + const T *data_mask_ptr = + data_mask + (b_col * deformable_group + deformable_group_index) * + kernel_h * kernel_w * height_col * width_col; + + for (int i = 0; i < kernel_h; ++i) { + for (int j = 0; j < kernel_w; ++j) { + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + + w_col; + const int data_mask_hw_ptr = + ((i * kernel_w + j) * height_col + h_col) * width_col + w_col; + const T offset_h = data_offset_ptr[data_offset_h_ptr]; + const T offset_w = data_offset_ptr[data_offset_w_ptr]; + const T mask = data_mask_ptr[data_mask_hw_ptr]; + T val = static_cast(0); + const T h_im = h_in + i * dilation_h + offset_h; + const T w_im = w_in + j * dilation_w + offset_w; + if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) + val = dmcn_im2col_bilinear(data_im_ptr, width, height, width, h_im, + w_im); + *data_col_ptr = val * mask; + data_col_ptr += batch_size * height_col * width_col; + } + } + } +} + +template +__global__ void modulated_deformable_col2im_gpu_kernel( + const int n, const T *data_col, const T *data_offset, const T *data_mask, + const int channels, const int height, const int width, const int kernel_h, + const int kernel_w, const int pad_h, const int pad_w, const int stride_h, + const int stride_w, const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, const int batch_size, + const int deformable_group, const int height_col, const int width_col, + T *grad_im) { + CUDA_1D_KERNEL_LOOP(index, n) { + const int j = (index / width_col / height_col / batch_size) % kernel_w; + const int i = + (index / width_col / height_col / batch_size / kernel_w) % kernel_h; + const int c = + index / width_col / height_col / batch_size / kernel_w / kernel_h; + // compute the start and end of the output + + const int deformable_group_index = c / channel_per_deformable_group; + + int w_out = index % width_col; + int h_out = (index / width_col) % height_col; + int b = (index / width_col / height_col) % batch_size; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + + const T *data_offset_ptr = + data_offset + (b * deformable_group + deformable_group_index) * 2 * + kernel_h * kernel_w * height_col * width_col; + const T *data_mask_ptr = + data_mask + (b * deformable_group + deformable_group_index) * kernel_h * + kernel_w * height_col * width_col; + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; + const int data_mask_hw_ptr = + ((i * kernel_w + j) * height_col + h_out) * width_col + w_out; + const T offset_h = data_offset_ptr[data_offset_h_ptr]; + const T offset_w = data_offset_ptr[data_offset_w_ptr]; + const T mask = data_mask_ptr[data_mask_hw_ptr]; + const T cur_inv_h_data = h_in + i * dilation_h + offset_h; + const T cur_inv_w_data = w_in + j * dilation_w + offset_w; + + const T cur_top_grad = data_col[index] * mask; + const int cur_h = (int)cur_inv_h_data; + const int cur_w = (int)cur_inv_w_data; + for (int dy = -2; dy <= 2; dy++) { + for (int dx = -2; dx <= 2; dx++) { + if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && + cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && + abs(cur_inv_w_data - (cur_w + dx)) < 1) { + int cur_bottom_grad_pos = + ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; + T weight = + dmcn_get_gradient_weight(cur_inv_h_data, cur_inv_w_data, + cur_h + dy, cur_w + dx, height, width); + atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); + } + } + } + } +} + +template +__global__ void modulated_deformable_col2im_coord_gpu_kernel( + const int n, const T *data_col, const T *data_im, const T *data_offset, + const T *data_mask, const int channels, const int height, const int width, + const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, const int dilation_h, + const int dilation_w, const int channel_per_deformable_group, + const int batch_size, const int offset_channels, const int deformable_group, + const int height_col, const int width_col, T *grad_offset, T *grad_mask) { + CUDA_1D_KERNEL_LOOP(index, n) { + T val = 0, mval = 0; + int w = index % width_col; + int h = (index / width_col) % height_col; + int c = (index / width_col / height_col) % offset_channels; + int b = (index / width_col / height_col) / offset_channels; + // compute the start and end of the output + + const int deformable_group_index = c / (2 * kernel_h * kernel_w); + const int col_step = kernel_h * kernel_w; + int cnt = 0; + const T *data_col_ptr = data_col + deformable_group_index * + channel_per_deformable_group * + batch_size * width_col * height_col; + const T *data_im_ptr = + data_im + (b * deformable_group + deformable_group_index) * + channel_per_deformable_group / kernel_h / kernel_w * + height * width; + const T *data_offset_ptr = + data_offset + (b * deformable_group + deformable_group_index) * 2 * + kernel_h * kernel_w * height_col * width_col; + const T *data_mask_ptr = + data_mask + (b * deformable_group + deformable_group_index) * kernel_h * + kernel_w * height_col * width_col; + + const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; + + for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; + col_c += col_step) { + const int col_pos = + (((col_c * batch_size + b) * height_col) + h) * width_col + w; + const int bp_dir = offset_c % 2; + + int j = (col_pos / width_col / height_col / batch_size) % kernel_w; + int i = + (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; + int w_out = col_pos % width_col; + int h_out = (col_pos / width_col) % height_col; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + const int data_offset_h_ptr = + (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); + const int data_offset_w_ptr = + (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + + w_out); + const int data_mask_hw_ptr = + (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); + const T offset_h = data_offset_ptr[data_offset_h_ptr]; + const T offset_w = data_offset_ptr[data_offset_w_ptr]; + const T mask = data_mask_ptr[data_mask_hw_ptr]; + T inv_h = h_in + i * dilation_h + offset_h; + T inv_w = w_in + j * dilation_w + offset_w; + if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) + inv_h = inv_w = -2; + else + mval += data_col_ptr[col_pos] * + dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width, + height, width, inv_h, inv_w); + const T weight = dmcn_get_coordinate_weight( + inv_h, inv_w, height, width, data_im_ptr + cnt * height * width, + width, bp_dir); + val += weight * data_col_ptr[col_pos] * mask; + cnt += 1; + } + // KERNEL_ASSIGN(grad_offset[index], offset_req, val); + grad_offset[index] = val; + if (offset_c % 2 == 0) + // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + + // deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * + // height_col + h) * width_col + w], mask_req, mval); + grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * + kernel_w + + offset_c / 2) * + height_col + + h) * + width_col + + w] = mval; + } +} + +#endif // MODULATED_DEFORM_CONV_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/csrc/nms_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/nms_cuda_kernel.cuh new file mode 100644 index 0000000..bde6334 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/nms_cuda_kernel.cuh @@ -0,0 +1,69 @@ +#ifndef NMS_CUDA_KERNEL_CUH +#define NMS_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) +int const threadsPerBlock = sizeof(unsigned long long int) * 8; + +__device__ inline bool devIoU(float const *const a, float const *const b, + const int offset, const float threshold) { + float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]); + float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]); + float width = fmaxf(right - left + offset, 0.f), + height = fmaxf(bottom - top + offset, 0.f); + float interS = width * height; + float Sa = (a[2] - a[0] + offset) * (a[3] - a[1] + offset); + float Sb = (b[2] - b[0] + offset) * (b[3] - b[1] + offset); + return interS > threshold * (Sa + Sb - interS); +} + +__global__ void nms_cuda(const int n_boxes, const float iou_threshold, + const int offset, const float *dev_boxes, + unsigned long long *dev_mask) { + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + const int tid = threadIdx.x; + + if (row_start > col_start) return; + + const int row_size = + fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock); + const int col_size = + fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock); + + __shared__ float block_boxes[threadsPerBlock * 4]; + if (tid < col_size) { + block_boxes[tid * 4 + 0] = + dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 0]; + block_boxes[tid * 4 + 1] = + dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 1]; + block_boxes[tid * 4 + 2] = + dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 2]; + block_boxes[tid * 4 + 3] = + dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 3]; + } + __syncthreads(); + + if (tid < row_size) { + const int cur_box_idx = threadsPerBlock * row_start + tid; + const float *cur_box = dev_boxes + cur_box_idx * 4; + int i = 0; + unsigned long long int t = 0; + int start = 0; + if (row_start == col_start) { + start = tid + 1; + } + for (i = start; i < col_size; i++) { + if (devIoU(cur_box, block_boxes + i * 4, offset, iou_threshold)) { + t |= 1ULL << i; + } + } + dev_mask[cur_box_idx * gridDim.y + col_start] = t; + } +} +#endif // NMS_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/csrc/parrots/bbox_overlaps.cpp b/mmcv/mmcv/ops/csrc/parrots/bbox_overlaps.cpp new file mode 100644 index 0000000..aff8ea1 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/bbox_overlaps.cpp @@ -0,0 +1,36 @@ +#include "parrots_cpp_helper.hpp" + +void BBoxOverlapsCUDAKernelLauncher(const DArrayLite bboxes1, + const DArrayLite bboxes2, DArrayLite ious, + const int mode, const bool aligned, + const int offset, cudaStream_t stream); + +void bbox_overlaps_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int mode, offset; + bool aligned; + SSAttrs(attr) + .get("mode", mode) + .get("aligned", aligned) + .get("offset", offset) + .done(); + + const auto& bboxes1 = ins[0]; + const auto& bboxes2 = ins[1]; + + auto& ious = outs[0]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + BBoxOverlapsCUDAKernelLauncher(bboxes1, bboxes2, ious, mode, aligned, offset, + stream); +} + +PARROTS_EXTENSION_REGISTER(bbox_overlaps) + .attr("mode") + .attr("aligned") + .attr("offset") + .input(2) + .output(1) + .apply(bbox_overlaps_cuda) + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/bbox_overlaps_cuda.cu b/mmcv/mmcv/ops/csrc/parrots/bbox_overlaps_cuda.cu new file mode 100644 index 0000000..8c7abb6 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/bbox_overlaps_cuda.cu @@ -0,0 +1,22 @@ +#include "bbox_overlaps_cuda_kernel.cuh" +#include "parrots_cuda_helper.hpp" + +void BBoxOverlapsCUDAKernelLauncher(const DArrayLite bboxes1, + const DArrayLite bboxes2, DArrayLite ious, + const int mode, const bool aligned, + const int offset, cudaStream_t stream) { + int output_size = ious.size(); + int num_bbox1 = bboxes1.dim(0); + int num_bbox2 = bboxes2.dim(0); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + bboxes1.elemType().prim(), ([&] { + bbox_overlaps_cuda_kernel + <<>>( + bboxes1.ptr(), bboxes2.ptr(), + ious.ptr(), num_bbox1, num_bbox2, mode, aligned, + offset); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/parrots/carafe.cpp b/mmcv/mmcv/ops/csrc/parrots/carafe.cpp new file mode 100644 index 0000000..0a4a7c7 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/carafe.cpp @@ -0,0 +1,84 @@ +#include "parrots_cpp_helper.hpp" + +void CARAFEForwardCUDAKernelLauncher( + const DArrayLite features, const DArrayLite masks, DArrayLite rfeatures, + DArrayLite routput, DArrayLite rmasks, DArrayLite output, + const int kernel_size, const int group_size, const int scale_factor, + cudaStream_t stream); + +void CARAFEBackwardCUDAKernelLauncher( + const DArrayLite top_grad, const DArrayLite rfeatures, + const DArrayLite masks, DArrayLite rtop_grad, DArrayLite rbottom_grad_hs, + DArrayLite rbottom_grad, DArrayLite rmask_grad, DArrayLite bottom_grad, + DArrayLite mask_grad, const int kernel_size, const int group_size, + const int scale_factor, cudaStream_t stream); + +void carafe_forward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int kernel_size, group_size, scale_factor; + SSAttrs(attr) + .get("kernel_size", kernel_size) + .get("group_size", group_size) + .get("scale_factor", scale_factor) + .done(); + + const auto& features = ins[0]; + const auto& masks = ins[1]; + + auto& rfeatures = outs[0]; + auto& routput = outs[1]; + auto& rmasks = outs[2]; + auto& output = outs[3]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + CARAFEForwardCUDAKernelLauncher(features, masks, rfeatures, routput, rmasks, + output, kernel_size, group_size, scale_factor, + stream); +} + +void carafe_backward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int kernel_size, group_size, scale_factor; + SSAttrs(attr) + .get("kernel_size", kernel_size) + .get("group_size", group_size) + .get("scale_factor", scale_factor) + .done(); + + const auto& top_grad = ins[0]; + const auto& rfeatures = ins[1]; + const auto& masks = ins[2]; + + auto& rtop_grad = outs[0]; + auto rbottom_grad_hs = outs[1]; + auto& rbottom_grad = outs[2]; + auto& rmask_grad = outs[3]; + auto& bottom_grad = outs[4]; + auto& mask_grad = outs[5]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + CARAFEBackwardCUDAKernelLauncher(top_grad, rfeatures, masks, rtop_grad, + rbottom_grad_hs, rbottom_grad, rmask_grad, + bottom_grad, mask_grad, kernel_size, + group_size, scale_factor, stream); +} + +PARROTS_EXTENSION_REGISTER(carafe_forward) + .attr("kernel_size") + .attr("group_size") + .attr("scale_factor") + .input(2) + .output(4) + .apply(carafe_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(carafe_backward) + .attr("kernel_size") + .attr("group_size") + .attr("scale_factor") + .input(3) + .output(6) + .apply(carafe_backward_cuda) + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/carafe_cuda.cu b/mmcv/mmcv/ops/csrc/parrots/carafe_cuda.cu new file mode 100644 index 0000000..2a95e5f --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/carafe_cuda.cu @@ -0,0 +1,144 @@ +#include "carafe_cuda_kernel.cuh" +#include "parrots_cuda_helper.hpp" + +void CARAFEForwardCUDAKernelLauncher( + const DArrayLite features, const DArrayLite masks, DArrayLite rfeatures, + DArrayLite routput, DArrayLite rmasks, DArrayLite output, + const int kernel_size, const int group_size, const int scale_factor, + cudaStream_t stream) { + const int batch_size = output.dim(0); + const int channels = output.dim(1); + const int output_height = output.dim(2); + const int output_width = output.dim(3); + + const int input_height = features.dim(2); + const int input_width = features.dim(3); + + const int mask_channels = masks.dim(1); + + // one warp per pixel + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + features.elemType().prim(), ([&] { + const int dh = divideUP(channels, kTileDim); + const int dw = divideUP(input_height * input_width, kTileDim); + BatchTranspose2DCUDAKernel + <<>>( + batch_size, channels, input_height * input_width, dh, dw, + features.ptr(), rfeatures.ptr()); + })); + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + features.elemType().prim(), ([&] { + const int dh = divideUP(mask_channels, kTileDim); + const int dw = divideUP(output_height * output_width, kTileDim); + BatchTranspose2DCUDAKernel + <<>>( + batch_size, mask_channels, output_height * output_width, dh, dw, + masks.ptr(), rmasks.ptr()); + })); + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + features.elemType().prim(), ([&] { + const int num_kernels = + batch_size * output_height * output_width * THREADS_PER_PIXEL; + + CARAFEForward<<>>( + num_kernels, rfeatures.ptr(), rmasks.ptr(), + kernel_size, group_size, scale_factor, channels, input_height, + input_width, output_height, output_width, mask_channels, + routput.ptr()); + })); + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + features.elemType().prim(), ([&] { + const int dh = divideUP(output_height * output_width, kTileDim); + const int dw = divideUP(channels, kTileDim); + BatchTranspose2DCUDAKernel + <<>>( + batch_size, output_height * output_width, channels, dh, dw, + routput.ptr(), output.ptr()); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void CARAFEBackwardCUDAKernelLauncher( + const DArrayLite top_grad, const DArrayLite rfeatures, + const DArrayLite masks, DArrayLite rtop_grad, DArrayLite rbottom_grad_hs, + DArrayLite rbottom_grad, DArrayLite rmask_grad, DArrayLite bottom_grad, + DArrayLite mask_grad, const int kernel_size, const int group_size, + const int scale_factor, cudaStream_t stream) { + const int batch_size = top_grad.dim(0); + const int channels = top_grad.dim(1); + const int output_height = top_grad.dim(2); + const int output_width = top_grad.dim(3); + + const int input_height = bottom_grad.dim(2); + const int input_width = bottom_grad.dim(3); + + const int mask_channels = masks.dim(1); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + top_grad.elemType().prim(), ([&] { + const int dh = divideUP(channels, kTileDim); + const int dw = divideUP(output_height * output_width, kTileDim); + BatchTranspose2DCUDAKernel + <<>>( + batch_size, channels, output_height * output_width, dh, dw, + top_grad.ptr(), rtop_grad.ptr()); + })); + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + top_grad.elemType().prim(), ([&] { + const int num_kernels = + batch_size * output_height * output_width * THREADS_PER_PIXEL; + + CARAFEBackward_Feature + <<>>(num_kernels, rtop_grad.ptr(), + masks.ptr(), kernel_size, group_size, + scale_factor, channels, input_height, input_width, + output_height, output_width, mask_channels, + rbottom_grad_hs.ptr()); + })); + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + top_grad.elemType().prim(), ([&] { + const int num_kernels = + batch_size * input_height * input_width * THREADS_PER_PIXEL; + + FeatureSum<<>>( + num_kernels, rbottom_grad_hs.ptr(), scale_factor, + channels, input_height, input_width, rbottom_grad.ptr()); + })); + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + top_grad.elemType().prim(), ([&] { + const int dh = divideUP(input_height * input_width, kTileDim); + const int dw = divideUP(channels, kTileDim); + BatchTranspose2DCUDAKernel + <<>>( + batch_size, input_height * input_width, channels, dh, dw, + rbottom_grad.ptr(), bottom_grad.ptr()); + })); + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + top_grad.elemType().prim(), ([&] { + const int num_kernels = batch_size * output_height * output_width * + mask_channels * WARP_SIZE; + + CARAFEBackward_Mask + <<>>(num_kernels, rtop_grad.ptr(), + rfeatures.ptr(), kernel_size, group_size, + scale_factor, channels, input_height, input_width, + output_height, output_width, mask_channels, + rmask_grad.ptr()); + })); + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + top_grad.elemType().prim(), ([&] { + const int dh = divideUP(output_height * output_width, kTileDim); + const int dw = divideUP(mask_channels, kTileDim); + BatchTranspose2DCUDAKernel + <<>>( + batch_size, output_height * output_width, mask_channels, dh, dw, + rmask_grad.ptr(), mask_grad.ptr()); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/parrots/carafe_naive.cpp b/mmcv/mmcv/ops/csrc/parrots/carafe_naive.cpp new file mode 100644 index 0000000..b1a9047 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/carafe_naive.cpp @@ -0,0 +1,73 @@ +#include "parrots_cpp_helper.hpp" + +void CARAFENAIVEForwardCUDAKernelLauncher( + const DArrayLite features, const DArrayLite masks, DArrayLite output, + const int kernel_size, const int group_size, const int scale_factor, + cudaStream_t stream); + +void CARAFENAIVEBackwardCUDAKernelLauncher( + const DArrayLite top_grad, const DArrayLite features, + const DArrayLite masks, DArrayLite bottom_grad, DArrayLite mask_grad, + const int kernel_size, const int group_size, const int scale_factor, + cudaStream_t stream); + +void carafe_naive_forward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int kernel_size, group_size, scale_factor; + SSAttrs(attr) + .get("kernel_size", kernel_size) + .get("group_size", group_size) + .get("scale_factor", scale_factor) + .done(); + + const auto& features = ins[0]; + const auto& masks = ins[1]; + + auto& output = outs[0]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + CARAFENAIVEForwardCUDAKernelLauncher(features, masks, output, kernel_size, + group_size, scale_factor, stream); +} + +void carafe_naive_backward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int kernel_size, group_size, scale_factor; + SSAttrs(attr) + .get("kernel_size", kernel_size) + .get("group_size", group_size) + .get("scale_factor", scale_factor) + .done(); + + const auto& top_grad = ins[0]; + const auto& features = ins[1]; + const auto& masks = ins[2]; + + auto& bottom_grad = outs[0]; + auto& mask_grad = outs[1]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + CARAFENAIVEBackwardCUDAKernelLauncher(top_grad, features, masks, bottom_grad, + mask_grad, kernel_size, group_size, + scale_factor, stream); +} + +PARROTS_EXTENSION_REGISTER(carafe_naive_forward) + .attr("kernel_size") + .attr("group_size") + .attr("scale_factor") + .input(2) + .output(1) + .apply(carafe_naive_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(carafe_naive_backward) + .attr("kernel_size") + .attr("group_size") + .attr("scale_factor") + .input(3) + .output(2) + .apply(carafe_naive_backward_cuda) + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/carafe_naive_cuda.cu b/mmcv/mmcv/ops/csrc/parrots/carafe_naive_cuda.cu new file mode 100644 index 0000000..2592ece --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/carafe_naive_cuda.cu @@ -0,0 +1,46 @@ +#include "carafe_naive_cuda_kernel.cuh" +#include "parrots_cuda_helper.hpp" + +void CARAFENAIVEForwardCUDAKernelLauncher( + const DArrayLite features, const DArrayLite masks, DArrayLite output, + const int kernel_size, const int group_size, const int scale_factor, + cudaStream_t stream) { + int output_size = output.size(); + int channels = output.dim(1); + int height = output.dim(2); + int width = output.dim(3); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + features.elemType().prim(), ([&] { + carafe_naive_forward_cuda_kernel + <<>>( + output_size, features.ptr(), masks.ptr(), + output.ptr(), kernel_size, group_size, scale_factor, + channels, height, width); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void CARAFENAIVEBackwardCUDAKernelLauncher( + const DArrayLite top_grad, const DArrayLite features, + const DArrayLite masks, DArrayLite bottom_grad, DArrayLite mask_grad, + const int kernel_size, const int group_size, const int scale_factor, + cudaStream_t stream) { + int output_size = top_grad.size(); + int channels = top_grad.dim(1); + int height = top_grad.dim(2); + int width = top_grad.dim(3); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + features.elemType().prim(), ([&] { + carafe_naive_backward_cuda_kernel + <<>>( + output_size, top_grad.ptr(), features.ptr(), + masks.ptr(), bottom_grad.ptr(), + mask_grad.ptr(), kernel_size, group_size, + scale_factor, channels, height, width); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/parrots/cc_attention.cpp b/mmcv/mmcv/ops/csrc/parrots/cc_attention.cpp new file mode 100644 index 0000000..efc7c05 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/cc_attention.cpp @@ -0,0 +1,88 @@ +#include "parrots_cpp_helper.hpp" + +void CAForwardCUDAKernelLauncher(const DArrayLite t, const DArrayLite f, + DArrayLite weight, CudaContext &ctx, + cudaStream_t stream); + +void CABackwardCUDAKernelLauncher(const DArrayLite dw, const DArrayLite t, + const DArrayLite f, DArrayLite dt, + DArrayLite df, CudaContext &ctx, + cudaStream_t stream); + +void CAMapForwardCUDAKernelLauncher(const DArrayLite weight, const DArrayLite g, + DArrayLite out, CudaContext &ctx, + cudaStream_t stream); + +void CAMapBackwardCUDAKernelLauncher(const DArrayLite dout, + const DArrayLite weight, + const DArrayLite g, DArrayLite dw, + DArrayLite dg, CudaContext &ctx, + cudaStream_t stream); + +void ca_forward_cuda(CudaContext &ctx, const SSElement &attr, + const OperatorBase::in_list_t &ins, + OperatorBase::out_list_t &outs) { + const auto &t = ins[0]; + const auto &f = ins[1]; + auto &weight = outs[0]; + cudaStream_t stream = getStreamNative(ctx.getStream()); + CAForwardCUDAKernelLauncher(t, f, weight, ctx, stream); +} + +void ca_backward_cuda(CudaContext &ctx, const SSElement &attr, + const OperatorBase::in_list_t &ins, + OperatorBase::out_list_t &outs) { + const auto &dw = ins[0]; + const auto &t = ins[1]; + const auto &f = ins[2]; + auto &dt = outs[0]; + auto &df = outs[1]; + cudaStream_t stream = getStreamNative(ctx.getStream()); + CABackwardCUDAKernelLauncher(dw, t, f, dt, df, ctx, stream); +} + +void ca_map_forward_cuda(CudaContext &ctx, const SSElement &attr, + const OperatorBase::in_list_t &ins, + OperatorBase::out_list_t &outs) { + const auto &weight = ins[0]; + const auto &g = ins[1]; + auto &out = outs[0]; + cudaStream_t stream = getStreamNative(ctx.getStream()); + CAMapForwardCUDAKernelLauncher(weight, g, out, ctx, stream); +} + +void ca_map_backward_cuda(CudaContext &ctx, const SSElement &attr, + const OperatorBase::in_list_t &ins, + OperatorBase::out_list_t &outs) { + const auto &dout = ins[0]; + const auto &weight = ins[1]; + const auto &g = ins[2]; + auto &dw = outs[0]; + auto &dg = outs[1]; + cudaStream_t stream = getStreamNative(ctx.getStream()); + CAMapBackwardCUDAKernelLauncher(dout, weight, g, dw, dg, ctx, stream); +} + +PARROTS_EXTENSION_REGISTER(ca_forward) + .input(2) + .output(1) + .apply(ca_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(ca_backward) + .input(3) + .output(2) + .apply(ca_backward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(ca_map_forward) + .input(2) + .output(1) + .apply(ca_map_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(ca_map_backward) + .input(3) + .output(2) + .apply(ca_map_backward_cuda) + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/cc_attention_cuda_kernel.cu b/mmcv/mmcv/ops/csrc/parrots/cc_attention_cuda_kernel.cu new file mode 100644 index 0000000..7656f74 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/cc_attention_cuda_kernel.cu @@ -0,0 +1,109 @@ +#include "cc_attention_cuda_kernel.cuh" +#include "parrots_cuda_helper.hpp" + +void CAForwardCUDAKernelLauncher(const DArrayLite t, const DArrayLite f, + DArrayLite weight, CudaContext &ctx, + cudaStream_t stream) { + auto n = t.dim(0); + auto c = t.dim(1); + auto h = t.dim(2); + auto w = t.dim(3); + + // Run kernel + dim3 threads(32, 32); + int d1 = (w + threads.x - 1) / threads.x; + int d2 = (h + threads.y - 1) / threads.y; + int d3 = h + w; + dim3 blocks(d1, d2, d3); + + PARROTS_DISPATCH_FLOATING_TYPES(t.elemType().prim(), [&] { + ca_forward_kernel + <<>>(t.ptr(), f.ptr(), + weight.ptr(), n, c, h, w); + }); + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void CABackwardCUDAKernelLauncher(const DArrayLite dw, const DArrayLite t, + const DArrayLite f, DArrayLite dt, + DArrayLite df, CudaContext &ctx, + cudaStream_t stream) { + auto n = t.dim(0); + auto c = t.dim(1); + auto h = t.dim(2); + auto w = t.dim(3); + + // Run kernel + dim3 threads(32, 32); + int d1 = (w + threads.x - 1) / threads.x; + int d2 = (h + threads.y - 1) / threads.y; + int d3 = c; + dim3 blocks(d1, d2, d3); + + PARROTS_DISPATCH_FLOATING_TYPES(t.elemType().prim(), [&] { + ca_backward_kernel_t<<>>( + dw.ptr(), t.ptr(), f.ptr(), + dt.ptr(), n, c, h, w); + }); + + PARROTS_DISPATCH_FLOATING_TYPES(f.elemType().prim(), [&] { + ca_backward_kernel_f<<>>( + dw.ptr(), t.ptr(), f.ptr(), + df.ptr(), n, c, h, w); + }); + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void CAMapForwardCUDAKernelLauncher(const DArrayLite weight, const DArrayLite g, + DArrayLite out, CudaContext &ctx, + cudaStream_t stream) { + auto n = g.dim(0); + auto c = g.dim(1); + auto h = g.dim(2); + auto w = g.dim(3); + + // Run kernel + dim3 threads(32, 32); + int d1 = (w + threads.x - 1) / threads.x; + int d2 = (h + threads.y - 1) / threads.y; + int d3 = c; + dim3 blocks(d1, d2, d3); + + PARROTS_DISPATCH_FLOATING_TYPES(g.elemType().prim(), [&] { + ca_map_forward_kernel<<>>( + weight.ptr(), g.ptr(), out.ptr(), n, c, h, + w); + }); + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void CAMapBackwardCUDAKernelLauncher(const DArrayLite dout, + const DArrayLite weight, + const DArrayLite g, DArrayLite dw, + DArrayLite dg, CudaContext &ctx, + cudaStream_t stream) { + auto n = dout.dim(0); + auto c = dout.dim(1); + auto h = dout.dim(2); + auto w = dout.dim(3); + + // Run kernel + dim3 threads(32, 32); + int d1 = (w + threads.x - 1) / threads.x; + int d2 = (h + threads.y - 1) / threads.y; + int d3 = h + w; + dim3 blocks(d1, d2, d3); + + PARROTS_DISPATCH_FLOATING_TYPES(weight.elemType().prim(), [&] { + ca_map_backward_kernel_w<<>>( + dout.ptr(), weight.ptr(), g.ptr(), + dw.ptr(), n, c, h, w); + }); + + PARROTS_DISPATCH_FLOATING_TYPES(g.elemType().prim(), [&] { + ca_map_backward_kernel_g<<>>( + dout.ptr(), weight.ptr(), g.ptr(), + dg.ptr(), n, c, h, w); + }); + PARROTS_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/parrots/corner_pool.cpp b/mmcv/mmcv/ops/csrc/parrots/corner_pool.cpp new file mode 100644 index 0000000..dca8cf1 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/corner_pool.cpp @@ -0,0 +1,83 @@ +// Modified from +// https://github.com/princeton-vl/CornerNet-Lite/tree/master/core/models/py_utils/_cpools/src +#include "parrots_cpp_helper.hpp" + +void bottom_pool_forward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) {} + +void bottom_pool_backward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) {} + +void top_pool_forward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) {} + +void top_pool_backward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) {} + +void left_pool_forward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) {} + +void left_pool_backward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) {} + +void right_pool_forward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) {} + +void right_pool_backward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) {} + +PARROTS_EXTENSION_REGISTER(bottom_pool_forward) + .input(1) + .output(1) + .apply(bottom_pool_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(bottom_pool_backward) + .input(2) + .output(1) + .apply(bottom_pool_backward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(top_pool_forward) + .input(1) + .output(1) + .apply(top_pool_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(top_pool_backward) + .input(2) + .output(1) + .apply(top_pool_backward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(left_pool_forward) + .input(1) + .output(1) + .apply(left_pool_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(left_pool_backward) + .input(2) + .output(1) + .apply(left_pool_backward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(right_pool_forward) + .input(1) + .output(1) + .apply(right_pool_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(right_pool_backward) + .input(2) + .output(1) + .apply(right_pool_backward_cuda) + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/deform_conv.cpp b/mmcv/mmcv/ops/csrc/parrots/deform_conv.cpp new file mode 100644 index 0000000..c471faa --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/deform_conv.cpp @@ -0,0 +1,181 @@ +// Copyright (c) 2018, SenseTime. +#include "parrots_cpp_helper.hpp" + +void DeformConvForwardCUDAKernelLauncher( + const DArrayLite input, const DArrayLite weight, const DArrayLite offset, + DArrayLite output, DArrayLite columns, DArrayLite ones, int kW, int kH, + int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, int im2col_step, CudaContext& ctx, + cudaStream_t stream); + +void DeformConvBackwardInputCUDAKernelLauncher( + const DArrayLite input, const DArrayLite offset, + const DArrayLite gradOutput, DArrayLite gradInput, DArrayLite gradOffset, + DArrayLite weight, DArrayLite columns, int kW, int kH, int dW, int dH, + int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, int im2col_step, CudaContext& ctx, + cudaStream_t stream); + +void DeformConvBackwardParametersCUDAKernelLauncher( + const DArrayLite input, const DArrayLite offset, + const DArrayLite gradOutput, DArrayLite gradWeight, DArrayLite columns, + DArrayLite ones, int kW, int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int group, int deformable_group, float scale, + int im2col_step, CudaContext& ctx, cudaStream_t stream); + +void deform_conv_forward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, + im2col_step; + SSAttrs(attr) + .get("kW", kW) + .get("kH", kH) + .get("dW", dW) + .get("dH", dH) + .get("padW", padW) + .get("padH", padH) + .get("dilationW", dilationW) + .get("dilationH", dilationH) + .get("group", group) + .get("deformable_group", deformable_group) + .get("im2col_step", im2col_step) + .done(); + + const auto input = ins[0]; + const auto weight = ins[1]; + const auto offset = ins[2]; + + auto output = outs[0]; + auto columns = outs[1]; + auto ones = outs[2]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + DeformConvForwardCUDAKernelLauncher( + input, weight, offset, output, columns, ones, kW, kH, dW, dH, padW, padH, + dilationW, dilationH, group, deformable_group, im2col_step, ctx, stream); +} + +void deform_conv_backward_input_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, + im2col_step; + SSAttrs(attr) + .get("kW", kW) + .get("kH", kH) + .get("dW", dW) + .get("dH", dH) + .get("padW", padW) + .get("padH", padH) + .get("dilationW", dilationW) + .get("dilationH", dilationH) + .get("group", group) + .get("deformable_group", deformable_group) + .get("im2col_step", im2col_step) + .done(); + + auto input = ins[0]; + auto offset = ins[1]; + auto gradOutput = ins[2]; + + auto gradInput = outs[0]; + auto gradOffset = outs[1]; + auto weight = outs[2]; + auto columns = outs[3]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + DeformConvBackwardInputCUDAKernelLauncher( + input, offset, gradOutput, gradInput, gradOffset, weight, columns, kW, kH, + dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, + im2col_step, ctx, stream); +} + +void deform_conv_backward_parameters_cuda(CudaContext& ctx, + const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, + im2col_step; + float scale; + SSAttrs(attr) + .get("kW", kW) + .get("kH", kH) + .get("dW", dW) + .get("dH", dH) + .get("padW", padW) + .get("padH", padH) + .get("dilationW", dilationW) + .get("dilationH", dilationH) + .get("group", group) + .get("deformable_group", deformable_group) + .get("scale", scale) + .get("im2col_step", im2col_step) + .done(); + + auto input = ins[0]; + auto offset = ins[1]; + auto gradOutput = ins[2]; + + auto gradWeight = outs[0]; + auto columns = outs[1]; + auto ones = outs[2]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + DeformConvBackwardParametersCUDAKernelLauncher( + input, offset, gradOutput, gradWeight, columns, ones, kW, kH, dW, dH, + padW, padH, dilationW, dilationH, group, deformable_group, scale, + im2col_step, ctx, stream); +} + +PARROTS_EXTENSION_REGISTER(deform_conv_forward) + .attr("kW") + .attr("kH") + .attr("dW") + .attr("dH") + .attr("padW") + .attr("padH") + .attr("dilationW") + .attr("dilationH") + .attr("group") + .attr("deformable_group") + .attr("im2col_step") + .input(3) + .output(3) + .apply(deform_conv_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(deform_conv_backward_input) + .attr("kW") + .attr("kH") + .attr("dW") + .attr("dH") + .attr("padW") + .attr("padH") + .attr("dilationW") + .attr("dilationH") + .attr("group") + .attr("deformable_group") + .attr("im2col_step") + .input(3) + .output(4) + .apply(deform_conv_backward_input_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(deform_conv_backward_parameters) + .attr("kW") + .attr("kH") + .attr("dW") + .attr("dH") + .attr("padW") + .attr("padH") + .attr("dilationW") + .attr("dilationH") + .attr("group") + .attr("deformable_group") + .attr("scale") + .attr("im2col_step") + .input(3) + .output(3) + .apply(deform_conv_backward_parameters_cuda) + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/deform_conv_cuda.cu b/mmcv/mmcv/ops/csrc/parrots/deform_conv_cuda.cu new file mode 100644 index 0000000..203e1cf --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/deform_conv_cuda.cu @@ -0,0 +1,518 @@ +#include "deform_conv_cuda_kernel.cuh" +#include "parrots_cuda_helper.hpp" + +void deformable_im2col(DArrayLite data_im, DArrayLite data_offset, + const int channels, const int height, const int width, + const int ksize_h, const int ksize_w, const int pad_h, + const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int parallel_imgs, const int deformable_group, + DArrayLite data_col, cudaStream_t stream) { + // num_axes should be smaller than block size + // todo: check parallel_imgs is correctly passed in + int height_col = + (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = + (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = channels * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.elemType().prim(), ([&] { + deformable_im2col_gpu_kernel + <<>>( + num_kernels, data_im.ptr(), + data_offset.ptr(), height, width, ksize_h, ksize_w, + pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, + channel_per_deformable_group, parallel_imgs, channels, + deformable_group, height_col, width_col, + data_col.ptr()); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void deformable_col2im(DArrayLite data_col, DArrayLite data_offset, + const int channels, const int height, const int width, + const int ksize_h, const int ksize_w, const int pad_h, + const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int parallel_imgs, const int deformable_group, + DArrayLite grad_im, cudaStream_t stream) { + int height_col = + (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = + (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = + channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.elemType().prim(), ([&] { + deformable_col2im_gpu_kernel<<>>( + num_kernels, data_col.ptr(), data_offset.ptr(), + channels, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, + stride_w, dilation_h, dilation_w, channel_per_deformable_group, + parallel_imgs, deformable_group, height_col, width_col, + grad_im.ptr()); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void deformable_col2im_coord( + DArrayLite data_col, DArrayLite data_im, DArrayLite data_offset, + const int channels, const int height, const int width, const int ksize_h, + const int ksize_w, const int pad_h, const int pad_w, const int stride_h, + const int stride_w, const int dilation_h, const int dilation_w, + const int parallel_imgs, const int deformable_group, DArrayLite grad_offset, + cudaStream_t stream) { + int height_col = + (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = + (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * + deformable_group * parallel_imgs; + int channel_per_deformable_group = + channels * ksize_h * ksize_w / deformable_group; + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.elemType().prim(), ([&] { + deformable_col2im_coord_gpu_kernel<<>>( + num_kernels, data_col.ptr(), data_im.ptr(), + data_offset.ptr(), channels, height, width, ksize_h, + ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, + channel_per_deformable_group, parallel_imgs, + 2 * ksize_h * ksize_w * deformable_group, deformable_group, + height_col, width_col, grad_offset.ptr()); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void deform_conv_shape_check(DArrayLite input, DArrayLite offset, + DArrayLite* gradOutput, DArrayLite weight, int kH, + int kW, int dH, int dW, int padH, int padW, + int dilationH, int dilationW, int group, + int deformable_group) { + PARROTS_CHECKARGS(weight.ndims() == 4) + << "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, but got: " + << weight.ndims(); + + PARROTS_CHECKARGS(weight.isContiguous()) + << "weight tensor has to be contiguous"; + + PARROTS_CHECKARGS(kW > 0 && kH > 0) + << "kernel size should be greater than zero, but got kH: " << kH + << " kW: " << kW; + + PARROTS_CHECKARGS(weight.dim(2) == kH && weight.dim(3) == kW) + << "kernel size should be consistent with weight, but got kH: " << kH + << " kW: " << kW << " weight.dim(2): " << weight.dim(2) + << ", weight.dim(3): " << weight.dim(3); + + PARROTS_CHECKARGS(dW > 0 && dH > 0) + << "stride should be greater than zero, but got dH: " << dH + << " dW: " << dW; + + PARROTS_CHECKARGS(dilationW > 0 && dilationH > 0) + << "dilation should be greater than 0, but got dilationH: " << dilationH + << " dilationW: " << dilationW; + + int ndim = input.ndims(); + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + PARROTS_CHECKARGS(ndim == 3 || ndim == 4) + << "3D or 4D input tensor expected but got: " << ndim; + + size_t nInputPlane = weight.dim(1) * group; + size_t inputHeight = input.dim(dimh); + size_t inputWidth = input.dim(dimw); + size_t nOutputPlane = weight.dim(0); + size_t outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + size_t outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + + PARROTS_CHECKARGS(nInputPlane % deformable_group == 0) + << "input channels must divide deformable group size"; + + PARROTS_CHECKARGS(outputWidth >= 1 || outputHeight >= 1) + << "Given input size: (" << nInputPlane << " x " << inputHeight << " x " + << inputWidth << "). Calculated output size: (" << nOutputPlane << " x " + << outputHeight << " x " << outputWidth << "). Output size is too small"; + + PARROTS_CHECKARGS(input.dim(1) == nInputPlane) + << "invalid number of input planes, expected: " << nInputPlane + << ", but got: " << input.dim(1); + + PARROTS_CHECKARGS(inputHeight >= kH && inputWidth >= kW) + << "input image is smaller than kernel"; + + PARROTS_CHECKARGS(offset.dim(2) == outputHeight && + offset.dim(3) == outputWidth) + << "invalid spatial dim of offset, expected height: " << outputHeight + << " width: " << outputWidth << ", but got height: " << offset.dim(2) + << " width: " << offset.dim(3); + + PARROTS_CHECKARGS(offset.dim(1) == deformable_group * 2 * kH * kW) + << "invalid number of channels of offset"; + + if (gradOutput != NULL) { + PARROTS_CHECKARGS(gradOutput->dim(dimf) == nOutputPlane) + << "invalid number of gradOutput planes, expected: " << nOutputPlane + << ", but got: " << gradOutput->dim(dimf); + + PARROTS_CHECKARGS(gradOutput->dim(dimh) == outputHeight && + gradOutput->dim(dimw) == outputWidth) + << "invalid dim of gradOutput, expected height: " << outputHeight + << " width: " << outputWidth + << " , but got height: " << gradOutput->dim(dimh) + << " width: " << gradOutput->dim(dimw); + } +} + +void DeformConvForwardCUDAKernelLauncher( + DArrayLite input, DArrayLite weight, DArrayLite offset, DArrayLite output, + DArrayLite columns, DArrayLite ones, int kW, int kH, int dW, int dH, + int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, int im2col_step, CudaContext& ctx, + cudaStream_t stream) { + // todo: resize columns to include im2col: done + // todo: add im2col_step as input + // todo: add new output buffer and transpose it to output (or directly + // transpose output) todo: possibly change data indexing because of + // parallel_imgs + + deform_conv_shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, + padW, dilationH, dilationW, group, deformable_group); + + int batch = 1; + if (input.ndims() == 3) { + // Force batch + batch = 0; + input = input.view({1, input.dim(0), input.dim(1), input.dim(2)}); + offset = offset.view({1, offset.dim(0), offset.dim(1), offset.dim(2)}); + } + + // todo: assert batchsize dividable by im2col_step + + size_t batchSize = input.dim(0); + size_t nInputPlane = input.dim(1); + size_t inputHeight = input.dim(2); + size_t inputWidth = input.dim(3); + + size_t nOutputPlane = weight.dim(0); + + size_t outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + size_t outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + PARROTS_CHECKARGS(offset.dim(0) == batchSize) + << "invalid batch size of offset"; + + output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, + outputHeight, outputWidth}); + + columns = ctx.createDArrayLite( + input.elemType(), DArrayShape(nInputPlane * kW * kH, + im2col_step * outputHeight * outputWidth)); + columns.setZeros(ctx.getStream()); + + if (ones.ndims() != 2 || + ones.dim(0) * ones.dim(1) < outputHeight * outputWidth) { + ones = ctx.createDArrayLite(input.elemType(), + DArrayShape(outputHeight, outputWidth)); + fill(ctx, ones, *toScalar(1)); + } + + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + auto output_buffer = ctx.createDArrayLite( + input.elemType(), DArrayShape(batchSize / im2col_step, nOutputPlane, + im2col_step * outputHeight, outputWidth)); + output_buffer.setZeros(ctx.getStream()); + output_buffer = output_buffer.view( + {output_buffer.dim(0), group, output_buffer.dim(1) / group, + output_buffer.dim(2) * output_buffer.dim(3)}); + + for (size_t elt = 0; elt < batchSize / im2col_step; elt++) { + deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, columns, + stream); + + columns = columns.view({group, columns.dim(0) / group, columns.dim(1)}); + weight = weight.view( + {group, nOutputPlane / group, nInputPlane / group * kH * kW}); + + for (size_t g = 0; g < group; g++) { + auto output_g = output_buffer[elt][g]; + auto weight_g = weight[g]; + auto columns_g = columns[g]; + gemm(ctx, 1, false, weight_g, false, columns_g, 1, output_g); + } + columns = columns.view({columns.dim(0) * columns.dim(1), columns.dim(2)}); + weight = weight.view({nOutputPlane, nInputPlane, kH, kW}); + } + + output_buffer = output_buffer.view( + {output_buffer.dim(0), output_buffer.dim(1) * output_buffer.dim(2), + output_buffer.dim(3)}); + + output_buffer = output_buffer.view({batchSize / im2col_step, nOutputPlane, + im2col_step, outputHeight, outputWidth}); + output_buffer = transpose(ctx, output_buffer, 1, 2); + if (!output_buffer.isContiguous()) { + output_buffer = ctx.cloneDArrayLite(output_buffer); + } + copy(ctx, output, output_buffer); + output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + output = output.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.dim(1), offset.dim(2), offset.dim(3)}); + } +} + +void DeformConvBackwardInputCUDAKernelLauncher( + DArrayLite input, DArrayLite offset, DArrayLite gradOutput, + DArrayLite gradInput, DArrayLite gradOffset, DArrayLite weight, + DArrayLite columns, int kW, int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int group, int deformable_group, + int im2col_step, CudaContext& ctx, cudaStream_t stream) { + deform_conv_shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, + padH, padW, dilationH, dilationW, group, + deformable_group); + + int batch = 1; + + if (input.ndims() == 3) { + // Force batch + batch = 0; + input = input.view({1, input.dim(0), input.dim(1), input.dim(2)}); + offset = offset.view({1, offset.dim(0), offset.dim(1), offset.dim(2)}); + gradOutput = gradOutput.view( + {1, gradOutput.dim(0), gradOutput.dim(1), gradOutput.dim(2)}); + } + + size_t batchSize = input.dim(0); + size_t nInputPlane = input.dim(1); + size_t inputHeight = input.dim(2); + size_t inputWidth = input.dim(3); + + size_t nOutputPlane = weight.dim(0); + + size_t outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + size_t outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + PARROTS_CHECKARGS(offset.dim(0) == batchSize) + << "invalid batch size of offset"; + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + columns = ctx.createDArrayLite( + input.elemType(), DArrayShape(nInputPlane * kW * kH, + im2col_step * outputHeight * outputWidth)); + columns.setZeros(ctx.getStream()); + + // change order of grad output + gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, + nOutputPlane, outputHeight, outputWidth}); + gradOutput = transpose(ctx, gradOutput, 1, 2); + if (!gradOutput.isContiguous()) { + gradOutput = ctx.cloneDArrayLite(gradOutput); + } + + gradInput = gradInput.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + gradOffset = gradOffset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, + outputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + for (size_t elt = 0; elt < batchSize / im2col_step; elt++) { + // divide into groups + columns = columns.view({group, columns.dim(0) / group, columns.dim(1)}); + weight = weight.view({group, weight.dim(0) / group, + weight.dim(1) * weight.dim(2) * weight.dim(3)}); + gradOutput = gradOutput.view( + {gradOutput.dim(0), group, gradOutput.dim(1) / group, + gradOutput.dim(2) * gradOutput.dim(3) * gradOutput.dim(4)}); + + for (size_t g = 0; g < group; g++) { + auto columns_g = columns[g]; + gemm(ctx, 1, true, weight[g], false, gradOutput[elt][g], 0, columns_g); + } + + columns = columns.view({columns.dim(0) * columns.dim(1), columns.dim(2)}); + gradOutput = gradOutput.view({gradOutput.dim(0), + gradOutput.dim(1) * gradOutput.dim(2), + im2col_step, outputHeight, outputWidth}); + weight = weight.view({nOutputPlane, nInputPlane, kH, kW}); + + deformable_col2im_coord(columns, input[elt], offset[elt], nInputPlane, + inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, + dilationH, dilationW, im2col_step, deformable_group, + gradOffset[elt], stream); + + deformable_col2im(columns, offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, gradInput[elt], + stream); + } + + gradOutput = transpose(ctx, gradOutput, 1, 2); + if (!gradOutput.isContiguous()) { + gradOutput = ctx.cloneDArrayLite(gradOutput); + } + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + gradOffset = gradOffset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.dim(1), offset.dim(2), offset.dim(3)}); + gradOffset = gradOffset.view({offset.dim(1), offset.dim(2), offset.dim(3)}); + } +} + +void DeformConvBackwardParametersCUDAKernelLauncher( + DArrayLite input, DArrayLite offset, DArrayLite gradOutput, + DArrayLite gradWeight, DArrayLite columns, DArrayLite ones, int kW, int kH, + int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, float scale, int im2col_step, CudaContext& ctx, + cudaStream_t stream) { + // todo: transpose and reshape outGrad + // todo: reshape columns + // todo: add im2col_step as input + + deform_conv_shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, + dW, padH, padW, dilationH, dilationW, group, + deformable_group); + + int batch = 1; + + if (input.ndims() == 3) { + // Force batch + batch = 0; + input = input.view({1, input.dim(0), input.dim(1), input.dim(2)}); + gradOutput = gradOutput.view( + {1, gradOutput.dim(0), gradOutput.dim(1), gradOutput.dim(2)}); + } + + size_t batchSize = input.dim(0); + size_t nInputPlane = input.dim(1); + size_t inputHeight = input.dim(2); + size_t inputWidth = input.dim(3); + + size_t nOutputPlane = gradWeight.dim(0); + + size_t outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + size_t outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + PARROTS_CHECKARGS(offset.dim(0) == batchSize) + << "invalid batch size of offset"; + + columns = ctx.createDArrayLite( + input.elemType(), DArrayShape(nInputPlane * kW * kH, + im2col_step * outputHeight * outputWidth)); + columns.setZeros(ctx.getStream()); + + gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, + nOutputPlane, outputHeight, outputWidth}); + gradOutput = transpose(ctx, gradOutput, 1, 2); + if (!gradOutput.isContiguous()) { + gradOutput = ctx.cloneDArrayLite(gradOutput); + } + + auto gradOutputBuffer = ctx.cloneDArrayLite(gradOutput); + gradOutputBuffer = + gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, + im2col_step * outputHeight, outputWidth}); + + gradOutput = transpose(ctx, gradOutput, 1, 2); + if (!gradOutput.isContiguous()) { + gradOutput = ctx.cloneDArrayLite(gradOutput); + } + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + for (size_t elt = 0; elt < batchSize / im2col_step; elt++) { + deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, columns, + stream); + + // divide into group + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.dim(0), group, gradOutputBuffer.dim(1) / group, + gradOutputBuffer.dim(2) * gradOutputBuffer.dim(3)}); + columns = columns.view({group, columns.dim(0) / group, columns.dim(1)}); + gradWeight = gradWeight.view( + {group, gradWeight.dim(0) / group, + gradWeight.dim(1) * gradWeight.dim(2) * gradWeight.dim(3)}); + + for (int g = 0; g < group; g++) { + auto gradWeight_g = gradWeight[g]; + gemm(ctx, scale, false, gradOutputBuffer[elt][g], true, columns[g], 1, + gradWeight_g); + } + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.dim(0), + gradOutputBuffer.dim(1) * gradOutputBuffer.dim(2), + im2col_step * outputHeight, outputWidth}); + columns = columns.view({columns.dim(0) * columns.dim(1), columns.dim(2)}); + gradWeight = gradWeight.view( + {gradWeight.dim(0) * gradWeight.dim(1), nInputPlane / group, kH, kW}); + } + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + } +} diff --git a/mmcv/mmcv/ops/csrc/parrots/deform_roi_pool.cpp b/mmcv/mmcv/ops/csrc/parrots/deform_roi_pool.cpp new file mode 100644 index 0000000..9cd5256 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/deform_roi_pool.cpp @@ -0,0 +1,93 @@ +#include "parrots_cpp_helper.hpp" + +void DeformRoIPoolForwardCUDAKernelLauncher( + const DArrayLite input, const DArrayLite rois, const DArrayLite offset, + DArrayLite output, int pooled_height, int pooled_width, float spatial_scale, + int sampling_ratio, float gamma, cudaStream_t stream); + +void DeformRoIPoolBackwardCUDAKernelLauncher( + const DArrayLite grad_output, const DArrayLite input, const DArrayLite rois, + const DArrayLite offset, DArrayLite grad_input, DArrayLite grad_offset, + int pooled_height, int pooled_width, float spatial_scale, + int sampling_ratio, float gamma, cudaStream_t stream); + +void deform_roi_pool_forward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int pooled_height; + int pooled_width; + float spatial_scale; + int sampling_ratio; + float gamma; + SSAttrs(attr) + .get("pooled_height", pooled_height) + .get("pooled_width", pooled_width) + .get("spatial_scale", spatial_scale) + .get("sampling_ratio", sampling_ratio) + .get("gamma", gamma) + .done(); + + const auto& input = ins[0]; + const auto& rois = ins[1]; + const auto& offset = ins[2]; + + auto& output = outs[0]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + DeformRoIPoolForwardCUDAKernelLauncher( + input, rois, offset, output, pooled_height, pooled_width, spatial_scale, + sampling_ratio, gamma, stream); +} + +void deform_roi_pool_backward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int pooled_height; + int pooled_width; + float spatial_scale; + int sampling_ratio; + float gamma; + + SSAttrs(attr) + .get("pooled_height", pooled_height) + .get("pooled_width", pooled_width) + .get("spatial_scale", spatial_scale) + .get("sampling_ratio", sampling_ratio) + .get("gamma", gamma) + .done(); + + const auto& grad_output = ins[0]; + const auto& input = ins[1]; + const auto& rois = ins[2]; + const auto& offset = ins[3]; + + auto& grad_input = outs[0]; + auto& grad_offset = outs[1]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + DeformRoIPoolBackwardCUDAKernelLauncher( + grad_output, input, rois, offset, grad_input, grad_offset, pooled_height, + pooled_width, spatial_scale, sampling_ratio, gamma, stream); +} + +PARROTS_EXTENSION_REGISTER(deform_roi_pool_forward) + .attr("pooled_height") + .attr("pooled_width") + .attr("spatial_scale") + .attr("sampling_ratio") + .attr("gamma") + .input(3) + .output(1) + .apply(deform_roi_pool_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(deform_roi_pool_backward) + .attr("pooled_height") + .attr("pooled_width") + .attr("spatial_scale") + .attr("sampling_ratio") + .attr("gamma") + .input(4) + .output(2) + .apply(deform_roi_pool_backward_cuda) + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/deform_roi_pool_cuda.cu b/mmcv/mmcv/ops/csrc/parrots/deform_roi_pool_cuda.cu new file mode 100644 index 0000000..7081ebf --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/deform_roi_pool_cuda.cu @@ -0,0 +1,48 @@ +#include "deform_roi_pool_cuda_kernel.cuh" +#include "parrots_cuda_helper.hpp" + +void DeformRoIPoolForwardCUDAKernelLauncher( + const DArrayLite input, const DArrayLite rois, const DArrayLite offset, + DArrayLite output, int pooled_height, int pooled_width, float spatial_scale, + int sampling_ratio, float gamma, cudaStream_t stream) { + int output_size = output.size(); + int channels = input.dim(1); + int height = input.dim(2); + int width = input.dim(3); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + input.elemType().prim(), ([&] { + deform_roi_pool_forward_cuda_kernel + <<>>( + output_size, input.ptr(), rois.ptr(), + offset.ptr(), output.ptr(), pooled_height, + pooled_width, spatial_scale, sampling_ratio, gamma, channels, + height, width); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void DeformRoIPoolBackwardCUDAKernelLauncher( + const DArrayLite grad_output, const DArrayLite input, const DArrayLite rois, + const DArrayLite offset, DArrayLite grad_input, DArrayLite grad_offset, + int pooled_height, int pooled_width, float spatial_scale, + int sampling_ratio, float gamma, cudaStream_t stream) { + int output_size = grad_output.size(); + int channels = grad_input.dim(1); + int height = grad_input.dim(2); + int width = grad_input.dim(3); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_output.elemType().prim(), ([&] { + deform_roi_pool_backward_cuda_kernel + <<>>( + output_size, grad_output.ptr(), input.ptr(), + rois.ptr(), offset.ptr(), + grad_input.ptr(), grad_offset.ptr(), + pooled_height, pooled_width, spatial_scale, sampling_ratio, + gamma, channels, height, width); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/parrots/focal_loss.cpp b/mmcv/mmcv/ops/csrc/parrots/focal_loss.cpp new file mode 100644 index 0000000..032e138 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/focal_loss.cpp @@ -0,0 +1,130 @@ +// Copyright (c) 2018, SenseTime. +#include "parrots_cpp_helper.hpp" + +void SigmoidFocalLossForwardCUDAKernelLauncher( + const DArrayLite input, const DArrayLite target, const DArrayLite weight, + DArrayLite output, float gamma, float alpha, cudaStream_t stream); + +void SigmoidFocalLossBackwardCUDAKernelLauncher( + const DArrayLite input, const DArrayLite target, const DArrayLite weight, + DArrayLite grad_input, float gamma, float alpha, cudaStream_t stream); + +void SoftmaxFocalLossForwardCUDAKernelLauncher( + const DArrayLite input, const DArrayLite target, const DArrayLite weight, + DArrayLite output, float gamma, float alpha, cudaStream_t stream); + +void SoftmaxFocalLossBackwardCUDAKernelLauncher( + const DArrayLite input, const DArrayLite target, const DArrayLite weight, + DArrayLite buff, DArrayLite grad_input, float gamma, float alpha, + cudaStream_t stream); + +void sigmoid_focal_loss_forward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + float gamma; + float alpha; + SSAttrs(attr).get("gamma", gamma).get("alpha", alpha).done(); + + // get inputs and outputs + const auto& input = ins[0]; + const auto& target = ins[1]; + const auto& weight = ins[2]; + + auto& output = outs[0]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + + SigmoidFocalLossForwardCUDAKernelLauncher(input, target, weight, output, + gamma, alpha, stream); +} + +void sigmoid_focal_loss_backward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + float gamma; + float alpha; + SSAttrs(attr).get("gamma", gamma).get("alpha", alpha).done(); + + // get inputs and outputs + const auto& input = ins[0]; + const auto& target = ins[1]; + const auto& weight = ins[2]; + + auto& grad_input = outs[0]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + SigmoidFocalLossBackwardCUDAKernelLauncher(input, target, weight, grad_input, + gamma, alpha, stream); +} + +void softmax_focal_loss_forward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + float gamma; + float alpha; + SSAttrs(attr).get("gamma", gamma).get("alpha", alpha).done(); + + // get inputs and outputs + const auto& input = ins[0]; + const auto& target = ins[1]; + const auto& weight = ins[2]; + + auto& grad_input = outs[0]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + + SoftmaxFocalLossForwardCUDAKernelLauncher(input, target, weight, grad_input, + gamma, alpha, stream); +} + +void softmax_focal_loss_backward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + float gamma; + float alpha; + SSAttrs(attr).get("gamma", gamma).get("alpha", alpha).done(); + + // get inputs and outputs + const auto& input = ins[0]; + const auto& target = ins[1]; + const auto& weight = ins[2]; + + auto& buff = outs[0]; + auto& grad_input = outs[1]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + SoftmaxFocalLossBackwardCUDAKernelLauncher(input, target, weight, buff, + grad_input, gamma, alpha, stream); +} + +PARROTS_EXTENSION_REGISTER(sigmoid_focal_loss_forward) + .attr("gamma") + .attr("alpha") + .input(3) + .output(1) + .apply(sigmoid_focal_loss_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(sigmoid_focal_loss_backward) + .attr("gamma") + .attr("alpha") + .input(3) + .output(1) + .apply(sigmoid_focal_loss_backward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(softmax_focal_loss_forward) + .attr("gamma") + .attr("alpha") + .input(3) + .output(1) + .apply(softmax_focal_loss_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(softmax_focal_loss_backward) + .attr("gamma") + .attr("alpha") + .input(3) + .output(2) + .apply(softmax_focal_loss_backward_cuda) + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/focal_loss_cuda.cu b/mmcv/mmcv/ops/csrc/parrots/focal_loss_cuda.cu new file mode 100644 index 0000000..2bba0dc --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/focal_loss_cuda.cu @@ -0,0 +1,88 @@ +#include "parrots_cuda_helper.hpp" +#include "sigmoid_focal_loss_cuda_kernel.cuh" +#include "softmax_focal_loss_cuda_kernel.cuh" + +void SigmoidFocalLossForwardCUDAKernelLauncher( + const DArrayLite input, const DArrayLite target, const DArrayLite weight, + DArrayLite output, float gamma, float alpha, cudaStream_t stream) { + int output_size = output.size(); + int num_classes = input.dim(1); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + input.elemType().prim(), ([&] { + sigmoid_focal_loss_forward_cuda_kernel + <<>>( + output_size, input.ptr(), target.ptr(), + weight.ptr(), output.ptr(), gamma, alpha, + num_classes); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void SigmoidFocalLossBackwardCUDAKernelLauncher( + const DArrayLite input, const DArrayLite target, const DArrayLite weight, + DArrayLite grad_input, float gamma, float alpha, cudaStream_t stream) { + int output_size = grad_input.size(); + int num_classes = input.dim(1); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + input.elemType().prim(), ([&] { + sigmoid_focal_loss_backward_cuda_kernel + <<>>( + output_size, input.ptr(), target.ptr(), + weight.ptr(), grad_input.ptr(), gamma, + alpha, num_classes); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void SoftmaxFocalLossForwardCUDAKernelLauncher( + const DArrayLite softmax, const DArrayLite target, const DArrayLite weight, + DArrayLite output, float gamma, float alpha, cudaStream_t stream) { + int output_size = output.size(); + int num_classes = softmax.dim(1); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + softmax.elemType().prim(), ([&] { + softmax_focal_loss_forward_cuda_kernel + <<>>( + output_size, softmax.ptr(), target.ptr(), + weight.ptr(), output.ptr(), gamma, alpha, + num_classes); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void SoftmaxFocalLossBackwardCUDAKernelLauncher( + const DArrayLite softmax, const DArrayLite target, const DArrayLite weight, + DArrayLite buff, DArrayLite grad_input, float gamma, float alpha, + cudaStream_t stream) { + int output_size = buff.size(); + int num_classes = softmax.dim(1); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_input.elemType().prim(), ([&] { + softmax_focal_loss_backward_cuda1_kernel + <<>>( + output_size, softmax.ptr(), target.ptr(), + weight.ptr(), buff.ptr(), gamma, alpha, + num_classes); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); + + output_size = grad_input.size(); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_input.elemType().prim(), ([&] { + softmax_focal_loss_backward_cuda2_kernel + <<>>( + output_size, softmax.ptr(), target.ptr(), + buff.ptr(), grad_input.ptr(), num_classes); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/parrots/masked_conv2d.cpp b/mmcv/mmcv/ops/csrc/parrots/masked_conv2d.cpp new file mode 100644 index 0000000..de5c9dd --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/masked_conv2d.cpp @@ -0,0 +1,80 @@ +#include "parrots_cpp_helper.hpp" + +void MaskedIm2colForwardCUDAKernelLauncher( + const DArrayLite bottom_data, const DArrayLite mask_h_idx, + const DArrayLite mask_w_idx, DArrayLite top_data, const int kernel_h, + const int kernel_w, const int pad_h, const int pad_w, cudaStream_t stream); + +void MaskedCol2imForwardCUDAKernelLaucher(const DArrayLite bottom_data, + const DArrayLite mask_h_idx, + const DArrayLite mask_w_idx, + DArrayLite top_data, const int height, + const int width, const int channels, + cudaStream_t stream); + +void masked_im2col_forward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + // im: (n, ic, h, w), kernel size (kh, kw) + // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) + int kernel_h, kernel_w, pad_h, pad_w; + SSAttrs(attr) + .get("kernel_h", kernel_h) + .get("kernel_w", kernel_w) + .get("pad_h", pad_h) + .get("pad_w", pad_w) + .done(); + + const auto& im = ins[0]; + const auto& mask_h_idx = ins[1]; + const auto& mask_w_idx = ins[2]; + + auto& col = outs[0]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + MaskedIm2colForwardCUDAKernelLauncher(im, mask_h_idx, mask_w_idx, col, + kernel_h, kernel_w, pad_h, pad_w, + stream); +} + +void masked_col2im_forward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + // im: (n, ic, h, w), kernel size (kh, kw) + // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh) + int height, width, channels; + SSAttrs(attr) + .get("height", height) + .get("width", width) + .get("channels", channels) + .done(); + + const auto& col = ins[0]; + const auto& mask_h_idx = ins[1]; + const auto& mask_w_idx = ins[2]; + + auto& im = outs[0]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + MaskedCol2imForwardCUDAKernelLaucher(col, mask_h_idx, mask_w_idx, im, height, + width, channels, stream); +} + +PARROTS_EXTENSION_REGISTER(masked_im2col_forward) + .attr("kernel_h") + .attr("kernel_w") + .attr("pad_h") + .attr("pad_w") + .input(3) + .output(1) + .apply(masked_im2col_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(masked_col2im_forward) + .attr("height") + .attr("width") + .attr("channels") + .input(3) + .output(1) + .apply(masked_col2im_forward_cuda) + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/masked_conv2d_cuda.cu b/mmcv/mmcv/ops/csrc/parrots/masked_conv2d_cuda.cu new file mode 100644 index 0000000..879d991 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/masked_conv2d_cuda.cu @@ -0,0 +1,45 @@ +#include "masked_conv2d_cuda_kernel.cuh" +#include "parrots_cuda_helper.hpp" + +void MaskedIm2colForwardCUDAKernelLauncher( + const DArrayLite bottom_data, const DArrayLite mask_h_idx, + const DArrayLite mask_w_idx, DArrayLite top_data, const int kernel_h, + const int kernel_w, const int pad_h, const int pad_w, cudaStream_t stream) { + int channels = bottom_data.dim(1); + int height = bottom_data.dim(2); + int width = bottom_data.dim(3); + int mask_cnt = mask_h_idx.dim(0); + int output_size = mask_cnt * channels; + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + bottom_data.elemType().prim(), ([&] { + MaskedIm2colForward + <<>>( + output_size, bottom_data.ptr(), height, width, + kernel_h, kernel_w, pad_h, pad_w, mask_h_idx.ptr(), + mask_w_idx.ptr(), mask_cnt, top_data.ptr()); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void MaskedCol2imForwardCUDAKernelLaucher(const DArrayLite bottom_data, + const DArrayLite mask_h_idx, + const DArrayLite mask_w_idx, + DArrayLite top_data, const int height, + const int width, const int channels, + cudaStream_t stream) { + int mask_cnt = mask_h_idx.dim(0); + int output_size = mask_cnt * channels; + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + bottom_data.elemType().prim(), ([&] { + MaskedCol2imForward + <<>>( + output_size, bottom_data.ptr(), height, width, + channels, mask_h_idx.ptr(), mask_w_idx.ptr(), + mask_cnt, top_data.ptr()); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/parrots/modulated_deform_conv.cpp b/mmcv/mmcv/ops/csrc/parrots/modulated_deform_conv.cpp new file mode 100644 index 0000000..3ef3f4a --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/modulated_deform_conv.cpp @@ -0,0 +1,134 @@ +// Copyright (c) 2019, SenseTime. +#include "parrots_cpp_helper.hpp" + +void ModulatedDeformConvForwardCUDAKernelLauncher( + const DArrayLite input, const DArrayLite weight, const DArrayLite bias, + const DArrayLite ones, const DArrayLite offset, const DArrayLite mask, + DArrayLite output, DArrayLite columns, int kernel_h, int kernel_w, + const int stride_h, const int stride_w, const int pad_h, const int pad_w, + const int dilation_h, const int dilation_w, const int group, + int deformable_group, const bool with_bias, CudaContext& ctx, + cudaStream_t stream); + +void ModulatedDeformConvBackwardCUDAKernelLauncher( + const DArrayLite input, const DArrayLite weight, const DArrayLite bias, + const DArrayLite ones, const DArrayLite offset, const DArrayLite mask, + DArrayLite columns, DArrayLite grad_input, DArrayLite grad_weight, + DArrayLite grad_bias, DArrayLite grad_offset, DArrayLite grad_mask, + DArrayLite grad_output, int kernel_h, int kernel_w, int stride_h, + int stride_w, int pad_h, int pad_w, int dilation_h, int dilation_w, + int group, int deformable_group, const bool with_bias, CudaContext& ctx, + cudaStream_t stream); + +void modulated_deform_conv_forward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, + dilation_w, group, deformable_group, with_bias; + SSAttrs(attr) + .get("kernel_h", kernel_h) + .get("kernel_w", kernel_w) + .get("stride_h", stride_h) + .get("stride_w", stride_w) + .get("pad_h", pad_h) + .get("pad_w", pad_w) + .get("dilation_h", dilation_h) + .get("dilation_w", dilation_w) + .get("group", group) + .get("deformable_group", deformable_group) + .get("with_bias", with_bias) + .done(); + + auto input = ins[0]; + auto weight = ins[1]; + auto bias = ins[2]; + auto ones = ins[3]; + auto offset = ins[4]; + auto mask = ins[5]; + + auto output = outs[0]; + auto columns = outs[1]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + ModulatedDeformConvForwardCUDAKernelLauncher( + input, weight, bias, ones, offset, mask, output, columns, kernel_h, + kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, + deformable_group, with_bias, ctx, stream); +} + +void modulated_deform_conv_backward_cuda(CudaContext& ctx, + const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, + dilation_w, group, deformable_group, with_bias; + SSAttrs(attr) + .get("kernel_h", kernel_h) + .get("kernel_w", kernel_w) + .get("stride_h", stride_h) + .get("stride_w", stride_w) + .get("pad_h", pad_h) + .get("pad_w", pad_w) + .get("dilation_h", dilation_h) + .get("dilation_w", dilation_w) + .get("group", group) + .get("deformable_group", deformable_group) + .get("with_bias", with_bias) + .done(); + + auto input = ins[0]; + auto weight = ins[1]; + auto bias = ins[2]; + auto ones = ins[3]; + auto offset = ins[4]; + auto mask = ins[5]; + + auto columns = outs[0]; + auto grad_input = outs[1]; + auto grad_weight = outs[2]; + auto grad_bias = outs[3]; + auto grad_offset = outs[4]; + auto grad_mask = outs[5]; + auto grad_output = outs[6]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + ModulatedDeformConvBackwardCUDAKernelLauncher( + input, weight, bias, ones, offset, mask, columns, grad_input, grad_weight, + grad_bias, grad_offset, grad_mask, grad_output, kernel_h, kernel_w, + stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, + deformable_group, with_bias, ctx, stream); +} + +PARROTS_EXTENSION_REGISTER(modulated_deform_conv_forward) + .attr("kernel_h") + .attr("kernel_w") + .attr("stride_h") + .attr("stride_w") + .attr("pad_h") + .attr("pad_w") + .attr("dilation_h") + .attr("dilation_w") + .attr("group") + .attr("deformable_group") + .attr("with_bias") + .input(6) + .output(2) + .apply(modulated_deform_conv_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(modulated_deform_conv_backward) + .attr("kernel_h") + .attr("kernel_w") + .attr("stride_h") + .attr("stride_w") + .attr("pad_h") + .attr("pad_w") + .attr("dilation_h") + .attr("dilation_w") + .attr("group") + .attr("deformable_group") + .attr("with_bias") + .input(6) + .output(7) + .apply(modulated_deform_conv_backward_cuda) + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/modulated_deform_conv_cuda.cu b/mmcv/mmcv/ops/csrc/parrots/modulated_deform_conv_cuda.cu new file mode 100644 index 0000000..da5692a --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/modulated_deform_conv_cuda.cu @@ -0,0 +1,341 @@ +#include "modulated_deform_conv_cuda_kernel.cuh" +#include "parrots_cuda_helper.hpp" + +void modulated_deformable_im2col_cuda( + const DArrayLite data_im, const DArrayLite data_offset, + const DArrayLite data_mask, const int batch_size, const int channels, + const int height_im, const int width_im, const int height_col, + const int width_col, const int kernel_h, const int kenerl_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, const int deformable_group, + DArrayLite data_col, cudaStream_t stream) { + // num_axes should be smaller than block size + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = channels * batch_size * height_col * width_col; + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.elemType().prim(), ([&] { + modulated_deformable_im2col_gpu_kernel<<< + GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0, stream>>>( + num_kernels, data_im.ptr(), data_offset.ptr(), + data_mask.ptr(), height_im, width_im, kernel_h, kenerl_w, + pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, + channel_per_deformable_group, batch_size, channels, + deformable_group, height_col, width_col, data_col.ptr()); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void modulated_deformable_col2im_cuda( + const DArrayLite data_col, const DArrayLite data_offset, + const DArrayLite data_mask, const int batch_size, const int channels, + const int height_im, const int width_im, const int height_col, + const int width_col, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, const int deformable_group, + DArrayLite grad_im, cudaStream_t stream) { + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = + channels * kernel_h * kernel_w * batch_size * height_col * width_col; + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.elemType().prim(), ([&] { + modulated_deformable_col2im_gpu_kernel<<< + GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0, stream>>>( + num_kernels, data_col.ptr(), data_offset.ptr(), + data_mask.ptr(), channels, height_im, width_im, kernel_h, + kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, + channel_per_deformable_group, batch_size, deformable_group, + height_col, width_col, grad_im.ptr()); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void modulated_deformable_col2im_coord_cuda( + const DArrayLite data_col, const DArrayLite data_im, + const DArrayLite data_offset, const DArrayLite data_mask, + const int batch_size, const int channels, const int height_im, + const int width_im, const int height_col, const int width_col, + const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, const int dilation_h, + const int dilation_w, const int deformable_group, DArrayLite grad_offset, + DArrayLite grad_mask, cudaStream_t stream) { + const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * + kernel_w * deformable_group; + const int channel_per_deformable_group = + channels * kernel_h * kernel_w / deformable_group; + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.elemType().prim(), ([&] { + modulated_deformable_col2im_coord_gpu_kernel<<< + GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0, stream>>>( + num_kernels, data_col.ptr(), data_im.ptr(), + data_offset.ptr(), data_mask.ptr(), channels, + height_im, width_im, kernel_h, kernel_w, pad_h, pad_w, stride_h, + stride_w, dilation_h, dilation_w, channel_per_deformable_group, + batch_size, 2 * kernel_h * kernel_w * deformable_group, + deformable_group, height_col, width_col, + grad_offset.ptr(), grad_mask.ptr()); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void ModulatedDeformConvForwardCUDAKernelLauncher( + DArrayLite input, DArrayLite weight, DArrayLite bias, DArrayLite ones, + DArrayLite offset, DArrayLite mask, DArrayLite output, DArrayLite columns, + int kernel_h, int kernel_w, const int stride_h, const int stride_w, + const int pad_h, const int pad_w, const int dilation_h, + const int dilation_w, const int group, const int deformable_group, + const bool with_bias, CudaContext& ctx, cudaStream_t stream) { + const int batch = input.dim(0); + const int channels = input.dim(1); + const int height = input.dim(2); + const int width = input.dim(3); + + const int channels_out = weight.dim(0); + const int channels_kernel = weight.dim(1); + const int kernel_h_ = weight.dim(2); + const int kernel_w_ = weight.dim(3); + + PARROTS_CHECKARGS(kernel_h_ == kernel_h && kernel_w_ == kernel_w) + << "Input shape and kernel shape wont match: (" << kernel_h << " x " + << kernel_w << " vs " << kernel_h_ << " x " << kernel_w_ << ")."; + + PARROTS_CHECKARGS(channels == channels_kernel * group) + << "Input shape and kernel channels wont match: (" << channels << " vs " + << channels_kernel * group << ")."; + + const int height_out = + (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (ones.ndims() != 2 || ones.dim(0) * ones.dim(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = ctx.createDArrayLite(input.elemType(), + DArrayShape(height_out, width_out)); + fill(ctx, ones, *toScalar(1)); + } + + // resize output + output = output.view({batch, channels_out, height_out, width_out}); + output.setZeros(ctx.getStream()); + + // resize temporary columns + columns = ctx.createDArrayLite( + input.elemType(), + DArrayShape(channels * kernel_h * kernel_w, 1 * height_out * width_out)); + columns.setZeros(ctx.getStream()); + + output = output.view({output.dim(0), group, output.dim(1) / group, + output.dim(2), output.dim(3)}); + + for (size_t b = 0; b < batch; b++) { + modulated_deformable_im2col_cuda( + input[b], offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, columns, stream); + + // divide into group + weight = weight.view({group, weight.dim(0) / group, weight.dim(1), + weight.dim(2), weight.dim(3)}); + columns = columns.view({group, columns.dim(0) / group, columns.dim(1)}); + + for (size_t g = 0; g < group; g++) { + auto output_g = output[b][g]; + gemm(ctx, 1, false, + weight[g].view( + {weight.dim(1), weight.dim(2) * weight.dim(3) * weight.dim(4)}), + false, columns[g], 1, output_g); + } + + weight = weight.view({weight.dim(0) * weight.dim(1), weight.dim(2), + weight.dim(3), weight.dim(4)}); + columns = columns.view({columns.dim(0) * columns.dim(1), columns.dim(2)}); + } + + output = output.view({output.dim(0), output.dim(1) * output.dim(2), + output.dim(3), output.dim(4)}); + + if (with_bias) { + bias = bias.view({1, bias.dim(0), 1, 1}); + add(ctx, output, bias, output); + } +} + +void ModulatedDeformConvBackwardCUDAKernelLauncher( + DArrayLite input, DArrayLite weight, DArrayLite bias, DArrayLite ones, + DArrayLite offset, DArrayLite mask, DArrayLite columns, + DArrayLite grad_input, DArrayLite grad_weight, DArrayLite grad_bias, + DArrayLite grad_offset, DArrayLite grad_mask, DArrayLite grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias, CudaContext& ctx, cudaStream_t stream) { + const int batch = input.dim(0); + const int channels = input.dim(1); + const int height = input.dim(2); + const int width = input.dim(3); + + const int channels_kernel = weight.dim(1); + const int kernel_h_ = weight.dim(2); + const int kernel_w_ = weight.dim(3); + + PARROTS_CHECKARGS(kernel_h_ == kernel_h && kernel_w_ == kernel_w) + << "Input shape and kernel shape wont match: (" << kernel_h << " x " + << kernel_w << " vs " << kernel_h_ << " x " << kernel_w_ << ")."; + + PARROTS_CHECKARGS(channels == channels_kernel * group) + << "Input shape and kernel channels wont match: (" << channels << " vs " + << channels_kernel * group << ")."; + + const int height_out = + (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (ones.ndims() != 2 || ones.dim(0) * ones.dim(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = ctx.createDArrayLite(input.elemType(), + DArrayShape(height_out, width_out)); + fill(ctx, ones, *toScalar(1)); + } + + grad_input = grad_input.view({batch, channels, height, width}); + columns = ctx.createDArrayLite( + input.elemType(), + DArrayShape(channels * kernel_h * kernel_w, height_out * width_out)); + + grad_output = + grad_output.view({grad_output.dim(0), group, grad_output.dim(1) / group, + grad_output.dim(2), grad_output.dim(3)}); + + for (size_t b = 0; b < batch; b++) { + // divide int group + columns = columns.view({group, columns.dim(0) / group, columns.dim(1)}); + weight = weight.view({group, weight.dim(0) / group, weight.dim(1), + weight.dim(2), weight.dim(3)}); + + for (size_t g = 0; g < group; g++) { + auto columns_g = ctx.createDArrayLite( + weight.elemType(), DArrayShape(columns.dim(1), columns.dim(2))); + copy(ctx, columns_g, columns[g]); + auto weight_g = weight[g].view( + {weight.dim(1), weight.dim(2) * weight.dim(3) * weight.dim(4)}); + weight_g = transpose(ctx, weight_g, 0, 1); + + auto grad_output_bg = ctx.createDArrayLite( + grad_output.elemType(), + DArrayShape(grad_output.dim(2), grad_output.dim(3), + grad_output.dim(4))); + copy(ctx, grad_output_bg, grad_output[b][g]); + grad_output_bg = + grad_output_bg.view({grad_output_bg.dim(0), + grad_output_bg.dim(1) * grad_output_bg.dim(2)}); + + columns_g = + parrots::op::addmm(ctx, columns[g], weight_g, grad_output_bg, 0, 1); + auto columns_out = columns[g]; + copy(ctx, columns_out, columns_g); + } + + columns = columns.view({columns.dim(0) * columns.dim(1), columns.dim(2)}); + weight = weight.view({weight.dim(0) * weight.dim(1), weight.dim(2), + weight.dim(3), weight.dim(4)}); + + // gradient w.r.t. input coordinate data + modulated_deformable_col2im_coord_cuda( + columns, input[b], offset[b], mask[b], 1, channels, height, width, + height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, + stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b], + grad_mask[b], stream); + // gradient w.r.t. input data + modulated_deformable_col2im_cuda( + columns, offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, grad_input[b], stream); + + // gradient w.r.t. weight, dWeight should accumulate across the batch and + // group + modulated_deformable_im2col_cuda( + input[b], offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, columns, stream); + + columns = columns.view({group, columns.dim(0) / group, columns.dim(1)}); + grad_weight = + grad_weight.view({group, grad_weight.dim(0) / group, grad_weight.dim(1), + grad_weight.dim(2), grad_weight.dim(3)}); + if (with_bias) { + grad_bias = grad_bias.view({group, grad_bias.dim(0) / group}); + } + + for (size_t g = 0; g < group; g++) { + auto grad_weight_g = ctx.createDArrayLite( + grad_weight.elemType(), + DArrayShape(grad_weight.dim(1), grad_weight.dim(2), + grad_weight.dim(3), grad_weight.dim(4))); + copy(ctx, grad_weight_g, grad_weight[g]); + grad_weight_g = grad_weight_g.view( + {grad_weight_g.dim(0), + grad_weight_g.dim(1) * grad_weight_g.dim(2) * grad_weight_g.dim(3)}); + + auto columns_g = columns[g]; + columns_g = transpose(ctx, columns_g, 0, 1); + + auto grad_output_bg = ctx.createDArrayLite( + grad_output.elemType(), + DArrayShape(grad_output.dim(2), grad_output.dim(3), + grad_output.dim(4))); + copy(ctx, grad_output_bg, grad_output[b][g]); + grad_output_bg = + grad_output_bg.view({grad_output_bg.dim(0), + grad_output_bg.dim(1) * grad_output_bg.dim(2)}); + + grad_weight_g = parrots::op::addmm(ctx, grad_weight_g, grad_output_bg, + columns_g, 1, 1); + auto grad_weight_out = grad_weight[g]; + copy(ctx, grad_weight_out, grad_weight_g); + + if (with_bias) { + auto grad_bias_g = ctx.createDArrayLite(grad_bias.elemType(), + DArrayShape(grad_bias.dim(1))); + copy(ctx, grad_bias_g, grad_bias[g]); + grad_bias_g = grad_bias_g.view({grad_bias_g.dim(0), 1}); + + auto grad_output_bg = ctx.createDArrayLite( + grad_output.elemType(), + DArrayShape(grad_output.dim(2), grad_output.dim(3), + grad_output.dim(4))); + copy(ctx, grad_output_bg, grad_output[b][g]); + grad_output_bg = grad_output_bg.view( + {grad_output_bg.dim(0), + grad_output_bg.dim(1) * grad_output_bg.dim(2)}); + + auto ones_g = ctx.createDArrayLite( + ones.elemType(), DArrayShape(ones.dim(0), ones.dim(1))); + copy(ctx, ones_g, ones); + ones_g = ones_g.view({ones_g.dim(0) * ones_g.dim(1), 1}); + + grad_bias_g = + parrots::op::addmm(ctx, grad_bias_g, grad_output_bg, ones_g, 1, 1); + + auto grad_bias_out = grad_bias[g]; + copy(ctx, grad_bias_out, grad_bias_g); + } + } + + columns = columns.view({columns.dim(0) * columns.dim(1), columns.dim(2)}); + grad_weight = grad_weight.view({grad_weight.dim(0) * grad_weight.dim(1), + grad_weight.dim(2), grad_weight.dim(3), + grad_weight.dim(4)}); + if (with_bias) + grad_bias = + grad_bias.view(DArrayShape{grad_bias.dim(0) * grad_bias.dim(1)}); + } + grad_output = grad_output.view({grad_output.dim(0) * grad_output.dim(1), + grad_output.dim(2), grad_output.dim(3), + grad_output.dim(4)}); +} diff --git a/mmcv/mmcv/ops/csrc/parrots/nms.cpp b/mmcv/mmcv/ops/csrc/parrots/nms.cpp new file mode 100644 index 0000000..158eb9a --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/nms.cpp @@ -0,0 +1,248 @@ +#include "parrots_cpp_helper.hpp" +#define DIVUP(x, y) (((x) + (y)-1) / (y)) +int const threadsPerBlock = sizeof(unsigned long long) * 8; + +DArrayLite NMSCUDAKernelLauncher(const DArrayLite boxes_sorted, + const DArrayLite order, const DArrayLite areas, + float iou_threshold, int offset, + CudaContext& ctx, cudaStream_t stream); + +void nms_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + float iou_threshold; + int offset; + SSAttrs(attr) + .get("iou_threshold", iou_threshold) + .get("offset", offset) + .done(); + + const auto& boxes_sorted = ins[0]; + const auto& order = ins[1]; + const auto& areas = ins[2]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + outs[0] = NMSCUDAKernelLauncher(boxes_sorted, order, areas, iou_threshold, + offset, ctx, stream); +} + +void nms_cpu(HostContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + float iou_threshold; + int offset; + SSAttrs(attr) + .get("iou_threshold", iou_threshold) + .get("offset", offset) + .done(); + + const auto& boxes = ins[0]; + const auto& order = ins[1]; + const auto& areas = ins[2]; + + size_t nboxes = boxes.shape().dim(0); + size_t boxes_dim = boxes.shape().dim(1); + + auto select = ctx.createDArrayLite(DArraySpec::array(Prim::Int64, nboxes), + getHostProxy()); + select.setZeros(syncStream()); + + if (boxes.size() == 0) { + outs[0] = select; + return; + } + + fill(ctx, select, *toScalar(1)); + + auto select_ptr = select.ptr(); + auto boxes_ptr = boxes.ptr(); + auto order_ptr = order.ptr(); + auto areas_ptr = areas.ptr(); + + for (int64_t _i = 0; _i < nboxes; _i++) { + if (select_ptr[_i] == 0) continue; + auto i = order_ptr[_i]; + auto ix1 = boxes_ptr[i * boxes_dim]; + auto iy1 = boxes_ptr[i * boxes_dim + 1]; + auto ix2 = boxes_ptr[i * boxes_dim + 2]; + auto iy2 = boxes_ptr[i * boxes_dim + 3]; + auto iarea = areas_ptr[i]; + for (int64_t _j = _i + 1; _j < nboxes; _j++) { + if (select_ptr[_j] == 0) continue; + auto j = order_ptr[_j]; + auto xx1 = fmaxf(ix1, boxes_ptr[j * boxes_dim]); + auto yy1 = fmaxf(iy1, boxes_ptr[j * boxes_dim + 1]); + auto xx2 = fminf(ix2, boxes_ptr[j * boxes_dim + 2]); + auto yy2 = fminf(iy2, boxes_ptr[j * boxes_dim + 3]); + + auto w = fmaxf(0.0, xx2 - xx1 + offset); + auto h = fmaxf(0.0, yy2 - yy1 + offset); + auto inter = w * h; + auto ovr = inter / (iarea + areas_ptr[j] - inter); + if (ovr >= iou_threshold) select_ptr[_j] = 0; + } + } + outs[0] = select; +} + +void softnms_cpu(HostContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + float iou_threshold; + float sigma; + float min_score; + int method; + int offset; + SSAttrs(attr) + .get("iou_threshold", iou_threshold) + .get("sigma", sigma) + .get("min_score", min_score) + .get("method", method) + .get("offset", offset) + .done(); + + const auto& boxes = ins[0]; + const auto& scores = ins[1]; + const auto& areas = ins[2]; + + size_t nboxes = boxes.shape().dim(0); + size_t boxes_dim = boxes.shape().dim(1); + auto boxes_ptr = boxes.ptr(); + auto scores_ptr = scores.ptr(); + auto areas_ptr = areas.ptr(); + + auto inputs = ctx.createDArrayLite( + DArraySpec::array(Prim::Float32, DArrayShape(nboxes, 6))); + auto inputs_ptr = inputs.ptr(); + auto dets = ctx.createDArrayLite( + DArraySpec::array(Prim::Float32, DArrayShape(nboxes, 5))); + auto de = dets.ptr(); + for (size_t i = 0; i < nboxes; i++) { + inputs_ptr[i * 6 + 0] = boxes_ptr[i * boxes_dim + 0]; + inputs_ptr[i * 6 + 1] = boxes_ptr[i * boxes_dim + 1]; + inputs_ptr[i * 6 + 2] = boxes_ptr[i * boxes_dim + 2]; + inputs_ptr[i * 6 + 3] = boxes_ptr[i * boxes_dim + 3]; + inputs_ptr[i * 6 + 4] = scores_ptr[i]; + inputs_ptr[i * 6 + 5] = areas_ptr[i]; + } + + size_t pos = 0; + auto inds_t = ctx.createDArrayLite(DArraySpec::array(Prim::Int64, nboxes)); + arange(ctx, *toScalar(0), *toScalar(nboxes), *toScalar(1), inds_t); + auto inds = inds_t.ptr(); + auto num_out = ctx.createDArrayLite(DArraySpec::scalar(Prim::Int64)); + + for (size_t i = 0; i < nboxes; i++) { + auto max_score = inputs_ptr[i * 6 + 4]; + auto max_pos = i; + + pos = i + 1; + // get max box + while (pos < nboxes) { + if (max_score < inputs_ptr[pos * 6 + 4]) { + max_score = inputs_ptr[pos * 6 + 4]; + max_pos = pos; + } + pos = pos + 1; + } + // swap + auto ix1 = de[i * 5 + 0] = inputs_ptr[max_pos * 6 + 0]; + auto iy1 = de[i * 5 + 1] = inputs_ptr[max_pos * 6 + 1]; + auto ix2 = de[i * 5 + 2] = inputs_ptr[max_pos * 6 + 2]; + auto iy2 = de[i * 5 + 3] = inputs_ptr[max_pos * 6 + 3]; + auto iscore = de[i * 5 + 4] = inputs_ptr[max_pos * 6 + 4]; + auto iarea = inputs_ptr[max_pos * 6 + 5]; + auto iind = inds[max_pos]; + inputs_ptr[max_pos * 6 + 0] = inputs_ptr[i * 6 + 0]; + inputs_ptr[max_pos * 6 + 1] = inputs_ptr[i * 6 + 1]; + inputs_ptr[max_pos * 6 + 2] = inputs_ptr[i * 6 + 2]; + inputs_ptr[max_pos * 6 + 3] = inputs_ptr[i * 6 + 3]; + inputs_ptr[max_pos * 6 + 4] = inputs_ptr[i * 6 + 4]; + inputs_ptr[max_pos * 6 + 5] = inputs_ptr[i * 6 + 5]; + inds[max_pos] = inds[i]; + inputs_ptr[i * 6 + 0] = ix1; + inputs_ptr[i * 6 + 1] = iy1; + inputs_ptr[i * 6 + 2] = ix2; + inputs_ptr[i * 6 + 3] = iy2; + inputs_ptr[i * 6 + 4] = iscore; + inputs_ptr[i * 6 + 5] = iarea; + inds[i] = iind; + + pos = i + 1; + while (pos < nboxes) { + auto xx1 = fmaxf(ix1, inputs_ptr[pos * 6 + 0]); + auto yy1 = fmaxf(iy1, inputs_ptr[pos * 6 + 1]); + auto xx2 = fminf(ix2, inputs_ptr[pos * 6 + 2]); + auto yy2 = fminf(iy2, inputs_ptr[pos * 6 + 3]); + + auto w = fmaxf(0.0, xx2 - xx1 + offset); + auto h = fmaxf(0.0, yy2 - yy1 + offset); + auto inter = w * h; + auto ovr = inter / (iarea + inputs_ptr[pos * 6 + 5] - inter); + + float weight = 1.; + if (method == 0) { + if (ovr >= iou_threshold) weight = 0; + } else if (method == 1) { + if (ovr >= iou_threshold) weight = 1 - ovr; + } else if (method == 2) { + weight = exp(-(ovr * ovr) / sigma); + } + inputs_ptr[pos * 6 + 4] *= weight; + // if box score falls below threshold, discard the box by + // swapping with last box update N + if (inputs_ptr[pos * 6 + 4] < min_score) { + inputs_ptr[pos * 6 + 0] = inputs_ptr[(nboxes - 1) * 6 + 0]; + inputs_ptr[pos * 6 + 1] = inputs_ptr[(nboxes - 1) * 6 + 1]; + inputs_ptr[pos * 6 + 2] = inputs_ptr[(nboxes - 1) * 6 + 2]; + inputs_ptr[pos * 6 + 3] = inputs_ptr[(nboxes - 1) * 6 + 3]; + inputs_ptr[pos * 6 + 4] = inputs_ptr[(nboxes - 1) * 6 + 4]; + inputs_ptr[pos * 6 + 5] = inputs_ptr[(nboxes - 1) * 6 + 5]; + inds[pos] = inds[nboxes - 1]; + nboxes = nboxes - 1; + pos = pos - 1; + } + pos = pos + 1; + } + } + setScalar(num_out, int64_t{nboxes}); + outs[0] = dets; + outs[1] = inds_t; + outs[2] = num_out; +} + +void nms_match_cpu(HostContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + float iou_threshold; + SSAttrs(attr).get("iou_threshold", iou_threshold).done(); +} + +PARROTS_EXTENSION_REGISTER(nms) + .attr("iou_threshold") + .attr("offset") + .input(3) + .output(1) + .apply(nms_cpu) +#ifdef PARROTS_USE_CUDA + .apply(nms_cuda) +#endif + .done(); + +PARROTS_EXTENSION_REGISTER(softnms) + .attr("iou_threshold") + .attr("sigma") + .attr("min_score") + .attr("method") + .attr("offset") + .input(3) + .output(3) + .apply(softnms_cpu) + .done(); + +PARROTS_EXTENSION_REGISTER(nms_match) + .attr("iou_threshold") + .input(1) + .output(1) + .apply(nms_match_cpu) + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/nms_cuda.cu b/mmcv/mmcv/ops/csrc/parrots/nms_cuda.cu new file mode 100644 index 0000000..840e792 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/nms_cuda.cu @@ -0,0 +1,55 @@ +#include "nms_cuda_kernel.cuh" +#include "parrots_cuda_helper.hpp" + +DArrayLite NMSCUDAKernelLauncher(const DArrayLite boxes_sorted, + const DArrayLite order, const DArrayLite areas, + float iou_threshold, int offset, + CudaContext& ctx, cudaStream_t stream) { + size_t boxes_num = boxes_sorted.dim(0); + + if (boxes_sorted.size() == 0) { + auto select = ctx.createDArrayLite(DArraySpec::array(Prim::Int64, 0)); + return select; + } + + const size_t col_blocks = DIVUP(boxes_num, threadsPerBlock); + auto mask = ctx.createDArrayLite( + DArraySpec::array(Prim::Int64, DArrayShape(boxes_num, col_blocks))); + dim3 blocks(col_blocks, col_blocks); + dim3 threads(threadsPerBlock); + PARROTS_CUDA_CHECK(cudaGetLastError()); + nms_cuda<<>>( + boxes_num, iou_threshold, offset, boxes_sorted.ptr(), + (unsigned long long*)mask.ptr()); + PARROTS_CUDA_CHECK(cudaGetLastError()); + + auto mask_cpu = ctx.createDArrayLite(mask, getHostProxy()); + auto mask_host = mask_cpu.ptr(); + + auto remv = ctx.createDArrayLite(DArraySpec::array(Prim::Int64, col_blocks), + getHostProxy()); + remv.setZeros(syncStream()); + auto remv_ptr = remv.ptr(); + + auto keep_t = ctx.createDArrayLite(DArraySpec::array(Prim::Uint8, boxes_num), + getHostProxy()); + keep_t.setZeros(syncStream()); + auto keep = keep_t.ptr(); + + for (int i = 0; i < boxes_num; i++) { + int nblock = i / threadsPerBlock; + int inblock = i % threadsPerBlock; + + if (!(remv_ptr[nblock] & (1ULL << inblock))) { + keep[i] = 1; + int64_t* p = mask_host + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv_ptr[j] |= p[j]; + } + } + } + + auto keep_cuda = ctx.createDArrayLite(keep_t, ctx.getProxy()); + PARROTS_CUDA_CHECK(cudaGetLastError()); + return keep_cuda; +} diff --git a/mmcv/mmcv/ops/csrc/parrots/parrots_cpp_helper.cpp b/mmcv/mmcv/ops/csrc/parrots/parrots_cpp_helper.cpp new file mode 100644 index 0000000..8349104 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/parrots_cpp_helper.cpp @@ -0,0 +1,2 @@ +#include "parrots_cpp_helper.hpp" +using namespace parrots; diff --git a/mmcv/mmcv/ops/csrc/parrots/parrots_cuda_helper.cu b/mmcv/mmcv/ops/csrc/parrots/parrots_cuda_helper.cu new file mode 100644 index 0000000..cef5685 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/parrots_cuda_helper.cu @@ -0,0 +1,3 @@ +#include "parrots_cuda_helper.hpp" + +using namespace parrots; diff --git a/mmcv/mmcv/ops/csrc/parrots/psamask.cpp b/mmcv/mmcv/ops/csrc/parrots/psamask.cpp new file mode 100644 index 0000000..9cfcb23 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/psamask.cpp @@ -0,0 +1,304 @@ +#include "parrots_cpp_helper.hpp" + +#ifndef min +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#endif + +#ifndef max +#define max(a, b) (((a) > (b)) ? (a) : (b)) +#endif + +void psamask_collect_forward(const int num_, const int h_feature, + const int w_feature, const int h_mask, + const int w_mask, const int half_h_mask, + const int half_w_mask, const float *mask_data, + float *buffer_data) { + for (int n = 0; n < num_; n++) { + for (int h = 0; h < h_feature; h++) { + for (int w = 0; w < w_feature; w++) { + // effective mask region : [hstart, hend) x [wstart, wend) with + // mask-indexed + const int hstart = max(0, half_h_mask - h); + const int hend = min(h_mask, h_feature + half_h_mask - h); + const int wstart = max(0, half_w_mask - w); + const int wend = min(w_mask, w_feature + half_w_mask - w); + // (hidx, widx ) with mask-indexed + // (hidx + h - half_h_mask, widx + w - half_w_mask) with + // feature-indexed + for (int hidx = hstart; hidx < hend; hidx++) { + for (int widx = wstart; widx < wend; widx++) { + buffer_data[(n * h_feature * w_feature + + (hidx + h - half_h_mask) * w_feature + + (widx + w - half_w_mask)) * + h_feature * w_feature + + h * w_feature + w] = + mask_data[((n * h_mask * w_mask + hidx * w_mask + widx) * + h_feature + + h) * + w_feature + + w]; + } + } + } + } + } +} + +void psamask_distribute_forward(const int num_, const int h_feature, + const int w_feature, const int h_mask, + const int w_mask, const int half_h_mask, + const int half_w_mask, const float *mask_data, + float *buffer_data) { + for (int n = 0; n < num_; n++) { + for (int h = 0; h < h_feature; h++) { + for (int w = 0; w < w_feature; w++) { + // effective mask region : [hstart, hend) x [wstart, wend) with + // mask-indexed + const int hstart = max(0, half_h_mask - h); + const int hend = min(h_mask, h_feature + half_h_mask - h); + const int wstart = max(0, half_w_mask - w); + const int wend = min(w_mask, w_feature + half_w_mask - w); + // (hidx, widx ) with mask-indexed + // (hidx + h - half_h_mask, widx + w - half_w_mask) with + // feature-indexed + for (int hidx = hstart; hidx < hend; hidx++) { + for (int widx = wstart; widx < wend; widx++) { + buffer_data[(n * h_feature * w_feature + h * w_feature + w) * + h_feature * w_feature + + (hidx + h - half_h_mask) * w_feature + + (widx + w - half_w_mask)] = + mask_data[((n * h_mask * w_mask + hidx * w_mask + widx) * + h_feature + + h) * + w_feature + + w]; + } + } + } + } + } +} + +void psamask_collect_backward(const int num_, const int h_feature, + const int w_feature, const int h_mask, + const int w_mask, const int half_h_mask, + const int half_w_mask, const float *buffer_diff, + float *mask_diff) { + for (int n = 0; n < num_; n++) { + for (int h = 0; h < h_feature; h++) { + for (int w = 0; w < w_feature; w++) { + // effective mask region : [hstart, hend) x [wstart, wend) with + // mask-indexed + const int hstart = max(0, half_h_mask - h); + const int hend = min(h_mask, h_feature + half_h_mask - h); + const int wstart = max(0, half_w_mask - w); + const int wend = min(w_mask, w_feature + half_w_mask - w); + // (hidx, widx ) with mask-indexed + // (hidx + h - half_h_mask, widx + w - half_w_mask) with + // feature-indexed + for (int hidx = hstart; hidx < hend; hidx++) { + for (int widx = wstart; widx < wend; widx++) { + mask_diff[((n * h_mask * w_mask + hidx * w_mask + widx) * + h_feature + + h) * + w_feature + + w] = buffer_diff[(n * h_feature * w_feature + + (hidx + h - half_h_mask) * w_feature + + (widx + w - half_w_mask)) * + h_feature * w_feature + + h * w_feature + w]; + } + } + } + } + } +} + +void psamask_distribute_backward(const int num_, const int h_feature, + const int w_feature, const int h_mask, + const int w_mask, const int half_h_mask, + const int half_w_mask, + const float *buffer_diff, float *mask_diff) { + for (int n = 0; n < num_; n++) { + for (int h = 0; h < h_feature; h++) { + for (int w = 0; w < w_feature; w++) { + // effective mask region : [hstart, hend) x [wstart, wend) with + // mask-indexed + const int hstart = max(0, half_h_mask - h); + const int hend = min(h_mask, h_feature + half_h_mask - h); + const int wstart = max(0, half_w_mask - w); + const int wend = min(w_mask, w_feature + half_w_mask - w); + // (hidx, widx ) with mask-indexed + // (hidx + h - half_h_mask, widx + w - half_w_mask) with + // feature-indexed + for (int hidx = hstart; hidx < hend; hidx++) { + for (int widx = wstart; widx < wend; widx++) { + mask_diff[((n * h_mask * w_mask + hidx * w_mask + widx) * + h_feature + + h) * + w_feature + + w] = + buffer_diff[(n * h_feature * w_feature + h * w_feature + w) * + h_feature * w_feature + + (hidx + h - half_h_mask) * w_feature + + (widx + w - half_w_mask)]; + } + } + } + } + } +} + +void psamask_forward_cpu(HostContext &ctx, const SSElement &attr, + const OperatorBase::in_list_t &ins, + OperatorBase::out_list_t &outs) { + int psa_type, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, + half_w_mask; + SSAttrs(attr) + .get("psa_type", psa_type) + .get("num_", num_) + .get("h_feature", h_feature) + .get("w_feature", w_feature) + .get("h_mask", h_mask) + .get("w_mask", w_mask) + .get("half_h_mask", half_h_mask) + .get("half_w_mask", half_w_mask) + .done(); + const auto &input = ins[0]; + auto &output = outs[0]; + + auto input_ptr = input.ptr(); + auto output_ptr = output.ptr(); + + if (psa_type == 0) + psamask_collect_forward(num_, h_feature, w_feature, h_mask, w_mask, + half_h_mask, half_w_mask, input_ptr, output_ptr); + else + psamask_distribute_forward(num_, h_feature, w_feature, h_mask, w_mask, + half_h_mask, half_w_mask, input_ptr, output_ptr); +} + +void psamask_backward_cpu(HostContext &ctx, const SSElement &attr, + const OperatorBase::in_list_t &ins, + OperatorBase::out_list_t &outs) { + int psa_type, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, + half_w_mask; + SSAttrs(attr) + .get("psa_type", psa_type) + .get("num_", num_) + .get("h_feature", h_feature) + .get("w_feature", w_feature) + .get("h_mask", h_mask) + .get("w_mask", w_mask) + .get("half_h_mask", half_h_mask) + .get("half_w_mask", half_w_mask) + .done(); + + const auto &input = ins[0]; + auto &output = outs[0]; + + auto input_ptr = input.ptr(); + auto output_ptr = output.ptr(); + + if (psa_type == 0) + psamask_collect_backward(num_, h_feature, w_feature, h_mask, w_mask, + half_h_mask, half_w_mask, input_ptr, output_ptr); + else + psamask_distribute_backward(num_, h_feature, w_feature, h_mask, w_mask, + half_h_mask, half_w_mask, input_ptr, + output_ptr); +} + +void PSAMaskForwardCUDAKernelLauncher(const int psa_type, + const DArrayLite input, DArrayLite output, + const int num_, const int h_feature, + const int w_feature, const int h_mask, + const int w_mask, const int half_h_mask, + const int half_w_mask, CudaContext &ctx); + +void PSAMaskBackwardCUDAKernelLauncher(const int psa_type, + const DArrayLite grad_output, + DArrayLite grad_input, const int num_, + const int h_feature, const int w_feature, + const int h_mask, const int w_mask, + const int half_h_mask, + const int half_w_mask, CudaContext &ctx); + +void psamask_forward_cuda(CudaContext &ctx, const SSElement &attr, + const OperatorBase::in_list_t &ins, + OperatorBase::out_list_t &outs) { + int psa_type, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, + half_w_mask; + SSAttrs(attr) + .get("psa_type", psa_type) + .get("num_", num_) + .get("h_feature", h_feature) + .get("w_feature", w_feature) + .get("h_mask", h_mask) + .get("w_mask", w_mask) + .get("half_h_mask", half_h_mask) + .get("half_w_mask", half_w_mask) + .done(); + const auto &input = ins[0]; + auto &output = outs[0]; + PSAMaskForwardCUDAKernelLauncher(psa_type, input, output, num_, h_feature, + w_feature, h_mask, w_mask, half_h_mask, + half_w_mask, ctx); +} + +void psamask_backward_cuda(CudaContext &ctx, const SSElement &attr, + const OperatorBase::in_list_t &ins, + OperatorBase::out_list_t &outs) { + int psa_type, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, + half_w_mask; + SSAttrs(attr) + .get("psa_type", psa_type) + .get("num_", num_) + .get("h_feature", h_feature) + .get("w_feature", w_feature) + .get("h_mask", h_mask) + .get("w_mask", w_mask) + .get("half_h_mask", half_h_mask) + .get("half_w_mask", half_w_mask) + .done(); + + const auto &input = ins[0]; + auto &output = outs[0]; + PSAMaskBackwardCUDAKernelLauncher(psa_type, input, output, num_, h_feature, + w_feature, h_mask, w_mask, half_h_mask, + half_w_mask, ctx); +} + +PARROTS_EXTENSION_REGISTER(psamask_forward) + .attr("psa_type") + .attr("num_") + .attr("h_feature") + .attr("w_feature") + .attr("h_mask") + .attr("w_mask") + .attr("half_h_mask") + .attr("half_w_mask") + .input(1) + .output(1) + .apply(psamask_forward_cpu) +#ifdef PARROTS_USE_CUDA + .apply(psamask_forward_cuda) +#endif + .done(); + +PARROTS_EXTENSION_REGISTER(psamask_backward) + .attr("psa_type") + .attr("num_") + .attr("h_feature") + .attr("w_feature") + .attr("h_mask") + .attr("w_mask") + .attr("half_h_mask") + .attr("half_w_mask") + .input(1) + .output(1) + .apply(psamask_backward_cpu) +#ifdef PARROTS_USE_CUDA + .apply(psamask_backward_cuda) +#endif + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/psamask_cuda.cu b/mmcv/mmcv/ops/csrc/parrots/psamask_cuda.cu new file mode 100644 index 0000000..e82e01d --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/psamask_cuda.cu @@ -0,0 +1,48 @@ +// Modified from +// https://github.com/hszhao/semseg/blob/master/lib/psa/src + +#include "parrots_cuda_helper.hpp" +#include "psamask_cuda_kernel.cuh" + +void PSAMaskForwardCUDAKernelLauncher(const int psa_type, + const DArrayLite input, DArrayLite output, + const int num_, const int h_feature, + const int w_feature, const int h_mask, + const int w_mask, const int half_h_mask, + const int half_w_mask, CudaContext& ctx) { + int nthreads = num_ * h_feature * w_feature; + cudaStream_t stream = getStreamNative(ctx.getStream()); + if (psa_type == 0) + PARROTS_DISPATCH_FLOATING_TYPES(input.elemType().prim(), [&] { + psamask_collect_forward_cuda<<>>( + nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask, + half_w_mask, input.ptr(), output.ptr()); + }); + else + PARROTS_DISPATCH_FLOATING_TYPES(input.elemType().prim(), [&] { + psamask_distribute_forward_cuda<<>>( + nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask, + half_w_mask, input.ptr(), output.ptr()); + }); +} + +void PSAMaskBackwardCUDAKernelLauncher( + const int psa_type, const DArrayLite grad_output, DArrayLite grad_input, + const int num_, const int h_feature, const int w_feature, const int h_mask, + const int w_mask, const int half_h_mask, const int half_w_mask, + CudaContext& ctx) { + int nthreads = num_ * h_feature * w_feature; + cudaStream_t stream = getStreamNative(ctx.getStream()); + if (psa_type == 0) + PARROTS_DISPATCH_FLOATING_TYPES(grad_input.elemType().prim(), [&] { + psamask_collect_backward_cuda<<>>( + nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask, + half_w_mask, grad_output.ptr(), grad_input.ptr()); + }); + else + PARROTS_DISPATCH_FLOATING_TYPES(grad_input.elemType().prim(), [&] { + psamask_distribute_backward_cuda<<>>( + nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask, + half_w_mask, grad_output.ptr(), grad_input.ptr()); + }); +} diff --git a/mmcv/mmcv/ops/csrc/parrots/roi_align.cpp b/mmcv/mmcv/ops/csrc/parrots/roi_align.cpp new file mode 100644 index 0000000..47149ca --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/roi_align.cpp @@ -0,0 +1,100 @@ +// Copyright (c) 2018, SenseTime. +#include "parrots_cpp_helper.hpp" + +void ROIAlignForwardCUDAKernelLauncher(const DArrayLite input, + const DArrayLite rois, DArrayLite output, + DArrayLite argmax_y, DArrayLite argmax_x, + int aligned_height, int aligned_width, + float spatial_scale, int sampling_ratio, + int pool_mode, bool aligned, + cudaStream_t stream); + +void ROIAlignBackwardCUDAKernelLauncher( + const DArrayLite grad_output, const DArrayLite rois, + const DArrayLite argmax_y, const DArrayLite argmax_x, DArrayLite grad_input, + int aligned_height, int aligned_width, float spatial_scale, + int sampling_ratio, int pool_mode, bool aligned, cudaStream_t stream); + +void roi_align_forward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int aligned_height; + int aligned_width; + float spatial_scale; + int sampling_ratio; + int pool_mode; + bool aligned; + SSAttrs(attr) + .get("aligned_height", aligned_height) + .get("aligned_width", aligned_width) + .get("spatial_scale", spatial_scale) + .get("sampling_ratio", sampling_ratio) + .get("pool_mode", pool_mode) + .get("aligned", aligned) + .done(); + + const auto& input = ins[0]; + const auto& rois = ins[1]; + auto& output = outs[0]; + auto& argmax_y = outs[1]; + auto& argmax_x = outs[2]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + ROIAlignForwardCUDAKernelLauncher( + input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, + spatial_scale, sampling_ratio, pool_mode, aligned, stream); +} + +void roi_align_backward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int aligned_height; + int aligned_width; + float spatial_scale; + int sampling_ratio; + int pool_mode; + bool aligned; + SSAttrs(attr) + .get("aligned_height", aligned_height) + .get("aligned_width", aligned_width) + .get("spatial_scale", spatial_scale) + .get("sampling_ratio", sampling_ratio) + .get("pool_mode", pool_mode) + .get("aligned", aligned) + .done(); + + const auto& grad_output = ins[0]; + const auto& rois = ins[1]; + const auto& argmax_y = ins[2]; + const auto& argmax_x = ins[3]; + auto& grad_input = outs[0]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + ROIAlignBackwardCUDAKernelLauncher( + grad_output, rois, argmax_y, argmax_x, grad_input, aligned_height, + aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned, stream); +} + +PARROTS_EXTENSION_REGISTER(roi_align_forward) + .attr("aligned_height") + .attr("aligned_width") + .attr("spatial_scale") + .attr("sampling_ratio") + .attr("pool_mode") + .attr("aligned") + .input(2) + .output(3) + .apply(roi_align_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(roi_align_backward) + .attr("aligned_height") + .attr("aligned_width") + .attr("spatial_scale") + .attr("sampling_ratio") + .attr("pool_mode") + .attr("aligned") + .input(4) + .output(1) + .apply(roi_align_backward_cuda) + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/roi_align_cuda.cu b/mmcv/mmcv/ops/csrc/parrots/roi_align_cuda.cu new file mode 100644 index 0000000..2c39831 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/roi_align_cuda.cu @@ -0,0 +1,52 @@ +#include "parrots_cuda_helper.hpp" +#include "roi_align_cuda_kernel.cuh" + +void ROIAlignForwardCUDAKernelLauncher(const DArrayLite input, + const DArrayLite rois, DArrayLite output, + DArrayLite argmax_y, DArrayLite argmax_x, + int aligned_height, int aligned_width, + float spatial_scale, int sampling_ratio, + int pool_mode, bool aligned, + cudaStream_t stream) { + int output_size = output.size(); + int channels = input.dim(1); + int height = input.dim(2); + int width = input.dim(3); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + input.elemType().prim(), ([&] { + roi_align_forward_cuda_kernel + <<>>( + output_size, input.ptr(), rois.ptr(), + output.ptr(), argmax_y.ptr(), + argmax_x.ptr(), aligned_height, aligned_width, + spatial_scale, sampling_ratio, pool_mode, aligned, channels, + height, width); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void ROIAlignBackwardCUDAKernelLauncher( + const DArrayLite grad_output, const DArrayLite rois, + const DArrayLite argmax_y, const DArrayLite argmax_x, DArrayLite grad_input, + int aligned_height, int aligned_width, float spatial_scale, + int sampling_ratio, int pool_mode, bool aligned, cudaStream_t stream) { + int output_size = grad_output.size(); + int channels = grad_input.dim(1); + int height = grad_input.dim(2); + int width = grad_input.dim(3); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_output.elemType().prim(), ([&] { + roi_align_backward_cuda_kernel + <<>>( + output_size, grad_output.ptr(), rois.ptr(), + argmax_y.ptr(), argmax_x.ptr(), + grad_input.ptr(), aligned_height, aligned_width, + spatial_scale, sampling_ratio, pool_mode, aligned, channels, + height, width); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/parrots/roi_pool.cpp b/mmcv/mmcv/ops/csrc/parrots/roi_pool.cpp new file mode 100644 index 0000000..0f9d754 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/roi_pool.cpp @@ -0,0 +1,77 @@ +#include "parrots_cpp_helper.hpp" + +void ROIPoolForwardCUDAKernelLauncher(const DArrayLite input, + const DArrayLite rois, DArrayLite output, + DArrayLite argmax, int pooled_height, + int pooled_width, float spatial_scale, + cudaStream_t stream); + +void ROIPoolBackwardCUDAKernelLauncher(const DArrayLite grad_output, + const DArrayLite rois, + const DArrayLite argmax, + DArrayLite grad_input, int pooled_height, + int pooled_width, float spatial_scale, + cudaStream_t stream); + +void roi_pool_forward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int pooled_height; + int pooled_width; + float spatial_scale; + SSAttrs(attr) + .get("pooled_height", pooled_height) + .get("pooled_width", pooled_width) + .get("spatial_scale", spatial_scale) + .done(); + + const auto& input = ins[0]; + const auto& rois = ins[1]; + auto& output = outs[0]; + auto& argmax = outs[1]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + ROIPoolForwardCUDAKernelLauncher(input, rois, output, argmax, pooled_height, + pooled_width, spatial_scale, stream); +} + +void roi_pool_backward_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + int pooled_height; + int pooled_width; + float spatial_scale; + SSAttrs(attr) + .get("pooled_height", pooled_height) + .get("pooled_width", pooled_width) + .get("spatial_scale", spatial_scale) + .done(); + + const auto& grad_output = ins[0]; + const auto& rois = ins[1]; + const auto& argmax = ins[2]; + auto& grad_input = outs[0]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + ROIPoolBackwardCUDAKernelLauncher(grad_output, rois, argmax, grad_input, + pooled_height, pooled_width, spatial_scale, + stream); +} + +PARROTS_EXTENSION_REGISTER(roi_pool_forward) + .attr("pooled_height") + .attr("pooled_width") + .attr("spatial_scale") + .input(2) + .output(2) + .apply(roi_pool_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(roi_pool_backward) + .attr("pooled_height") + .attr("pooled_width") + .attr("spatial_scale") + .input(3) + .output(1) + .apply(roi_pool_backward_cuda) + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/roi_pool_cuda.cu b/mmcv/mmcv/ops/csrc/parrots/roi_pool_cuda.cu new file mode 100644 index 0000000..a94ffce --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/roi_pool_cuda.cu @@ -0,0 +1,45 @@ +#include "parrots_cuda_helper.hpp" +#include "roi_pool_cuda_kernel.cuh" + +void ROIPoolForwardCUDAKernelLauncher(const DArrayLite input, + const DArrayLite rois, DArrayLite output, + DArrayLite argmax, int pooled_height, + int pooled_width, float spatial_scale, + cudaStream_t stream) { + int output_size = output.size(); + int channels = input.dim(1); + int height = input.dim(2); + int width = input.dim(3); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF(input.elemType().prim(), [&] { + roi_pool_forward_cuda_kernel + <<>>( + output_size, input.ptr(), rois.ptr(), + output.ptr(), argmax.ptr(), pooled_height, + pooled_width, spatial_scale, channels, height, width); + }); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void ROIPoolBackwardCUDAKernelLauncher(const DArrayLite grad_output, + const DArrayLite rois, + const DArrayLite argmax, + DArrayLite grad_input, int pooled_height, + int pooled_width, float spatial_scale, + cudaStream_t stream) { + int output_size = grad_output.size(); + int channels = grad_output.dim(1); + int height = grad_output.dim(2); + int width = grad_output.dim(3); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF(grad_output.elemType().prim(), [&] { + roi_pool_backward_cuda_kernel + <<>>( + output_size, grad_output.ptr(), rois.ptr(), + argmax.ptr(), grad_input.ptr(), pooled_height, + pooled_width, channels, height, width); + }); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/parrots/sync_bn.cpp b/mmcv/mmcv/ops/csrc/parrots/sync_bn.cpp new file mode 100644 index 0000000..eee2a43 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/sync_bn.cpp @@ -0,0 +1,139 @@ +#include "parrots_cpp_helper.hpp" + +void SyncBNForwardMeanCUDAKernelLauncher(const DArrayLite input, + DArrayLite mean, cudaStream_t stream); + +void SyncBNForwardVarCUDAKernelLauncher(const DArrayLite input, + const DArrayLite mean, DArrayLite var, + cudaStream_t stream); + +void SyncBNForwardOutputCUDAKernelLauncher( + const DArrayLite input, const DArrayLite mean, const DArrayLite var, + DArrayLite running_mean, DArrayLite running_var, const DArrayLite weight, + const DArrayLite bias, DArrayLite norm, DArrayLite std, DArrayLite output, + const float eps, const float momentum, size_t group_size, + cudaStream_t stream); + +void SyncBNBackwardParamCUDAKernelLauncher(const DArrayLite grad_output, + const DArrayLite norm, + DArrayLite weight_diff, + DArrayLite bias_diff, + cudaStream_t stream); + +void SyncBNBackwardDataCUDAKernelLauncher( + const DArrayLite grad_output, const DArrayLite weight, + const DArrayLite weight_diff, const DArrayLite bias_diff, + const DArrayLite norm, const DArrayLite std, DArrayLite grad_input, + cudaStream_t stream); + +void sync_bn_forward_mean_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + const auto& input = ins[0]; + auto& mean = outs[0]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + SyncBNForwardMeanCUDAKernelLauncher(input, mean, stream); +} + +void sync_bn_forward_var_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + const auto& input = ins[0]; + const auto& mean = ins[1]; + auto& var = outs[0]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + SyncBNForwardVarCUDAKernelLauncher(input, mean, var, stream); +} + +void sync_bn_forward_output_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + size_t group_size; + float eps, momentum; + SSAttrs(attr) + .get("eps", eps) + .get("momentum", momentum) + .get("group_size", group_size) + .done(); + + const auto& input = ins[0]; + const auto& mean = ins[1]; + const auto& var = ins[2]; + const auto& weight = ins[3]; + const auto& bias = ins[4]; + auto& running_mean = outs[0]; + auto& running_var = outs[1]; + auto& norm = outs[2]; + auto& std = outs[3]; + auto& output = outs[4]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + SyncBNForwardOutputCUDAKernelLauncher( + input, mean, var, running_mean, running_var, weight, bias, norm, std, + output, eps, momentum, group_size, stream); +} + +void sync_bn_backward_param_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + const auto& grad_output = ins[0]; + const auto& norm = ins[1]; + auto& grad_weight = outs[0]; + auto& grad_bias = outs[1]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + SyncBNBackwardParamCUDAKernelLauncher(grad_output, norm, grad_weight, + grad_bias, stream); +} + +void sync_bn_backward_data_cuda(CudaContext& ctx, const SSElement& attr, + const OperatorBase::in_list_t& ins, + OperatorBase::out_list_t& outs) { + const auto& grad_output = ins[0]; + const auto& weight = ins[1]; + const auto& grad_weight = ins[2]; + const auto& grad_bias = ins[3]; + const auto& norm = ins[4]; + const auto& std = ins[5]; + auto& grad_input = outs[0]; + + cudaStream_t stream = getStreamNative(ctx.getStream()); + SyncBNBackwardDataCUDAKernelLauncher(grad_output, weight, grad_weight, + grad_bias, norm, std, grad_input, + stream); +} + +PARROTS_EXTENSION_REGISTER(sync_bn_forward_mean) + .input(1) + .output(1) + .apply(sync_bn_forward_mean_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(sync_bn_forward_var) + .input(2) + .output(1) + .apply(sync_bn_forward_var_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(sync_bn_forward_output) + .attr("eps") + .attr("momentum") + .attr("group_size") + .input(5) + .output(5) + .apply(sync_bn_forward_output_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(sync_bn_backward_param) + .input(2) + .output(2) + .apply(sync_bn_backward_param_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(sync_bn_backward_data) + .input(6) + .output(1) + .apply(sync_bn_backward_data_cuda) + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/sync_bn_cuda.cu b/mmcv/mmcv/ops/csrc/parrots/sync_bn_cuda.cu new file mode 100644 index 0000000..9bc97e4 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/sync_bn_cuda.cu @@ -0,0 +1,104 @@ +#include "parrots_cuda_helper.hpp" +#include "sync_bn_cuda_kernel.cuh" + +void SyncBNForwardMeanCUDAKernelLauncher(const DArrayLite input, + DArrayLite mean, cudaStream_t stream) { + int num = input.dim(0); + int channels = input.dim(1); + int spatial = input.dim(2); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + input.elemType().prim(), ([&] { + sync_bn_forward_mean_cuda_kernel + <<>>(input.ptr(), + mean.ptr(), num, + channels, spatial); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void SyncBNForwardVarCUDAKernelLauncher(const DArrayLite input, + const DArrayLite mean, DArrayLite var, + cudaStream_t stream) { + int num = input.dim(0); + int channels = input.dim(1); + int spatial = input.dim(2); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + input.elemType().prim(), ([&] { + sync_bn_forward_var_cuda_kernel + <<>>( + input.ptr(), mean.ptr(), var.ptr(), num, + channels, spatial); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void SyncBNForwardOutputCUDAKernelLauncher( + const DArrayLite input, const DArrayLite mean, const DArrayLite var, + DArrayLite running_mean, DArrayLite running_var, const DArrayLite weight, + const DArrayLite bias, DArrayLite norm, DArrayLite std, DArrayLite output, + float eps, float momentum, size_t group_size, cudaStream_t stream) { + int num = input.dim(0); + int channels = input.dim(1); + int spatial = input.dim(2); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + input.elemType().prim(), ([&] { + sync_bn_forward_output_cuda_kernel + <<>>( + input.ptr(), mean.ptr(), var.ptr(), + running_mean.ptr(), running_var.ptr(), + weight.ptr(), bias.ptr(), norm.ptr(), + std.ptr(), output.ptr(), num, channels, + spatial, eps, momentum, group_size); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void SyncBNBackwardParamCUDAKernelLauncher(const DArrayLite grad_output, + const DArrayLite norm, + DArrayLite grad_weight, + DArrayLite grad_bias, + cudaStream_t stream) { + int num = grad_output.dim(0); + int channels = grad_output.dim(1); + int spatial = grad_output.dim(2); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_output.elemType().prim(), ([&] { + sync_bn_backward_param_cuda_kernel + <<>>( + grad_output.ptr(), norm.ptr(), + grad_weight.ptr(), grad_bias.ptr(), num, channels, + spatial); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void SyncBNBackwardDataCUDAKernelLauncher( + const DArrayLite grad_output, const DArrayLite weight, + const DArrayLite grad_weight, const DArrayLite grad_bias, + const DArrayLite norm, const DArrayLite std, DArrayLite grad_input, + cudaStream_t stream) { + int output_size = grad_input.size(); + int num = grad_input.dim(0); + int channels = grad_input.dim(1); + int spatial = grad_input.dim(2); + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_input.elemType().prim(), ([&] { + sync_bn_backward_data_cuda_kernel + <<>>( + output_size, grad_output.ptr(), weight.ptr(), + grad_weight.ptr(), grad_bias.ptr(), + norm.ptr(), std.ptr(), grad_input.ptr(), + num, channels, spatial); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/parrots/tin_shift.cpp b/mmcv/mmcv/ops/csrc/parrots/tin_shift.cpp new file mode 100644 index 0000000..17b48af --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/tin_shift.cpp @@ -0,0 +1,42 @@ +#include "parrots_cpp_helper.hpp" + +void TINShiftForwardCUDAKernelLauncher(const DArrayLite input, + const DArrayLite shift, + DArrayLite output, cudaStream_t stream); + +void TINShiftBackwardCUDAKernelLauncher(const DArrayLite grad_output, + const DArrayLite shift, + DArrayLite grad_input, + cudaStream_t stream); + +void tin_shift_forward_cuda(CudaContext &ctx, const SSElement &attr, + const OperatorBase::in_list_t &ins, + OperatorBase::out_list_t &outs) { + const auto &input = ins[0]; + const auto &shift = ins[1]; + auto &output = outs[0]; + cudaStream_t stream = getStreamNative(ctx.getStream()); + TINShiftForwardCUDAKernelLauncher(input, shift, output, stream); +} + +void tin_shift_backward_cuda(CudaContext &ctx, const SSElement &attr, + const OperatorBase::in_list_t &ins, + OperatorBase::out_list_t &outs) { + const auto &grad_output = ins[0]; + const auto &shift = ins[1]; + auto &grad_input = outs[0]; + cudaStream_t stream = getStreamNative(ctx.getStream()); + TINShiftBackwardCUDAKernelLauncher(grad_output, shift, grad_input, stream); +} + +PARROTS_EXTENSION_REGISTER(tin_shift_forward) + .input(2) + .output(1) + .apply(tin_shift_forward_cuda) + .done(); + +PARROTS_EXTENSION_REGISTER(tin_shift_backward) + .input(2) + .output(1) + .apply(tin_shift_backward_cuda) + .done(); diff --git a/mmcv/mmcv/ops/csrc/parrots/tin_shift_cuda.cu b/mmcv/mmcv/ops/csrc/parrots/tin_shift_cuda.cu new file mode 100644 index 0000000..e5deaec --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots/tin_shift_cuda.cu @@ -0,0 +1,51 @@ +#include "parrots_cuda_helper.hpp" +#include "tin_shift_cuda_kernel.cuh" + +void TINShiftForwardCUDAKernelLauncher(const DArrayLite input, + const DArrayLite shift, + DArrayLite output, cudaStream_t stream) { + int output_size = output.size(); + int batch_size = input.dim(0); + int t_size = input.dim(1); + int channels = input.dim(2); + int hw_size = input.dim(3); + int group_size = shift.dim(1); + int group_channel = channels / group_size; + int num_kernels = batch_size * hw_size * channels; + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + input.elemType().prim(), ([&] { + tin_shift_forward_cuda_kernel + <<>>( + output_size, input.ptr(), shift.ptr(), + output.ptr(), batch_size, channels, t_size, hw_size, + group_size, group_channel); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} + +void TINShiftBackwardCUDAKernelLauncher(const DArrayLite grad_output, + const DArrayLite shift, + DArrayLite grad_input, + cudaStream_t stream) { + int output_size = grad_output.size(); + int batch_size = grad_output.dim(0); + int t_size = grad_output.dim(1); + int channels = grad_output.dim(2); + int hw_size = grad_output.dim(3); + int group_size = shift.dim(1); + int group_channel = channels / group_size; + int num_kernels = batch_size * hw_size * channels; + + PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_output.elemType().prim(), ([&] { + tin_shift_backward_cuda_kernel + <<>>( + output_size, grad_output.ptr(), shift.ptr(), + grad_input.ptr(), batch_size, channels, t_size, + hw_size, group_size, group_channel); + })); + + PARROTS_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/parrots_cpp_helper.hpp b/mmcv/mmcv/ops/csrc/parrots_cpp_helper.hpp new file mode 100644 index 0000000..5ff2e8f --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots_cpp_helper.hpp @@ -0,0 +1,11 @@ +#ifndef PARROTS_CPP_HELPER +#define PARROTS_CPP_HELPER +#include +#include +#include +#include +#include + +using namespace parrots; + +#endif // PARROTS_CPP_HELPER diff --git a/mmcv/mmcv/ops/csrc/parrots_cuda_helper.hpp b/mmcv/mmcv/ops/csrc/parrots_cuda_helper.hpp new file mode 100644 index 0000000..539009c --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots_cuda_helper.hpp @@ -0,0 +1,111 @@ +#ifndef PARROTS_CUDA_HELPER +#define PARROTS_CUDA_HELPER + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "common_cuda_helper.hpp" +#include "parrots_cudawarpfunction.cuh" + +using namespace parrots; +using phalf = float16; + +#define __PHALF(x) (x.y) + +#define PARROTS_CUDA_CHECK(exp) \ + do { \ + cudaError_t err = exp; \ + if (err != cudaSuccess) { \ + fprintf(stderr, "cudaCheckError() failed : %s\n", \ + cudaGetErrorString(err)); \ + exit(-1); \ + } \ + } while (0) + +#define PARROTS_PRIVATE_CASE_TYPE(prim_type, type, ...) \ + case prim_type: { \ + using scalar_t = type; \ + return __VA_ARGS__(); \ + } + +#define PARROTS_DISPATCH_FLOATING_TYPES(TYPE, ...) \ + [&] { \ + const auto& the_type = TYPE; \ + switch (the_type) { \ + PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__) \ + PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__) \ + default: \ + PARROTS_NOTSUPPORTED; \ + } \ + }() + +#define PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF(TYPE, ...) \ + [&] { \ + const auto& the_type = TYPE; \ + switch (the_type) { \ + PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__) \ + PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__) \ + PARROTS_PRIVATE_CASE_TYPE(Prim::Float16, float16, __VA_ARGS__) \ + default: \ + PARROTS_NOTSUPPORTED; \ + } \ + }() + +/** atomicAdd **/ +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600 + +static __inline__ __device__ double atomicAdd(double* address, double val) { + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + if (val == 0.0) return __longlong_as_double(old); + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); +} + +#endif + +static __inline__ __device__ float16 atomicAdd(float16* address, float16 val) { + unsigned int* aligned = + (unsigned int*)((size_t)address - ((size_t)address & 2)); + unsigned int old = *aligned; + unsigned int assumed; + unsigned short old_as_us; + do { + assumed = old; + old_as_us = + (unsigned short)((size_t)address & 2 ? old >> 16 : old & 0xffff); + +#if __CUDACC_VER_MAJOR__ >= 9 + float16 tmp; + tmp.x = old_as_us; + float16 sum = tmp + val; + unsigned short sum_as_us = sum.x; +// half sum = __float2half_rn(__half2float(__ushort_as_half(old_as_us)) +// + (float)(val)); unsigned short sum_as_us = __half_as_ushort(sum); +#else + unsigned short sum_as_us = + __float2half_rn(__half2float(old_as_us) + (float)(val)); +#endif + + unsigned int sum_as_ui = (size_t)address & 2 + ? (sum_as_us << 16) | (old & 0xffff) + : (old & 0xffff0000) | sum_as_us; + old = atomicCAS(aligned, assumed, sum_as_ui); + } while (assumed != old); + //__half_raw raw = {old_as_us}; + // return float16(raw); + return *reinterpret_cast(&old_as_us); +} +#endif // PARROTS_CUDA_HELPER diff --git a/mmcv/mmcv/ops/csrc/parrots_cudawarpfunction.cuh b/mmcv/mmcv/ops/csrc/parrots_cudawarpfunction.cuh new file mode 100644 index 0000000..7918a57 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/parrots_cudawarpfunction.cuh @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2019, SenseTime. + */ + +#ifndef INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_ +#define INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_ + +#ifndef __CUDACC__ +#error cudawarpfunction.cuh should only be included by .cu files +#endif +#include + +#include + +#ifdef PARROTS_USE_HALF +#include +#endif +#ifdef __CUDA_ARCH__ +#define CUDA_INTRINSIC_FUNC(Expr) Expr +#else +#define CUDA_INTRINSIC_FUNC(Expr) +#endif + +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300 + +#ifdef PARROTS_USE_HALF + +#if CUDA_VERSION < 9000 + +__device__ inline float16 __shfl(float16 var, int srcLane, int width) { + CUDA_INTRINSIC_FUNC(return __shfl(var.y, srcLane, width);); +} + +__device__ inline float16 __shfl_up(float16 var, unsigned delta, int width) { + CUDA_INTRINSIC_FUNC(return __shfl_up(var.y, delta, width);); +} + +__device__ inline float16 __shfl_down(float16 var, unsigned delta, int width) { + CUDA_INTRINSIC_FUNC(return __shfl_down(var.y, delta, width);); +} + +__device__ inline float16 __shfl_xor(float16 var, int laneMask, int width) { + CUDA_INTRINSIC_FUNC(return __shfl_xor(var.y, laneMask, width);); +} + +#else // CUDA_VERSION >= 9000 + +__device__ inline float16 __shfl_sync(unsigned mask, float16 var, int srcLane, + int width = warpSize) { + CUDA_INTRINSIC_FUNC(float16 r; r.y = __shfl_sync(mask, var.y, srcLane, width); + return r;); +} + +__device__ inline float16 __shfl_up_sync(unsigned mask, float16 var, + unsigned delta, int width = warpSize) { + CUDA_INTRINSIC_FUNC( + float16 r; r.y = __shfl_up_sync(mask, var.y, delta, width); return r;); +} + +__device__ inline float16 __shfl_down_sync(unsigned mask, float16 var, + unsigned delta, + int width = warpSize) { + CUDA_INTRINSIC_FUNC( + float16 r; r.y = __shfl_down_sync(mask, var.y, delta, width); return r;); +} + +__device__ inline float16 __shfl_xor_sync(unsigned mask, float16 var, + int laneMask, int width) { + CUDA_INTRINSIC_FUNC(float16 r; + r.y = __shfl_xor_sync(mask, var.y, laneMask, width); + return r;); +} + +#endif // CUDA_VERSION < 9000 + +#endif // PARROTS_USE_HALF + +// warp shuffle interface with a dummy mask +#if CUDA_VERSION < 9000 + +template +__device__ inline T __shfl_sync(unsigned mask, T var, int srcLane, + int width = warpSize) { + CUDA_INTRINSIC_FUNC(return __shfl(var, srcLane, width);); +} + +template +__device__ inline T __shfl_up_sync(unsigned mask, T var, unsigned delta, + int width = warpSize) { + CUDA_INTRINSIC_FUNC(return __shfl_up(var, delta, width);); +} + +template +__device__ inline T __shfl_down_sync(unsigned mask, T var, unsigned delta, + int width = warpSize) { + CUDA_INTRINSIC_FUNC(return __shfl_down(var, delta, width);); +} + +template +__device__ inline T __shfl_xor_sync(unsigned mask, T var, int laneMask, + int width = warpSize) { + CUDA_INTRINSIC_FUNC(return __shfl_xor(var, laneMask, width);); +} + +#endif // CUDA_VERSION < 9000 + +#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300 + +#endif // INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_ diff --git a/mmcv/mmcv/ops/csrc/psamask_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/psamask_cuda_kernel.cuh new file mode 100644 index 0000000..3e22944 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/psamask_cuda_kernel.cuh @@ -0,0 +1,140 @@ +#ifndef PSAMASK_CUDA_KERNEL_CUH +#define PSAMASK_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +// CUDA: grid stride looping +#ifndef CUDA_KERNEL_LOOP +#define CUDA_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ + i += blockDim.x * gridDim.x) +#endif + +template +__global__ void psamask_collect_forward_cuda( + const int nthreads, const int h_feature, const int w_feature, + const int h_mask, const int w_mask, const int half_h_mask, + const int half_w_mask, const T* mask_data, T* buffer_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + const int w = index % w_feature; + const int h = (index / w_feature) % h_feature; + const int n = index / w_feature / h_feature; + // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed + const int hstart = max(0, half_h_mask - h); + const int hend = min(h_mask, h_feature + half_h_mask - h); + const int wstart = max(0, half_w_mask - w); + const int wend = min(w_mask, w_feature + half_w_mask - w); + // (hidx, widx ) with mask-indexed + // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed + for (int hidx = hstart; hidx < hend; hidx++) { + for (int widx = wstart; widx < wend; widx++) { + buffer_data[(n * h_feature * w_feature + + (hidx + h - half_h_mask) * w_feature + + (widx + w - half_w_mask)) * + h_feature * w_feature + + h * w_feature + w] = mask_data + [((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature + h) * + w_feature + + w]; + } + } + } +} + +template +__global__ void psamask_distribute_forward_cuda( + const int nthreads, const int h_feature, const int w_feature, + const int h_mask, const int w_mask, const int half_h_mask, + const int half_w_mask, const T* mask_data, T* buffer_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + const int w = index % w_feature; + const int h = (index / w_feature) % h_feature; + const int n = index / w_feature / h_feature; + // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed + const int hstart = max(0, half_h_mask - h); + const int hend = min(h_mask, h_feature + half_h_mask - h); + const int wstart = max(0, half_w_mask - w); + const int wend = min(w_mask, w_feature + half_w_mask - w); + // (hidx, widx ) with mask-indexed + // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed + for (int hidx = hstart; hidx < hend; hidx++) { + for (int widx = wstart; widx < wend; widx++) { + buffer_data[(n * h_feature * w_feature + h * w_feature + w) * + h_feature * w_feature + + (hidx + h - half_h_mask) * w_feature + + (widx + w - half_w_mask)] = mask_data + [((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature + h) * + w_feature + + w]; + } + } + } +} + +template +__global__ void psamask_collect_backward_cuda( + const int nthreads, const int h_feature, const int w_feature, + const int h_mask, const int w_mask, const int half_h_mask, + const int half_w_mask, const T* buffer_diff, T* mask_diff) { + CUDA_KERNEL_LOOP(index, nthreads) { + const int w = index % w_feature; + const int h = (index / w_feature) % h_feature; + const int n = index / w_feature / h_feature; + // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed + const int hstart = max(0, half_h_mask - h); + const int hend = min(h_mask, h_feature + half_h_mask - h); + const int wstart = max(0, half_w_mask - w); + const int wend = min(w_mask, w_feature + half_w_mask - w); + // (hidx, widx ) with mask-indexed + // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed + for (int hidx = hstart; hidx < hend; hidx++) { + for (int widx = wstart; widx < wend; widx++) { + mask_diff[((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature + + h) * + w_feature + + w] = buffer_diff[(n * h_feature * w_feature + + (hidx + h - half_h_mask) * w_feature + + (widx + w - half_w_mask)) * + h_feature * w_feature + + h * w_feature + w]; + } + } + } +} + +template +__global__ void psamask_distribute_backward_cuda( + const int nthreads, const int h_feature, const int w_feature, + const int h_mask, const int w_mask, const int half_h_mask, + const int half_w_mask, const T* buffer_diff, T* mask_diff) { + CUDA_KERNEL_LOOP(index, nthreads) { + const int w = index % w_feature; + const int h = (index / w_feature) % h_feature; + const int n = index / w_feature / h_feature; + // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed + const int hstart = max(0, half_h_mask - h); + const int hend = min(h_mask, h_feature + half_h_mask - h); + const int wstart = max(0, half_w_mask - w); + const int wend = min(w_mask, w_feature + half_w_mask - w); + // (hidx, widx ) with mask-indexed + // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed + for (int hidx = hstart; hidx < hend; hidx++) { + for (int widx = wstart; widx < wend; widx++) { + mask_diff[((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature + + h) * + w_feature + + w] = + buffer_diff[(n * h_feature * w_feature + h * w_feature + w) * + h_feature * w_feature + + (hidx + h - half_h_mask) * w_feature + + (widx + w - half_w_mask)]; + } + } + } +} + +#endif // PSAMASK_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/csrc/pytorch/bbox_overlaps.cpp b/mmcv/mmcv/ops/csrc/pytorch/bbox_overlaps.cpp new file mode 100644 index 0000000..23bf7d4 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/bbox_overlaps.cpp @@ -0,0 +1,29 @@ +#include "pytorch_cpp_helper.hpp" + +#ifdef MMCV_WITH_CUDA +void BBoxOverlapsCUDAKernelLauncher(const Tensor bboxes1, const Tensor bboxes2, + Tensor ious, const int mode, + const bool aligned, const int offset); + +void bbox_overlaps_cuda(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, + const int mode, const bool aligned, const int offset) { + BBoxOverlapsCUDAKernelLauncher(bboxes1, bboxes2, ious, mode, aligned, offset); +} +#endif + +void bbox_overlaps(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, + const int mode, const bool aligned, const int offset) { + if (bboxes1.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(bboxes1); + CHECK_CUDA_INPUT(bboxes2); + CHECK_CUDA_INPUT(ious); + + bbox_overlaps_cuda(bboxes1, bboxes2, ious, mode, aligned, offset); +#else + AT_ERROR("bbox_overlaps is not compiled with GPU support"); +#endif + } else { + AT_ERROR("bbox_overlaps is not implemented on CPU"); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/bbox_overlaps_cuda.cu b/mmcv/mmcv/ops/csrc/pytorch/bbox_overlaps_cuda.cu new file mode 100644 index 0000000..d6e26c2 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/bbox_overlaps_cuda.cu @@ -0,0 +1,22 @@ +#include "bbox_overlaps_cuda_kernel.cuh" +#include "pytorch_cuda_helper.hpp" + +void BBoxOverlapsCUDAKernelLauncher(const Tensor bboxes1, const Tensor bboxes2, + Tensor ious, const int mode, + const bool aligned, const int offset) { + int output_size = ious.numel(); + int num_bbox1 = bboxes1.size(0); + int num_bbox2 = bboxes2.size(0); + + at::cuda::CUDAGuard device_guard(bboxes1.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + bboxes1.scalar_type(), "bbox_overlaps_cuda_kernel", ([&] { + bbox_overlaps_cuda_kernel + <<>>( + bboxes1.data_ptr(), bboxes2.data_ptr(), + ious.data_ptr(), num_bbox1, num_bbox2, mode, aligned, + offset); + })); + AT_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/carafe.cpp b/mmcv/mmcv/ops/csrc/pytorch/carafe.cpp new file mode 100644 index 0000000..6761928 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/carafe.cpp @@ -0,0 +1,83 @@ +#include "pytorch_cpp_helper.hpp" + +#ifdef MMCV_WITH_CUDA +void CARAFEForwardCUDAKernelLauncher(const Tensor features, const Tensor masks, + Tensor rfeatures, Tensor routput, + Tensor rmasks, Tensor output, + const int kernel_size, + const int group_size, + const int scale_factor); + +void CARAFEBackwardCUDAKernelLauncher( + const Tensor top_grad, const Tensor rfeatures, const Tensor masks, + Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad, + Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad, + const int kernel_size, const int group_size, const int scale_factor); + +void carafe_forward_cuda(Tensor features, Tensor masks, Tensor rfeatures, + Tensor routput, Tensor rmasks, Tensor output, + int kernel_size, int group_size, int scale_factor) { + CARAFEForwardCUDAKernelLauncher(features, masks, rfeatures, routput, rmasks, + output, kernel_size, group_size, + scale_factor); +} + +void carafe_backward_cuda(Tensor top_grad, Tensor rfeatures, Tensor masks, + Tensor rtop_grad, Tensor rbottom_grad_hs, + Tensor rbottom_grad, Tensor rmask_grad, + Tensor bottom_grad, Tensor mask_grad, int kernel_size, + int group_size, int scale_factor) { + CARAFEBackwardCUDAKernelLauncher(top_grad, rfeatures, masks, rtop_grad, + rbottom_grad_hs, rbottom_grad, rmask_grad, + bottom_grad, mask_grad, kernel_size, + group_size, scale_factor); +} +#endif + +void carafe_forward(Tensor features, Tensor masks, Tensor rfeatures, + Tensor routput, Tensor rmasks, Tensor output, + int kernel_size, int group_size, int scale_factor) { + if (features.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(features); + CHECK_CUDA_INPUT(masks); + CHECK_CUDA_INPUT(rfeatures); + CHECK_CUDA_INPUT(routput); + CHECK_CUDA_INPUT(rmasks); + CHECK_CUDA_INPUT(output); + carafe_forward_cuda(features, masks, rfeatures, routput, rmasks, output, + kernel_size, group_size, scale_factor); +#else + AT_ERROR("Carafe is not compiled with GPU support"); +#endif + } else { + AT_ERROR("Carafe is not implemented on CPU"); + } +} + +void carafe_backward(Tensor top_grad, Tensor rfeatures, Tensor masks, + Tensor rtop_grad, Tensor rbottom_grad_hs, + Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad, + Tensor mask_grad, int kernel_size, int group_size, + int scale_factor) { + if (top_grad.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(top_grad); + CHECK_CUDA_INPUT(rfeatures); + CHECK_CUDA_INPUT(masks); + CHECK_CUDA_INPUT(rtop_grad); + CHECK_CUDA_INPUT(rbottom_grad_hs); + CHECK_CUDA_INPUT(rbottom_grad); + CHECK_CUDA_INPUT(rmask_grad); + CHECK_CUDA_INPUT(bottom_grad); + CHECK_CUDA_INPUT(mask_grad); + carafe_backward_cuda(top_grad, rfeatures, masks, rtop_grad, rbottom_grad_hs, + rbottom_grad, rmask_grad, bottom_grad, mask_grad, + kernel_size, group_size, scale_factor); +#else + AT_ERROR("Carafe is not compiled with GPU support"); +#endif + } else { + AT_ERROR("Carafe is not implemented on CPU"); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/carafe_cuda.cu b/mmcv/mmcv/ops/csrc/pytorch/carafe_cuda.cu new file mode 100644 index 0000000..2f9ac05 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/carafe_cuda.cu @@ -0,0 +1,179 @@ +#include "carafe_cuda_kernel.cuh" +#include "pytorch_cuda_helper.hpp" + +void CARAFEForwardCUDAKernelLauncher(const Tensor features, const Tensor masks, + Tensor rfeatures, Tensor routput, + Tensor rmasks, Tensor output, + const int kernel_size, + const int group_size, + const int scale_factor) { + const int batch_size = output.size(0); + const int channels = output.size(1); + const int output_height = output.size(2); + const int output_width = output.size(3); + + const int input_height = features.size(2); + const int input_width = features.size(3); + + const int mask_channels = masks.size(1); + + rfeatures.resize_({batch_size, input_height, input_width, channels}); + routput.resize_({batch_size, output_height, output_width, channels}); + rmasks.resize_({batch_size, output_height, output_width, mask_channels}); + + // one warp per pixel + at::cuda::CUDAGuard device_guard(features.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + features.scalar_type(), "NCHW2NHWC_Feature", ([&] { + const scalar_t *bottom_data = features.data_ptr(); + scalar_t *top_data = rfeatures.data_ptr(); + const int dh = divideUP(channels, kTileDim); + const int dw = divideUP(input_height * input_width, kTileDim); + BatchTranspose2DCUDAKernel + <<>>( + batch_size, channels, input_height * input_width, dh, dw, + bottom_data, top_data); + })); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + features.scalar_type(), "NCHW2NHWC_Masks", ([&] { + const scalar_t *bottom_data = masks.data_ptr(); + scalar_t *top_data = rmasks.data_ptr(); + const int dh = divideUP(mask_channels, kTileDim); + const int dw = divideUP(output_height * output_width, kTileDim); + BatchTranspose2DCUDAKernel + <<>>( + batch_size, mask_channels, output_height * output_width, dh, dw, + bottom_data, top_data); + })); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + features.scalar_type(), "CARAFELaucherForward", ([&] { + const int num_kernels = + batch_size * output_height * output_width * THREADS_PER_PIXEL; + const scalar_t *bottom_data = rfeatures.data_ptr(); + const scalar_t *bottom_masks = rmasks.data_ptr(); + scalar_t *top_data = routput.data_ptr(); + + CARAFEForward<<>>( + num_kernels, bottom_data, bottom_masks, kernel_size, group_size, + scale_factor, channels, input_height, input_width, output_height, + output_width, mask_channels, top_data); + })); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + features.scalar_type(), "NHWC2NCHW", ([&] { + const scalar_t *bottom_data = routput.data_ptr(); + scalar_t *top_data = output.data_ptr(); + const int dh = divideUP(output_height * output_width, kTileDim); + const int dw = divideUP(channels, kTileDim); + BatchTranspose2DCUDAKernel + <<>>( + batch_size, output_height * output_width, channels, dh, dw, + bottom_data, top_data); + })); + + AT_CUDA_CHECK(cudaGetLastError()); +} + +void CARAFEBackwardCUDAKernelLauncher( + const Tensor top_grad, const Tensor rfeatures, const Tensor masks, + Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad, + Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad, + const int kernel_size, const int group_size, const int scale_factor) { + const int batch_size = top_grad.size(0); + const int channels = top_grad.size(1); + const int output_height = top_grad.size(2); + const int output_width = top_grad.size(3); + + const int input_height = bottom_grad.size(2); + const int input_width = bottom_grad.size(3); + + const int mask_channels = masks.size(1); + + rtop_grad.resize_({batch_size, output_height, output_width, channels}); + rbottom_grad.resize_({batch_size, input_height, input_width, channels}); + rbottom_grad_hs.resize_({batch_size, output_height, output_width, channels}); + rmask_grad.resize_({batch_size, output_height, output_width, mask_channels}); + + at::cuda::CUDAGuard device_guard(top_grad.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + top_grad.scalar_type(), "NCHW2NHWC_Top_Grad", ([&] { + const scalar_t *bottom_data = top_grad.data_ptr(); + scalar_t *top_data = rtop_grad.data_ptr(); + const int dh = divideUP(channels, kTileDim); + const int dw = divideUP(output_height * output_width, kTileDim); + BatchTranspose2DCUDAKernel + <<>>( + batch_size, channels, output_height * output_width, dh, dw, + bottom_data, top_data); + })); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + top_grad.scalar_type(), "CARAFELaucherBackward_Feature", ([&] { + const int num_kernels = + batch_size * output_height * output_width * THREADS_PER_PIXEL; + const scalar_t *top_diff = rtop_grad.data_ptr(); + const scalar_t *bottom_masks = masks.data_ptr(); + scalar_t *bottom_diff = rbottom_grad_hs.data_ptr(); + + CARAFEBackward_Feature + <<>>(num_kernels, top_diff, bottom_masks, kernel_size, + group_size, scale_factor, channels, input_height, + input_width, output_height, output_width, + mask_channels, bottom_diff); + })); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + top_grad.scalar_type(), "FeatureSum", ([&] { + const int num_kernels = + batch_size * input_height * input_width * THREADS_PER_PIXEL; + const scalar_t *bottom_diff_hs = rbottom_grad_hs.data_ptr(); + scalar_t *bottom_diff = rbottom_grad.data_ptr(); + + FeatureSum + <<>>(num_kernels, bottom_diff_hs, scale_factor, channels, + input_height, input_width, bottom_diff); + })); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + top_grad.scalar_type(), "NHWC2NCHW_Bottom_Grad", ([&] { + const scalar_t *bottom_data = rbottom_grad.data_ptr(); + scalar_t *top_data = bottom_grad.data_ptr(); + const int dh = divideUP(input_height * input_width, kTileDim); + const int dw = divideUP(channels, kTileDim); + BatchTranspose2DCUDAKernel + <<>>( + batch_size, input_height * input_width, channels, dh, dw, + bottom_data, top_data); + })); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + top_grad.scalar_type(), "CARAFELaucherBackward_Mask", ([&] { + const int num_kernels = batch_size * output_height * output_width * + mask_channels * WARP_SIZE; + const scalar_t *top_diff = rtop_grad.data_ptr(); + const scalar_t *bottom_data = rfeatures.data_ptr(); + scalar_t *mask_diff = rmask_grad.data_ptr(); + + CARAFEBackward_Mask + <<>>(num_kernels, top_diff, bottom_data, kernel_size, + group_size, scale_factor, channels, input_height, + input_width, output_height, output_width, + mask_channels, mask_diff); + })); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + top_grad.scalar_type(), "NHWC2NCHW_Mask_Grad", ([&] { + const scalar_t *bottom_data = rmask_grad.data_ptr(); + scalar_t *top_data = mask_grad.data_ptr(); + const int dh = divideUP(output_height * output_width, kTileDim); + const int dw = divideUP(mask_channels, kTileDim); + BatchTranspose2DCUDAKernel + <<>>( + batch_size, output_height * output_width, mask_channels, dh, dw, + bottom_data, top_data); + })); + + AT_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/carafe_naive.cpp b/mmcv/mmcv/ops/csrc/pytorch/carafe_naive.cpp new file mode 100644 index 0000000..bb0aa09 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/carafe_naive.cpp @@ -0,0 +1,68 @@ +#include "pytorch_cpp_helper.hpp" + +#ifdef MMCV_WITH_CUDA +void CARAFENAIVEForwardCUDAKernelLauncher(const Tensor features, + const Tensor masks, Tensor output, + const int kernel_size, + const int group_size, + const int scale_factor); + +void CARAFENAIVEBackwardCUDAKernelLauncher( + const Tensor top_grad, const Tensor features, const Tensor masks, + Tensor bottom_grad, Tensor mask_grad, const int kernel_size, + const int group_size, const int scale_factor); + +void carafe_naive_forward_cuda(Tensor features, Tensor masks, Tensor output, + int kernel_size, int group_size, + int scale_factor) { + CARAFENAIVEForwardCUDAKernelLauncher(features, masks, output, kernel_size, + group_size, scale_factor); +} + +void carafe_naive_backward_cuda(Tensor top_grad, Tensor features, Tensor masks, + Tensor bottom_grad, Tensor mask_grad, + int kernel_size, int group_size, + int scale_factor) { + CARAFENAIVEBackwardCUDAKernelLauncher(top_grad, features, masks, bottom_grad, + mask_grad, kernel_size, group_size, + scale_factor); +} +#endif + +void carafe_naive_forward(Tensor features, Tensor masks, Tensor output, + int kernel_size, int group_size, int scale_factor) { + if (features.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(features); + CHECK_CUDA_INPUT(masks); + CHECK_CUDA_INPUT(output); + carafe_naive_forward_cuda(features, masks, output, kernel_size, group_size, + scale_factor); +#else + AT_ERROR("CarafeNaive is not compiled with GPU support"); +#endif + } else { + AT_ERROR("CarafeNaive is not implemented on CPU"); + } +} + +void carafe_naive_backward(Tensor top_grad, Tensor features, Tensor masks, + Tensor bottom_grad, Tensor mask_grad, + int kernel_size, int group_size, int scale_factor) { + if (top_grad.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(top_grad); + CHECK_CUDA_INPUT(features); + CHECK_CUDA_INPUT(masks); + CHECK_CUDA_INPUT(bottom_grad); + CHECK_CUDA_INPUT(mask_grad); + carafe_naive_backward_cuda(top_grad, features, masks, bottom_grad, + mask_grad, kernel_size, group_size, + scale_factor); +#else + AT_ERROR("CarafeNaive is not compiled with GPU support"); +#endif + } else { + AT_ERROR("CarafeNaive is not implemented on CPU"); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/carafe_naive_cuda.cu b/mmcv/mmcv/ops/csrc/pytorch/carafe_naive_cuda.cu new file mode 100644 index 0000000..ffc05c8 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/carafe_naive_cuda.cu @@ -0,0 +1,51 @@ +#include "carafe_naive_cuda_kernel.cuh" +#include "pytorch_cuda_helper.hpp" + +void CARAFENAIVEForwardCUDAKernelLauncher(const Tensor features, + const Tensor masks, Tensor output, + const int kernel_size, + const int group_size, + const int scale_factor) { + int output_size = output.numel(); + int channels = output.size(1); + int height = output.size(2); + int width = output.size(3); + + at::cuda::CUDAGuard device_guard(features.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + features.scalar_type(), "CARAFENAIVEForward", ([&] { + carafe_naive_forward_cuda_kernel + <<>>( + output_size, features.data_ptr(), + masks.data_ptr(), output.data_ptr(), + kernel_size, group_size, scale_factor, channels, height, width); + })); + + AT_CUDA_CHECK(cudaGetLastError()); +} + +void CARAFENAIVEBackwardCUDAKernelLauncher( + const Tensor top_grad, const Tensor features, const Tensor masks, + Tensor bottom_grad, Tensor mask_grad, const int kernel_size, + const int group_size, const int scale_factor) { + int output_size = top_grad.numel(); + int channels = top_grad.size(1); + int height = top_grad.size(2); + int width = top_grad.size(3); + + at::cuda::CUDAGuard device_guard(top_grad.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + top_grad.scalar_type(), "CARAFENAIVEBackward", ([&] { + carafe_naive_backward_cuda_kernel + <<>>( + output_size, top_grad.data_ptr(), + features.data_ptr(), masks.data_ptr(), + bottom_grad.data_ptr(), + mask_grad.data_ptr(), kernel_size, group_size, + scale_factor, channels, height, width); + })); + + AT_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/cc_attention.cpp b/mmcv/mmcv/ops/csrc/pytorch/cc_attention.cpp new file mode 100644 index 0000000..9d7e48a --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/cc_attention.cpp @@ -0,0 +1,98 @@ +#include "pytorch_cpp_helper.hpp" + +#ifdef MMCV_WITH_CUDA +void CAForwardCUDAKernelLauncher(const Tensor t, const Tensor f, Tensor weight); + +void CABackwardCUDAKernelLauncher(const Tensor dw, const Tensor t, + const Tensor f, Tensor dt, Tensor df); + +void CAMapForwardCUDAKernelLauncher(const Tensor weight, const Tensor g, + Tensor out); + +void CAMapBackwardCUDAKernelLauncher(const Tensor dout, const Tensor weight, + const Tensor g, Tensor dw, Tensor dg); + +void ca_forward_cuda(const Tensor t, const Tensor f, Tensor weight) { + CAForwardCUDAKernelLauncher(t, f, weight); +} + +void ca_backward_cuda(const Tensor dw, const Tensor t, const Tensor f, + Tensor dt, Tensor df) { + CABackwardCUDAKernelLauncher(dw, t, f, dt, df); +} + +void ca_map_forward_cuda(const Tensor weight, const Tensor g, Tensor out) { + CAMapForwardCUDAKernelLauncher(weight, g, out); +} + +void ca_map_backward_cuda(const Tensor dout, const Tensor weight, + const Tensor g, Tensor dw, Tensor dg) { + CAMapBackwardCUDAKernelLauncher(dout, weight, g, dw, dg); +} +#endif + +void ca_forward(const Tensor t, const Tensor f, Tensor weight) { + if (t.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(t); + CHECK_CUDA_INPUT(f); + CHECK_CUDA_INPUT(weight); + ca_forward_cuda(t, f, weight); +#else + AT_ERROR("ca is not compiled with GPU support"); +#endif + } else { + AT_ERROR("ca is not implemented on the CPU"); + } +} + +void ca_backward(const Tensor dw, const Tensor t, const Tensor f, Tensor dt, + Tensor df) { + if (dw.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(dw); + CHECK_CUDA_INPUT(t); + CHECK_CUDA_INPUT(f); + CHECK_CUDA_INPUT(dt); + CHECK_CUDA_INPUT(df); + ca_backward_cuda(dw, t, f, dt, df); +#else + AT_ERROR("ca is not compiled with GPU support"); +#endif + } else { + AT_ERROR("ca is not implemented on the CPU"); + } +} + +void ca_map_forward(const Tensor weight, const Tensor g, Tensor out) { + if (weight.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(g); + CHECK_CUDA_INPUT(out); + ca_map_forward_cuda(weight, g, out); +#else + AT_ERROR("ca_map is not compiled with GPU support"); +#endif + } else { + AT_ERROR("ca is not implemented on the CPU"); + } +} + +void ca_map_backward(const Tensor dout, const Tensor weight, const Tensor g, + Tensor dw, Tensor dg) { + if (dout.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(dout); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(g); + CHECK_CUDA_INPUT(dw); + CHECK_CUDA_INPUT(dg); + ca_map_backward_cuda(dout, weight, g, dw, dg); +#else + AT_ERROR("ca_map is not compiled with GPU support"); +#endif + } else { + AT_ERROR("ca is not implemented on the CPU"); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/cc_attention_cuda.cu b/mmcv/mmcv/ops/csrc/pytorch/cc_attention_cuda.cu new file mode 100644 index 0000000..b948d54 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/cc_attention_cuda.cu @@ -0,0 +1,142 @@ +// Modified from +// https://github.com/LikeLy-Journey/SegmenTron/blob/master/segmentron/modules/csrc/criss_cross_attention/ca_cuda.cu + +#include + +#include + +#include "cc_attention_cuda_kernel.cuh" +#include "pytorch_cuda_helper.hpp" + +void CAForwardCUDAKernelLauncher(const Tensor t, const Tensor f, + Tensor weight) { + AT_ASSERTM(t.device().is_cuda(), "input must be a CUDA tensor"); + AT_ASSERTM(f.device().is_cuda(), "input must be a CUDA tensor"); + + auto n = t.size(0); + auto c = t.size(1); + auto h = t.size(2); + auto w = t.size(3); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + // Run kernel + dim3 threads(32, 32); + int d1 = (w + threads.x - 1) / threads.x; + int d2 = (h + threads.y - 1) / threads.y; + int d3 = h + w; + dim3 blocks(d1, d2, d3); + + AT_DISPATCH_FLOATING_TYPES(t.scalar_type(), "ca_forward", [&] { + ca_forward_kernel<<>>( + t.contiguous().data_ptr(), + f.contiguous().data_ptr(), + weight.contiguous().data_ptr(), n, c, h, w); + }); + THCudaCheck(cudaGetLastError()); +} + +void CABackwardCUDAKernelLauncher(const Tensor dw, const Tensor t, + const Tensor f, Tensor dt, Tensor df) { + AT_ASSERTM(dw.device().is_cuda(), "input must be a CUDA tensor"); + AT_ASSERTM(t.device().is_cuda(), "input must be a CUDA tensor"); + AT_ASSERTM(f.device().is_cuda(), "input must be a CUDA tensor"); + + auto n = t.size(0); + auto c = t.size(1); + auto h = t.size(2); + auto w = t.size(3); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + // Run kernel + dim3 threads(32, 32); + int d1 = (w + threads.x - 1) / threads.x; + int d2 = (h + threads.y - 1) / threads.y; + int d3 = c; + dim3 blocks(d1, d2, d3); + + AT_DISPATCH_FLOATING_TYPES(t.scalar_type(), "ca_backward_kernel_t", [&] { + ca_backward_kernel_t<<>>( + dw.contiguous().data_ptr(), + t.contiguous().data_ptr(), + f.contiguous().data_ptr(), + dt.contiguous().data_ptr(), n, c, h, w); + }); + + AT_DISPATCH_FLOATING_TYPES(f.scalar_type(), "ca_backward_kernel_f", [&] { + ca_backward_kernel_f<<>>( + dw.contiguous().data_ptr(), + t.contiguous().data_ptr(), + f.contiguous().data_ptr(), + df.contiguous().data_ptr(), n, c, h, w); + }); + THCudaCheck(cudaGetLastError()); +} + +void CAMapForwardCUDAKernelLauncher(const Tensor weight, const Tensor g, + Tensor out) { + AT_ASSERTM(weight.device().is_cuda(), "input must be a CUDA tensor"); + AT_ASSERTM(g.device().is_cuda(), "input must be a CUDA tensor"); + + auto n = g.size(0); + auto c = g.size(1); + auto h = g.size(2); + auto w = g.size(3); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + // Run kernel + dim3 threads(32, 32); + int d1 = (w + threads.x - 1) / threads.x; + int d2 = (h + threads.y - 1) / threads.y; + int d3 = c; + dim3 blocks(d1, d2, d3); + + AT_DISPATCH_FLOATING_TYPES(g.scalar_type(), "ca_map_forward", [&] { + ca_map_forward_kernel<<>>( + weight.contiguous().data_ptr(), + g.contiguous().data_ptr(), + out.contiguous().data_ptr(), n, c, h, w); + }); + THCudaCheck(cudaGetLastError()); +} + +void CAMapBackwardCUDAKernelLauncher(const Tensor dout, const Tensor weight, + const Tensor g, Tensor dw, Tensor dg) { + AT_ASSERTM(dout.device().is_cuda(), "input must be a CUDA tensor"); + AT_ASSERTM(weight.device().is_cuda(), "input must be a CUDA tensor"); + AT_ASSERTM(g.device().is_cuda(), "input must be a CUDA tensor"); + + auto n = dout.size(0); + auto c = dout.size(1); + auto h = dout.size(2); + auto w = dout.size(3); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + // Run kernel + dim3 threads(32, 32); + int d1 = (w + threads.x - 1) / threads.x; + int d2 = (h + threads.y - 1) / threads.y; + int d3 = h + w; + dim3 blocks(d1, d2, d3); + + AT_DISPATCH_FLOATING_TYPES( + weight.scalar_type(), "ca_map_backward_kernel_w", [&] { + ca_map_backward_kernel_w<<>>( + dout.contiguous().data_ptr(), + weight.contiguous().data_ptr(), + g.contiguous().data_ptr(), + dw.contiguous().data_ptr(), n, c, h, w); + }); + + AT_DISPATCH_FLOATING_TYPES(g.scalar_type(), "ca_map_backward_kernel_g", [&] { + ca_map_backward_kernel_g<<>>( + dout.contiguous().data_ptr(), + weight.contiguous().data_ptr(), + g.contiguous().data_ptr(), + dg.contiguous().data_ptr(), n, c, h, w); + }); + THCudaCheck(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/corner_pool.cpp b/mmcv/mmcv/ops/csrc/pytorch/corner_pool.cpp new file mode 100644 index 0000000..58da7e9 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/corner_pool.cpp @@ -0,0 +1,239 @@ +// Modified from +// https://github.com/princeton-vl/CornerNet-Lite/tree/master/core/models/py_utils/_cpools/src +#include "pytorch_cpp_helper.hpp" + +Tensor bottom_pool_forward(Tensor input) { + // Initialize output + Tensor output = at::zeros_like(input); + // Get height + int64_t height = input.size(2); + output.copy_(input); + + for (int64_t ind = 1; ind < height; ind <<= 1) { + Tensor max_temp = at::slice(output, 2, ind, height); + Tensor cur_temp = at::slice(output, 2, ind, height).clone(); + Tensor next_temp = at::slice(output, 2, 0, height - ind).clone(); + at::max_out(max_temp, cur_temp, next_temp); + } + + return output; +} + +Tensor bottom_pool_backward(Tensor input, Tensor grad_output) { + auto output = at::zeros_like(input); + + int32_t batch = input.size(0); + int32_t channel = input.size(1); + int32_t height = input.size(2); + int32_t width = input.size(3); + + auto max_val = torch::zeros({batch, channel, width}, + at::device(at::kCUDA).dtype(at::kFloat)); + auto max_ind = torch::zeros({batch, channel, width}, + at::device(at::kCUDA).dtype(at::kLong)); + + auto input_temp = input.select(2, 0); + max_val.copy_(input_temp); + + max_ind.fill_(0); + + auto output_temp = output.select(2, 0); + auto grad_output_temp = grad_output.select(2, 0); + output_temp.copy_(grad_output_temp); + + auto un_max_ind = max_ind.unsqueeze(2); + auto gt_mask = torch::zeros({batch, channel, width}, + at::device(at::kCUDA).dtype(at::kBool)); + auto max_temp = torch::zeros({batch, channel, width}, + at::device(at::kCUDA).dtype(at::kFloat)); + for (int32_t ind = 0; ind < height - 1; ++ind) { + input_temp = input.select(2, ind + 1); + at::gt_out(gt_mask, input_temp, max_val); + + at::masked_select_out(max_temp, input_temp, gt_mask); + max_val.masked_scatter_(gt_mask, max_temp); + max_ind.masked_fill_(gt_mask, ind + 1); + + grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2); + output.scatter_add_(2, un_max_ind, grad_output_temp); + } + + return output; +} + +Tensor left_pool_forward(Tensor input) { + // Initialize output + Tensor output = at::zeros_like(input); + // Get width + int64_t width = input.size(3); + output.copy_(input); + + for (int64_t ind = 1; ind < width; ind <<= 1) { + Tensor max_temp = at::slice(output, 3, 0, width - ind); + Tensor cur_temp = at::slice(output, 3, 0, width - ind).clone(); + Tensor next_temp = at::slice(output, 3, ind, width).clone(); + at::max_out(max_temp, cur_temp, next_temp); + } + + return output; +} + +Tensor left_pool_backward(Tensor input, Tensor grad_output) { + auto output = at::zeros_like(input); + + int32_t batch = input.size(0); + int32_t channel = input.size(1); + int32_t height = input.size(2); + int32_t width = input.size(3); + + auto max_val = torch::zeros({batch, channel, height}, + at::device(at::kCUDA).dtype(at::kFloat)); + auto max_ind = torch::zeros({batch, channel, height}, + at::device(at::kCUDA).dtype(at::kLong)); + + auto input_temp = input.select(3, width - 1); + max_val.copy_(input_temp); + + max_ind.fill_(width - 1); + + auto output_temp = output.select(3, width - 1); + auto grad_output_temp = grad_output.select(3, width - 1); + output_temp.copy_(grad_output_temp); + + auto un_max_ind = max_ind.unsqueeze(3); + auto gt_mask = torch::zeros({batch, channel, height}, + at::device(at::kCUDA).dtype(at::kBool)); + auto max_temp = torch::zeros({batch, channel, height}, + at::device(at::kCUDA).dtype(at::kFloat)); + for (int32_t ind = 1; ind < width; ++ind) { + input_temp = input.select(3, width - ind - 1); + at::gt_out(gt_mask, input_temp, max_val); + + at::masked_select_out(max_temp, input_temp, gt_mask); + max_val.masked_scatter_(gt_mask, max_temp); + max_ind.masked_fill_(gt_mask, width - ind - 1); + + grad_output_temp = grad_output.select(3, width - ind - 1).unsqueeze(3); + output.scatter_add_(3, un_max_ind, grad_output_temp); + } + + return output; +} + +Tensor right_pool_forward(Tensor input) { + // Initialize output + Tensor output = at::zeros_like(input); + // Get width + int64_t width = input.size(3); + output.copy_(input); + + for (int64_t ind = 1; ind < width; ind <<= 1) { + Tensor max_temp = at::slice(output, 3, ind, width); + Tensor cur_temp = at::slice(output, 3, ind, width).clone(); + Tensor next_temp = at::slice(output, 3, 0, width - ind).clone(); + at::max_out(max_temp, cur_temp, next_temp); + } + + return output; +} + +Tensor right_pool_backward(Tensor input, Tensor grad_output) { + Tensor output = at::zeros_like(input); + + int32_t batch = input.size(0); + int32_t channel = input.size(1); + int32_t height = input.size(2); + int32_t width = input.size(3); + + auto max_val = torch::zeros({batch, channel, height}, + at::device(at::kCUDA).dtype(at::kFloat)); + auto max_ind = torch::zeros({batch, channel, height}, + at::device(at::kCUDA).dtype(at::kLong)); + + auto input_temp = input.select(3, 0); + max_val.copy_(input_temp); + + max_ind.fill_(0); + + auto output_temp = output.select(3, 0); + auto grad_output_temp = grad_output.select(3, 0); + output_temp.copy_(grad_output_temp); + + auto un_max_ind = max_ind.unsqueeze(3); + auto gt_mask = torch::zeros({batch, channel, height}, + at::device(at::kCUDA).dtype(at::kBool)); + auto max_temp = torch::zeros({batch, channel, height}, + at::device(at::kCUDA).dtype(at::kFloat)); + for (int32_t ind = 0; ind < width - 1; ++ind) { + input_temp = input.select(3, ind + 1); + at::gt_out(gt_mask, input_temp, max_val); + + at::masked_select_out(max_temp, input_temp, gt_mask); + max_val.masked_scatter_(gt_mask, max_temp); + max_ind.masked_fill_(gt_mask, ind + 1); + + grad_output_temp = grad_output.select(3, ind + 1).unsqueeze(3); + output.scatter_add_(3, un_max_ind, grad_output_temp); + } + + return output; +} + +Tensor top_pool_forward(Tensor input) { + // Initialize output + Tensor output = at::zeros_like(input); + // Get height + int64_t height = input.size(2); + output.copy_(input); + + for (int64_t ind = 1; ind < height; ind <<= 1) { + Tensor max_temp = at::slice(output, 2, 0, height - ind); + Tensor cur_temp = at::slice(output, 2, 0, height - ind).clone(); + Tensor next_temp = at::slice(output, 2, ind, height).clone(); + at::max_out(max_temp, cur_temp, next_temp); + } + + return output; +} + +Tensor top_pool_backward(Tensor input, Tensor grad_output) { + auto output = at::zeros_like(input); + + int32_t batch = input.size(0); + int32_t channel = input.size(1); + int32_t height = input.size(2); + int32_t width = input.size(3); + + auto max_val = torch::zeros({batch, channel, width}, + at::device(at::kCUDA).dtype(at::kFloat)); + auto max_ind = torch::zeros({batch, channel, width}, + at::device(at::kCUDA).dtype(at::kLong)); + + auto input_temp = input.select(2, height - 1); + max_val.copy_(input_temp); + + max_ind.fill_(height - 1); + + auto output_temp = output.select(2, height - 1); + auto grad_output_temp = grad_output.select(2, height - 1); + output_temp.copy_(grad_output_temp); + + auto un_max_ind = max_ind.unsqueeze(2); + auto gt_mask = torch::zeros({batch, channel, width}, + at::device(at::kCUDA).dtype(at::kBool)); + auto max_temp = torch::zeros({batch, channel, width}, + at::device(at::kCUDA).dtype(at::kFloat)); + for (int32_t ind = 1; ind < height; ++ind) { + input_temp = input.select(2, height - ind - 1); + at::gt_out(gt_mask, input_temp, max_val); + + at::masked_select_out(max_temp, input_temp, gt_mask); + max_val.masked_scatter_(gt_mask, max_temp); + max_ind.masked_fill_(gt_mask, height - ind - 1); + + grad_output_temp = grad_output.select(2, height - ind - 1).unsqueeze(2); + output.scatter_add_(2, un_max_ind, grad_output_temp); + } + + return output; +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/deform_conv.cpp b/mmcv/mmcv/ops/csrc/pytorch/deform_conv.cpp new file mode 100644 index 0000000..9b2dc44 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/deform_conv.cpp @@ -0,0 +1,138 @@ +#include "pytorch_cpp_helper.hpp" + +#ifdef MMCV_WITH_CUDA +void DeformConvForwardCUDAKernelLauncher(Tensor input, Tensor weight, + Tensor offset, Tensor output, + Tensor columns, Tensor ones, int kW, + int kH, int dW, int dH, int padW, + int padH, int dilationW, int dilationH, + int group, int deformable_group, + int im2col_step); + +void DeformConvBackwardInputCUDAKernelLauncher( + Tensor input, Tensor offset, Tensor gradOutput, Tensor gradInput, + Tensor gradOffset, Tensor weight, Tensor columns, int kW, int kH, int dW, + int dH, int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, int im2col_step); + +void DeformConvBackwardParametersCUDAKernelLauncher( + Tensor input, Tensor offset, Tensor gradOutput, Tensor gradWeight, + Tensor columns, Tensor ones, int kW, int kH, int dW, int dH, int padW, + int padH, int dilationW, int dilationH, int group, int deformable_group, + float scale, int im2col_step); + +void deform_conv_forward_cuda(Tensor input, Tensor weight, Tensor offset, + Tensor output, Tensor columns, Tensor ones, + int kW, int kH, int dW, int dH, int padW, + int padH, int dilationW, int dilationH, int group, + int deformable_group, int im2col_step) { + DeformConvForwardCUDAKernelLauncher( + input, weight, offset, output, columns, ones, kW, kH, dW, dH, padW, padH, + dilationW, dilationH, group, deformable_group, im2col_step); +} + +void deform_conv_backward_input_cuda(Tensor input, Tensor offset, + Tensor gradOutput, Tensor gradInput, + Tensor gradOffset, Tensor weight, + Tensor columns, int kW, int kH, int dW, + int dH, int padW, int padH, int dilationW, + int dilationH, int group, + int deformable_group, int im2col_step) { + DeformConvBackwardInputCUDAKernelLauncher( + input, offset, gradOutput, gradInput, gradOffset, weight, columns, kW, kH, + dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, + im2col_step); +} + +void deform_conv_backward_parameters_cuda( + Tensor input, Tensor offset, Tensor gradOutput, Tensor gradWeight, + Tensor columns, Tensor ones, int kW, int kH, int dW, int dH, int padW, + int padH, int dilationW, int dilationH, int group, int deformable_group, + float scale, int im2col_step) { + DeformConvBackwardParametersCUDAKernelLauncher( + input, offset, gradOutput, gradWeight, columns, ones, kW, kH, dW, dH, + padW, padH, dilationW, dilationH, group, deformable_group, scale, + im2col_step); +} +#endif + +void deform_conv_forward(Tensor input, Tensor weight, Tensor offset, + Tensor output, Tensor columns, Tensor ones, int kW, + int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int group, + int deformable_group, int im2col_step) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(offset); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(output); + CHECK_CUDA_INPUT(columns); + CHECK_CUDA_INPUT(ones); + + deform_conv_forward_cuda(input, weight, offset, output, columns, ones, kW, + kH, dW, dH, padW, padH, dilationW, dilationH, + group, deformable_group, im2col_step); +#else + AT_ERROR("DeformConv is not compiled with GPU support"); +#endif + } else { + AT_ERROR("DeformConv is not implemented on CPU"); + } +} + +void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput, + Tensor gradInput, Tensor gradOffset, + Tensor weight, Tensor columns, int kW, int kH, + int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int group, + int deformable_group, int im2col_step) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(offset); + CHECK_CUDA_INPUT(gradOutput); + CHECK_CUDA_INPUT(gradInput); + CHECK_CUDA_INPUT(gradOffset); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(columns); + + deform_conv_backward_input_cuda(input, offset, gradOutput, gradInput, + gradOffset, weight, columns, kW, kH, dW, dH, + padW, padH, dilationW, dilationH, group, + deformable_group, im2col_step); +#else + AT_ERROR("DeformConv is not compiled with GPU support"); +#endif + } else { + AT_ERROR("DeformConv is not implemented on CPU"); + } +} + +void deform_conv_backward_parameters(Tensor input, Tensor offset, + Tensor gradOutput, Tensor gradWeight, + Tensor columns, Tensor ones, int kW, + int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int group, + int deformable_group, float scale, + int im2col_step) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(offset); + CHECK_CUDA_INPUT(gradOutput); + CHECK_CUDA_INPUT(gradWeight); + CHECK_CUDA_INPUT(columns); + CHECK_CUDA_INPUT(ones); + + deform_conv_backward_parameters_cuda(input, offset, gradOutput, gradWeight, + columns, ones, kW, kH, dW, dH, padW, + padH, dilationW, dilationH, group, + deformable_group, scale, im2col_step); +#else + AT_ERROR("DeformConv is not compiled with GPU support"); +#endif + } else { + AT_ERROR("DeformConv is not implemented on CPU"); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/deform_conv_cuda.cu b/mmcv/mmcv/ops/csrc/pytorch/deform_conv_cuda.cu new file mode 100644 index 0000000..2d17f59 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/deform_conv_cuda.cu @@ -0,0 +1,517 @@ +#include "deform_conv_cuda_kernel.cuh" +#include "pytorch_cuda_helper.hpp" + +void deformable_im2col(Tensor data_im, Tensor data_offset, const int channels, + const int height, const int width, const int ksize_h, + const int ksize_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int parallel_imgs, const int deformable_group, + Tensor data_col) { + // num_axes should be smaller than block size + // todo: check parallel_imgs is correctly passed in + int height_col = + (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = + (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = channels * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "deformable_im2col_gpu", ([&] { + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + scalar_t *data_col_ = data_col.data_ptr(); + + deformable_im2col_gpu_kernel<<>>( + num_kernels, data_im_, data_offset_, height, width, ksize_h, + ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, + channel_per_deformable_group, parallel_imgs, channels, + deformable_group, height_col, width_col, data_col_); + })); + AT_CUDA_CHECK(cudaGetLastError()); +} + +void deformable_col2im(Tensor data_col, Tensor data_offset, const int channels, + const int height, const int width, const int ksize_h, + const int ksize_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int parallel_imgs, const int deformable_group, + Tensor grad_im) { + // todo: make sure parallel_imgs is passed in correctly + int height_col = + (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = + (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = + channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "deformable_col2im_gpu", ([&] { + const scalar_t *data_col_ = data_col.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + scalar_t *grad_im_ = grad_im.data_ptr(); + + deformable_col2im_gpu_kernel<<>>( + num_kernels, data_col_, data_offset_, channels, height, width, + ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, + dilation_w, channel_per_deformable_group, parallel_imgs, + deformable_group, height_col, width_col, grad_im_); + })); + AT_CUDA_CHECK(cudaGetLastError()); +} + +void deformable_col2im_coord( + Tensor data_col, Tensor data_im, Tensor data_offset, const int channels, + const int height, const int width, const int ksize_h, const int ksize_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, const int parallel_imgs, + const int deformable_group, Tensor grad_offset) { + int height_col = + (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = + (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * + deformable_group * parallel_imgs; + int channel_per_deformable_group = + channels * ksize_h * ksize_w / deformable_group; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] { + const scalar_t *data_col_ = data_col.data_ptr(); + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + scalar_t *grad_offset_ = grad_offset.data_ptr(); + + deformable_col2im_coord_gpu_kernel<<< + GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0, + at::cuda::getCurrentCUDAStream()>>>( + num_kernels, data_col_, data_im_, data_offset_, channels, height, + width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, channel_per_deformable_group, parallel_imgs, + 2 * ksize_h * ksize_w * deformable_group, deformable_group, + height_col, width_col, grad_offset_); + })); + AT_CUDA_CHECK(cudaGetLastError()); +} + +void deform_conv_shape_check(Tensor input, Tensor offset, Tensor *gradOutput, + Tensor weight, int kH, int kW, int dH, int dW, + int padH, int padW, int dilationH, int dilationW, + int group, int deformable_group) { + TORCH_CHECK( + weight.ndimension() == 4, + "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, but got: %s", + weight.ndimension()); + + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + + TORCH_CHECK(kW > 0 && kH > 0, + "kernel size should be greater than zero, but got kH: %d kW: %d", + kH, kW); + + TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW), + "kernel size should be consistent with weight, ", + "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", + kH, kW, weight.size(2), weight.size(3)); + + TORCH_CHECK(dW > 0 && dH > 0, + "stride should be greater than zero, but got dH: %d dW: %d", dH, + dW); + + TORCH_CHECK( + dilationW > 0 && dilationH > 0, + "dilation should be greater than 0, but got dilationH: %d dilationW: %d", + dilationH, dilationW); + + int ndim = input.ndimension(); + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + TORCH_CHECK(ndim == 3 || ndim == 4, + "3D or 4D input tensor expected but got: %s", ndim); + + long nInputPlane = weight.size(1) * group; + long inputHeight = input.size(dimh); + long inputWidth = input.size(dimw); + long nOutputPlane = weight.size(0); + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + + TORCH_CHECK(nInputPlane % deformable_group == 0, + "input channels must divide deformable group size"); + + if (outputWidth < 1 || outputHeight < 1) + AT_ERROR( + "Given input size: (%ld x %ld x %ld). " + "Calculated output size: (%ld x %ld x %ld). Output size is too small", + nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, + outputWidth); + + TORCH_CHECK(input.size(1) == nInputPlane, + "invalid number of input planes, expected: %d, but got: %d", + nInputPlane, input.size(1)); + + TORCH_CHECK((inputHeight >= kH && inputWidth >= kW), + "input image is smaller than kernel"); + + TORCH_CHECK( + (offset.size(2) == outputHeight && offset.size(3) == outputWidth), + "invalid spatial size of offset, expected height: %d width: %d, but " + "got height: %d width: %d", + outputHeight, outputWidth, offset.size(2), offset.size(3)); + + TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW), + "invalid number of channels of offset"); + + if (gradOutput != NULL) { + TORCH_CHECK( + gradOutput->size(dimf) == nOutputPlane, + "invalid number of gradOutput planes, expected: %d, but got: %d", + nOutputPlane, gradOutput->size(dimf)); + + TORCH_CHECK( + (gradOutput->size(dimh) == outputHeight && + gradOutput->size(dimw) == outputWidth), + "invalid size of gradOutput, expected height: %d width: %d , but " + "got height: %d width: %d", + outputHeight, outputWidth, gradOutput->size(dimh), + gradOutput->size(dimw)); + } +} + +void DeformConvForwardCUDAKernelLauncher(Tensor input, Tensor weight, + Tensor offset, Tensor output, + Tensor columns, Tensor ones, int kW, + int kH, int dW, int dH, int padW, + int padH, int dilationW, int dilationH, + int group, int deformable_group, + int im2col_step) { + // todo: resize columns to include im2col: done + // todo: add im2col_step as input + // todo: add new output buffer and transpose it to output (or directly + // transpose output) todo: possibly change data indexing because of + // parallel_imgs + + deform_conv_shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, + padW, dilationH, dilationW, group, deformable_group); + at::DeviceGuard guard(input.device()); + + int batch = 1; + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input.unsqueeze_(0); + offset.unsqueeze_(0); + } + + // todo: assert batchsize dividable by im2col_step + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, + outputHeight, outputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < outputHeight * outputWidth) { + ones = at::ones({outputHeight, outputWidth}, input.options()); + } + + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + Tensor output_buffer = at::zeros({batchSize / im2col_step, nOutputPlane, + im2col_step * outputHeight, outputWidth}, + output.options()); + + output_buffer = output_buffer.view( + {output_buffer.size(0), group, output_buffer.size(1) / group, + output_buffer.size(2), output_buffer.size(3)}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + + for (int g = 0; g < group; g++) { + output_buffer[elt][g] = output_buffer[elt][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output_buffer[elt][g]); + } + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + } + + output_buffer = output_buffer.view( + {output_buffer.size(0), output_buffer.size(1) * output_buffer.size(2), + output_buffer.size(3), output_buffer.size(4)}); + + output_buffer = output_buffer.view({batchSize / im2col_step, nOutputPlane, + im2col_step, outputHeight, outputWidth}); + output_buffer.transpose_(1, 2); + output.copy_(output_buffer); + output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + output = output.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + } +} + +void DeformConvBackwardInputCUDAKernelLauncher( + Tensor input, Tensor offset, Tensor gradOutput, Tensor gradInput, + Tensor gradOffset, Tensor weight, Tensor columns, int kW, int kH, int dW, + int dH, int padW, int padH, int dilationW, int dilationH, int group, + int deformable_group, int im2col_step) { + deform_conv_shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, + padH, padW, dilationH, dilationW, group, + deformable_group); + at::DeviceGuard guard(input.device()); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view({1, input.size(0), input.size(1), input.size(2)}); + offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + // change order of grad output + gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, + nOutputPlane, outputHeight, outputWidth}); + gradOutput.transpose_(1, 2); + + gradInput = gradInput.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + gradOffset = gradOffset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, + outputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + // divide into groups + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + gradOutput = gradOutput.view( + {gradOutput.size(0), group, gradOutput.size(1) / group, + gradOutput.size(2), gradOutput.size(3), gradOutput.size(4)}); + + for (int g = 0; g < group; g++) { + columns[g] = columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), + gradOutput[elt][g].flatten(1), 0.0f, 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradOutput = gradOutput.view( + {gradOutput.size(0), gradOutput.size(1) * gradOutput.size(2), + gradOutput.size(3), gradOutput.size(4), gradOutput.size(5)}); + + deformable_col2im_coord(columns, input[elt], offset[elt], nInputPlane, + inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, + dilationH, dilationW, im2col_step, deformable_group, + gradOffset[elt]); + + deformable_col2im(columns, offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, gradInput[elt]); + } + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + gradOffset = gradOffset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + gradOffset = + gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); + } +} + +void DeformConvBackwardParametersCUDAKernelLauncher( + Tensor input, Tensor offset, Tensor gradOutput, Tensor gradWeight, + Tensor columns, Tensor ones, int kW, int kH, int dW, int dH, int padW, + int padH, int dilationW, int dilationH, int group, int deformable_group, + float scale, int im2col_step) { + // todo: transpose and reshape outGrad + // todo: reshape columns + // todo: add im2col_step as input + + deform_conv_shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, + dW, padH, padW, dilationH, dilationW, group, + deformable_group); + at::DeviceGuard guard(input.device()); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view( + at::IntList({1, input.size(0), input.size(1), input.size(2)})); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = gradWeight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, + nOutputPlane, outputHeight, outputWidth}); + gradOutput.transpose_(1, 2); + + Tensor gradOutputBuffer = at::zeros_like(gradOutput); + gradOutputBuffer = + gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step, + outputHeight, outputWidth}); + gradOutputBuffer = gradOutputBuffer.contiguous(); + gradOutputBuffer.copy_(gradOutput); + gradOutputBuffer = + gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, + im2col_step * outputHeight, outputWidth}); + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, + inputHeight, inputWidth}); + offset = + offset.view({batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, + dilationW, im2col_step, deformable_group, columns); + + // divide into group + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.size(0), group, gradOutputBuffer.size(1) / group, + gradOutputBuffer.size(2), gradOutputBuffer.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + gradWeight = + gradWeight.view({group, gradWeight.size(0) / group, gradWeight.size(1), + gradWeight.size(2), gradWeight.size(3)}); + + for (int g = 0; g < group; g++) { + gradWeight[g] = gradWeight[g] + .flatten(1) + .addmm_(gradOutputBuffer[elt][g].flatten(1), + columns[g].transpose(1, 0), 1.0, scale) + .view_as(gradWeight[g]); + } + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.size(0), + gradOutputBuffer.size(1) * gradOutputBuffer.size(2), + gradOutputBuffer.size(3), gradOutputBuffer.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1), + gradWeight.size(2), gradWeight.size(3), + gradWeight.size(4)}); + } + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/deform_roi_pool.cpp b/mmcv/mmcv/ops/csrc/pytorch/deform_roi_pool.cpp new file mode 100644 index 0000000..1022ea9 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/deform_roi_pool.cpp @@ -0,0 +1,81 @@ +#include "pytorch_cpp_helper.hpp" + +#ifdef MMCV_WITH_CUDA +void DeformRoIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, + Tensor offset, Tensor output, + int pooled_height, int pooled_width, + float spatial_scale, + int sampling_ratio, float gamma); + +void DeformRoIPoolBackwardCUDAKernelLauncher( + Tensor grad_output, Tensor input, Tensor rois, Tensor offset, + Tensor grad_input, Tensor grad_offset, int pooled_height, int pooled_width, + float spatial_scale, int sampling_ratio, float gamma); + +void deform_roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor offset, + Tensor output, int pooled_height, + int pooled_width, float spatial_scale, + int sampling_ratio, float gamma) { + DeformRoIPoolForwardCUDAKernelLauncher(input, rois, offset, output, + pooled_height, pooled_width, + spatial_scale, sampling_ratio, gamma); +} + +void deform_roi_pool_backward_cuda(Tensor grad_output, Tensor input, + Tensor rois, Tensor offset, + Tensor grad_input, Tensor grad_offset, + int pooled_height, int pooled_width, + float spatial_scale, int sampling_ratio, + float gamma) { + DeformRoIPoolBackwardCUDAKernelLauncher( + grad_output, input, rois, offset, grad_input, grad_offset, pooled_height, + pooled_width, spatial_scale, sampling_ratio, gamma); +} +#endif + +void deform_roi_pool_forward(Tensor input, Tensor rois, Tensor offset, + Tensor output, int pooled_height, int pooled_width, + float spatial_scale, int sampling_ratio, + float gamma) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(rois); + CHECK_CUDA_INPUT(offset); + CHECK_CUDA_INPUT(output); + + deform_roi_pool_forward_cuda(input, rois, offset, output, pooled_height, + pooled_width, spatial_scale, sampling_ratio, + gamma); +#else + AT_ERROR("DeformRoIPool is not compiled with GPU support"); +#endif + } else { + AT_ERROR("DeformRoIPool is not implemented on CPU"); + } +} + +void deform_roi_pool_backward(Tensor grad_output, Tensor input, Tensor rois, + Tensor offset, Tensor grad_input, + Tensor grad_offset, int pooled_height, + int pooled_width, float spatial_scale, + int sampling_ratio, float gamma) { + if (grad_output.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(grad_output); + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(rois); + CHECK_CUDA_INPUT(offset); + CHECK_CUDA_INPUT(grad_input); + CHECK_CUDA_INPUT(grad_offset); + + deform_roi_pool_backward_cuda(grad_output, input, rois, offset, grad_input, + grad_offset, pooled_height, pooled_width, + spatial_scale, sampling_ratio, gamma); +#else + AT_ERROR("DeformRoIPool is not compiled with GPU support"); +#endif + } else { + AT_ERROR("DeformRoIPool is not implemented on CPU"); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/deform_roi_pool_cuda.cu b/mmcv/mmcv/ops/csrc/pytorch/deform_roi_pool_cuda.cu new file mode 100644 index 0000000..c856d6b --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/deform_roi_pool_cuda.cu @@ -0,0 +1,54 @@ +#include "deform_roi_pool_cuda_kernel.cuh" +#include "pytorch_cuda_helper.hpp" + +void DeformRoIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, + Tensor offset, Tensor output, + int pooled_height, int pooled_width, + float spatial_scale, + int sampling_ratio, float gamma) { + int output_size = output.numel(); + int channels = input.size(1); + int height = input.size(2); + int width = input.size(3); + + at::cuda::CUDAGuard device_guard(input.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "deform_roi_pool_forward_cuda_kernel", [&] { + deform_roi_pool_forward_cuda_kernel + <<>>( + output_size, input.data_ptr(), + rois.data_ptr(), offset.data_ptr(), + output.data_ptr(), pooled_height, pooled_width, + static_cast(spatial_scale), sampling_ratio, + static_cast(gamma), channels, height, width); + }); + + AT_CUDA_CHECK(cudaGetLastError()); +} + +void DeformRoIPoolBackwardCUDAKernelLauncher( + Tensor grad_output, Tensor input, Tensor rois, Tensor offset, + Tensor grad_input, Tensor grad_offset, int pooled_height, int pooled_width, + float spatial_scale, int sampling_ratio, float gamma) { + int output_size = grad_output.numel(); + int channels = grad_input.size(1); + int height = grad_input.size(2); + int width = grad_input.size(3); + + at::cuda::CUDAGuard device_guard(grad_output.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_output.scalar_type(), "deform_roi_pool_backward_cuda_kernel", [&] { + deform_roi_pool_backward_cuda_kernel + <<>>( + output_size, grad_output.data_ptr(), + input.data_ptr(), rois.data_ptr(), + offset.data_ptr(), grad_input.data_ptr(), + grad_offset.data_ptr(), pooled_height, pooled_width, + static_cast(spatial_scale), sampling_ratio, + static_cast(gamma), channels, height, width); + }); + + AT_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/focal_loss.cpp b/mmcv/mmcv/ops/csrc/pytorch/focal_loss.cpp new file mode 100644 index 0000000..ae65061 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/focal_loss.cpp @@ -0,0 +1,130 @@ +#include "pytorch_cpp_helper.hpp" + +#ifdef MMCV_WITH_CUDA +void SigmoidFocalLossForwardCUDAKernelLauncher(Tensor input, Tensor target, + Tensor weight, Tensor output, + const float gamma, + const float alpha); + +void SigmoidFocalLossBackwardCUDAKernelLauncher(Tensor input, Tensor target, + Tensor weight, + Tensor grad_input, + const float gamma, + const float alpha); + +void SoftmaxFocalLossForwardCUDAKernelLauncher(Tensor input, Tensor target, + Tensor weight, Tensor output, + const float gamma, + const float alpha); + +void SoftmaxFocalLossBackwardCUDAKernelLauncher(Tensor input, Tensor target, + Tensor weight, Tensor buff, + Tensor grad_input, + const float gamma, + const float alpha); + +void sigmoid_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight, + Tensor output, float gamma, float alpha) { + SigmoidFocalLossForwardCUDAKernelLauncher(input, target, weight, output, + gamma, alpha); +} + +void sigmoid_focal_loss_backward_cuda(Tensor input, Tensor target, + Tensor weight, Tensor grad_input, + float gamma, float alpha) { + SigmoidFocalLossBackwardCUDAKernelLauncher(input, target, weight, grad_input, + gamma, alpha); +} + +void softmax_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight, + Tensor output, float gamma, float alpha) { + SoftmaxFocalLossForwardCUDAKernelLauncher(input, target, weight, output, + gamma, alpha); +} + +void softmax_focal_loss_backward_cuda(Tensor input, Tensor target, + Tensor weight, Tensor buff, + Tensor grad_input, float gamma, + float alpha) { + SoftmaxFocalLossBackwardCUDAKernelLauncher(input, target, weight, buff, + grad_input, gamma, alpha); +} +#endif + +void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight, + Tensor output, float gamma, float alpha) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(target); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(output); + + sigmoid_focal_loss_forward_cuda(input, target, weight, output, gamma, + alpha); +#else + AT_ERROR("SigmoidFocalLoss is not compiled with GPU support"); +#endif + } else { + AT_ERROR("SigmoidFocalLoss is not implemented on CPU"); + } +} + +void sigmoid_focal_loss_backward(Tensor input, Tensor target, Tensor weight, + Tensor grad_input, float gamma, float alpha) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(target); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(grad_input); + + sigmoid_focal_loss_backward_cuda(input, target, weight, grad_input, gamma, + alpha); +#else + AT_ERROR("SigmoidFocalLoss is not compiled with GPU support"); +#endif + } else { + AT_ERROR("SigmoidFocalLoss is not implemented on CPU"); + } +} + +void softmax_focal_loss_forward(Tensor input, Tensor target, Tensor weight, + Tensor output, float gamma, float alpha) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(target); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(output); + + softmax_focal_loss_forward_cuda(input, target, weight, output, gamma, + alpha); +#else + AT_ERROR("SoftmaxFocalLoss is not compiled with GPU support"); +#endif + } else { + AT_ERROR("SoftmaxFocalLoss is not implemented on CPU"); + } +} + +void softmax_focal_loss_backward(Tensor input, Tensor target, Tensor weight, + Tensor buff, Tensor grad_input, float gamma, + float alpha) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(target); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(buff); + CHECK_CUDA_INPUT(grad_input); + + softmax_focal_loss_backward_cuda(input, target, weight, buff, grad_input, + gamma, alpha); +#else + AT_ERROR("SoftmaxFocalLoss is not compiled with GPU support"); +#endif + } else { + AT_ERROR("SoftmaxFocalLoss is not implemented on CPU"); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/focal_loss_cuda.cu b/mmcv/mmcv/ops/csrc/pytorch/focal_loss_cuda.cu new file mode 100644 index 0000000..c7cd215 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/focal_loss_cuda.cu @@ -0,0 +1,110 @@ +#include "pytorch_cuda_helper.hpp" +#include "sigmoid_focal_loss_cuda_kernel.cuh" +#include "softmax_focal_loss_cuda_kernel.cuh" + +void SigmoidFocalLossForwardCUDAKernelLauncher(Tensor input, Tensor target, + Tensor weight, Tensor output, + const float gamma, + const float alpha) { + int output_size = output.numel(); + int num_classes = input.size(1); + AT_ASSERTM(target.max().item() <= (int64_t)num_classes, + "target label should smaller or equal than num classes"); + at::cuda::CUDAGuard device_guard(input.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "sigmoid_focal_loss_forward_cuda_kernel", [&] { + sigmoid_focal_loss_forward_cuda_kernel + <<>>( + output_size, input.data_ptr(), + target.data_ptr(), weight.data_ptr(), + output.data_ptr(), gamma, alpha, num_classes); + }); + + AT_CUDA_CHECK(cudaGetLastError()); +} + +void SigmoidFocalLossBackwardCUDAKernelLauncher(Tensor input, Tensor target, + Tensor weight, + Tensor grad_input, + const float gamma, + const float alpha) { + int output_size = grad_input.numel(); + int num_classes = input.size(1); + + at::cuda::CUDAGuard device_guard(grad_input.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "sigmoid_focal_loss_backward_cuda_kernel", [&] { + sigmoid_focal_loss_backward_cuda_kernel + <<>>( + output_size, input.data_ptr(), + target.data_ptr(), weight.data_ptr(), + grad_input.data_ptr(), gamma, alpha, num_classes); + }); + + AT_CUDA_CHECK(cudaGetLastError()); +} + +void SoftmaxFocalLossForwardCUDAKernelLauncher(Tensor softmax, Tensor target, + Tensor weight, Tensor output, + const float gamma, + const float alpha) { + int output_size = output.numel(); + int num_classes = softmax.size(1); + + AT_ASSERTM(target.max().item() <= (int64_t)num_classes, + "target label should smaller or equal than num classes"); + at::cuda::CUDAGuard device_guard(softmax.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + softmax.scalar_type(), "softmax_focal_loss_forward_cuda_kernel", [&] { + softmax_focal_loss_forward_cuda_kernel + <<>>( + output_size, softmax.data_ptr(), + target.data_ptr(), weight.data_ptr(), + output.data_ptr(), gamma, alpha, num_classes); + }); + + AT_CUDA_CHECK(cudaGetLastError()); +} + +void SoftmaxFocalLossBackwardCUDAKernelLauncher(Tensor softmax, Tensor target, + Tensor weight, Tensor buff, + Tensor grad_input, + const float gamma, + const float alpha) { + int num_classes = softmax.size(1); + + int output_size = buff.numel(); + at::cuda::CUDAGuard device_guard(grad_input.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_input.scalar_type(), + "softmax_focal_loss_backward_cuda1_" + "kernel", + [&] { + softmax_focal_loss_backward_cuda1_kernel + <<>>( + output_size, softmax.data_ptr(), + target.data_ptr(), weight.data_ptr(), + buff.data_ptr(), gamma, alpha, num_classes); + }); + + AT_CUDA_CHECK(cudaGetLastError()); + + output_size = grad_input.numel(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_input.scalar_type(), + "softmax_focal_loss_backward_cuda2_" + "kernel", + [&] { + softmax_focal_loss_backward_cuda2_kernel + <<>>( + output_size, softmax.data_ptr(), + target.data_ptr(), buff.data_ptr(), + grad_input.data_ptr(), num_classes); + }); + + AT_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/info.cpp b/mmcv/mmcv/ops/csrc/pytorch/info.cpp new file mode 100644 index 0000000..a2ebafa --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/info.cpp @@ -0,0 +1,49 @@ +// modified from +// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp +#include "pytorch_cpp_helper.hpp" + +#ifdef MMCV_WITH_CUDA +#include +int get_cudart_version() { return CUDART_VERSION; } +#endif + +std::string get_compiling_cuda_version() { +#ifdef MMCV_WITH_CUDA + std::ostringstream oss; + // copied from + // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 + auto printCudaStyleVersion = [&](int v) { + oss << (v / 1000) << "." << (v / 10 % 100); + if (v % 10 != 0) { + oss << "." << (v % 10); + } + }; + printCudaStyleVersion(get_cudart_version()); + return oss.str(); +#else + return std::string("not available"); +#endif +} + +// similar to +// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp +std::string get_compiler_version() { + std::ostringstream ss; +#if defined(__GNUC__) +#ifndef __clang__ + { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } +#endif +#endif + +#if defined(__clang_major__) + { + ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." + << __clang_patchlevel__; + } +#endif + +#if defined(_MSC_VER) + { ss << "MSVC " << _MSC_FULL_VER; } +#endif + return ss.str(); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/masked_conv2d.cpp b/mmcv/mmcv/ops/csrc/pytorch/masked_conv2d.cpp new file mode 100644 index 0000000..8efc8eb --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/masked_conv2d.cpp @@ -0,0 +1,74 @@ +#include "pytorch_cpp_helper.hpp" + +#ifdef MMCV_WITH_CUDA +void MaskedIm2colForwardCUDAKernelLauncher(const Tensor bottom_data, + const Tensor mask_h_idx, + const Tensor mask_w_idx, + Tensor top_data, const int kernel_h, + const int kernel_w, const int pad_h, + const int pad_w); + +void MaskedCol2imForwardCUDAKernelLauncher(const Tensor bottom_data, + const Tensor mask_h_idx, + const Tensor mask_w_idx, + Tensor top_data, const int height, + const int width, const int channels); + +void masked_im2col_forward_cuda(const Tensor im, const Tensor mask_h_idx, + const Tensor mask_w_idx, Tensor col, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w) { + // im: (n, ic, h, w), kernel size (kh, kw) + // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) + MaskedIm2colForwardCUDAKernelLauncher(im, mask_h_idx, mask_w_idx, col, + kernel_h, kernel_w, pad_h, pad_w); +} + +void masked_col2im_forward_cuda(const Tensor col, const Tensor mask_h_idx, + const Tensor mask_w_idx, Tensor im, int height, + int width, int channels) { + // im: (n, ic, h, w), kernel size (kh, kw) + // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh) + MaskedCol2imForwardCUDAKernelLauncher(col, mask_h_idx, mask_w_idx, im, height, + width, channels); +} +#endif + +void masked_im2col_forward(const Tensor im, const Tensor mask_h_idx, + const Tensor mask_w_idx, Tensor col, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w) { + if (im.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(im); + CHECK_CUDA_INPUT(mask_h_idx); + CHECK_CUDA_INPUT(mask_w_idx); + CHECK_CUDA_INPUT(col); + masked_im2col_forward_cuda(im, mask_h_idx, mask_w_idx, col, kernel_h, + kernel_w, pad_h, pad_w); +#else + AT_ERROR("MaskConv is not compiled with GPU support"); +#endif + } else { + AT_ERROR("MaskConv is not implemented on CPU"); + } +} + +void masked_col2im_forward(const Tensor col, const Tensor mask_h_idx, + const Tensor mask_w_idx, Tensor im, int height, + int width, int channels) { + if (col.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(col); + CHECK_CUDA_INPUT(mask_h_idx); + CHECK_CUDA_INPUT(mask_w_idx); + CHECK_CUDA_INPUT(im); + masked_col2im_forward_cuda(col, mask_h_idx, mask_w_idx, im, height, width, + channels); +#else + AT_ERROR("MaskConv is not compiled with GPU support"); +#endif + } else { + AT_ERROR("MaskConv is not implemented on CPU"); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/masked_conv2d_cuda.cu b/mmcv/mmcv/ops/csrc/pytorch/masked_conv2d_cuda.cu new file mode 100644 index 0000000..564195c --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/masked_conv2d_cuda.cu @@ -0,0 +1,53 @@ +#include "masked_conv2d_cuda_kernel.cuh" +#include "pytorch_cuda_helper.hpp" + +void MaskedIm2colForwardCUDAKernelLauncher(const Tensor bottom_data, + const Tensor mask_h_idx, + const Tensor mask_w_idx, + Tensor top_data, const int kernel_h, + const int kernel_w, const int pad_h, + const int pad_w) { + int channels = bottom_data.size(1); + int height = bottom_data.size(2); + int width = bottom_data.size(3); + int mask_cnt = mask_h_idx.size(0); + int output_size = mask_cnt * channels; + + at::cuda::CUDAGuard device_guard(bottom_data.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + bottom_data.scalar_type(), "MaskedIm2colLaucherForward", ([&] { + const scalar_t *bottom_data_ = bottom_data.data_ptr(); + const int64_t *mask_h_idx_ = mask_h_idx.data_ptr(); + const int64_t *mask_w_idx_ = mask_w_idx.data_ptr(); + scalar_t *top_data_ = top_data.data_ptr(); + MaskedIm2colForward + <<>>( + output_size, bottom_data_, height, width, kernel_h, kernel_w, + pad_h, pad_w, mask_h_idx_, mask_w_idx_, mask_cnt, top_data_); + })); + AT_CUDA_CHECK(cudaGetLastError()); +} + +void MaskedCol2imForwardCUDAKernelLauncher( + const Tensor bottom_data, const Tensor mask_h_idx, const Tensor mask_w_idx, + Tensor top_data, const int height, const int width, const int channels) { + int mask_cnt = mask_h_idx.size(0); + int output_size = mask_cnt * channels; + + at::cuda::CUDAGuard device_guard(bottom_data.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + bottom_data.scalar_type(), "MaskedCol2imLaucherForward", ([&] { + const scalar_t *bottom_data_ = bottom_data.data_ptr(); + const int64_t *mask_h_idx_ = mask_h_idx.data_ptr(); + const int64_t *mask_w_idx_ = mask_w_idx.data_ptr(); + scalar_t *top_data_ = top_data.data_ptr(); + + MaskedCol2imForward + <<>>( + output_size, bottom_data_, height, width, channels, mask_h_idx_, + mask_w_idx_, mask_cnt, top_data_); + })); + AT_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/modulated_deform_conv.cpp b/mmcv/mmcv/ops/csrc/pytorch/modulated_deform_conv.cpp new file mode 100644 index 0000000..a7e6013 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/modulated_deform_conv.cpp @@ -0,0 +1,109 @@ +#include "pytorch_cpp_helper.hpp" + +#ifdef MMCV_WITH_CUDA +void ModulatedDeformConvForwardCUDAKernelLauncher( + Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, + Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w, + const int stride_h, const int stride_w, const int pad_h, const int pad_w, + const int dilation_h, const int dilation_w, const int group, + const int deformable_group, const bool with_bias); + +void ModulatedDeformConvBackwardCUDAKernelLauncher( + Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, + Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight, + Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias); + +void modulated_deform_conv_forward_cuda( + Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, + Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w, + const int stride_h, const int stride_w, const int pad_h, const int pad_w, + const int dilation_h, const int dilation_w, const int group, + const int deformable_group, const bool with_bias) { + ModulatedDeformConvForwardCUDAKernelLauncher( + input, weight, bias, ones, offset, mask, output, columns, kernel_h, + kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, + deformable_group, with_bias); +} + +void modulated_deform_conv_backward_cuda( + Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, + Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight, + Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias) { + ModulatedDeformConvBackwardCUDAKernelLauncher( + input, weight, bias, ones, offset, mask, columns, grad_input, grad_weight, + grad_bias, grad_offset, grad_mask, grad_output, kernel_h, kernel_w, + stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, + deformable_group, with_bias); +} +#endif + +void modulated_deform_conv_forward( + Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, + Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w, + const int stride_h, const int stride_w, const int pad_h, const int pad_w, + const int dilation_h, const int dilation_w, const int group, + const int deformable_group, const bool with_bias) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(bias); + CHECK_CUDA_INPUT(ones); + CHECK_CUDA_INPUT(offset); + CHECK_CUDA_INPUT(mask); + CHECK_CUDA_INPUT(output); + CHECK_CUDA_INPUT(columns); + + modulated_deform_conv_forward_cuda( + input, weight, bias, ones, offset, mask, output, columns, kernel_h, + kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, + group, deformable_group, with_bias); +#else + AT_ERROR("ModulatedDeformConv is not compiled with GPU support"); +#endif + } else { + AT_ERROR("ModulatedDeformConv is not implemented on CPU"); + } +} + +void modulated_deform_conv_backward( + Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, + Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight, + Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(bias); + CHECK_CUDA_INPUT(ones); + CHECK_CUDA_INPUT(offset); + CHECK_CUDA_INPUT(mask); + CHECK_CUDA_INPUT(columns); + CHECK_CUDA_INPUT(grad_input); + CHECK_CUDA_INPUT(grad_weight); + CHECK_CUDA_INPUT(grad_bias); + CHECK_CUDA_INPUT(grad_offset); + CHECK_CUDA_INPUT(grad_mask); + CHECK_CUDA_INPUT(grad_output); + + modulated_deform_conv_backward_cuda( + input, weight, bias, ones, offset, mask, columns, grad_input, + grad_weight, grad_bias, grad_offset, grad_mask, grad_output, kernel_h, + kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, + group, deformable_group, with_bias); +#else + AT_ERROR("ModulatedDeformConv is not compiled with GPU support"); +#endif + } else { + AT_ERROR("ModulatedDeformConv is not implemented on CPU"); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/modulated_deform_conv_cuda.cu b/mmcv/mmcv/ops/csrc/pytorch/modulated_deform_conv_cuda.cu new file mode 100644 index 0000000..cba4937 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/modulated_deform_conv_cuda.cu @@ -0,0 +1,286 @@ +#include "modulated_deform_conv_cuda_kernel.cuh" +#include "pytorch_cuda_helper.hpp" + +void modulated_deformable_im2col_cuda( + const Tensor data_im, const Tensor data_offset, const Tensor data_mask, + const int batch_size, const int channels, const int height_im, + const int width_im, const int height_col, const int width_col, + const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, const int dilation_h, + const int dilation_w, const int deformable_group, Tensor data_col) { + // num_axes should be smaller than block size + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = channels * batch_size * height_col * width_col; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] { + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + const scalar_t *data_mask_ = data_mask.data_ptr(); + scalar_t *data_col_ = data_col.data_ptr(); + + modulated_deformable_im2col_gpu_kernel<<< + GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0, + at::cuda::getCurrentCUDAStream()>>>( + num_kernels, data_im_, data_offset_, data_mask_, height_im, + width_im, kernel_h, kenerl_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, channel_per_deformable_group, batch_size, + channels, deformable_group, height_col, width_col, data_col_); + })); + AT_CUDA_CHECK(cudaGetLastError()); +} + +void modulated_deformable_col2im_cuda( + const Tensor data_col, const Tensor data_offset, const Tensor data_mask, + const int batch_size, const int channels, const int height_im, + const int width_im, const int height_col, const int width_col, + const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, const int dilation_h, + const int dilation_w, const int deformable_group, Tensor grad_im) { + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = + channels * kernel_h * kernel_w * batch_size * height_col * width_col; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] { + const scalar_t *data_col_ = data_col.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + const scalar_t *data_mask_ = data_mask.data_ptr(); + scalar_t *grad_im_ = grad_im.data_ptr(); + + modulated_deformable_col2im_gpu_kernel<<< + GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0, + at::cuda::getCurrentCUDAStream()>>>( + num_kernels, data_col_, data_offset_, data_mask_, channels, + height_im, width_im, kernel_h, kernel_w, pad_h, pad_w, stride_h, + stride_w, dilation_h, dilation_w, channel_per_deformable_group, + batch_size, deformable_group, height_col, width_col, grad_im_); + })); + AT_CUDA_CHECK(cudaGetLastError()); +} + +void modulated_deformable_col2im_coord_cuda( + const Tensor data_col, const Tensor data_im, const Tensor data_offset, + const Tensor data_mask, const int batch_size, const int channels, + const int height_im, const int width_im, const int height_col, + const int width_col, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, const int deformable_group, + Tensor grad_offset, Tensor grad_mask) { + const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * + kernel_w * deformable_group; + const int channel_per_deformable_group = + channels * kernel_h * kernel_w / deformable_group; + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] { + const scalar_t *data_col_ = data_col.data_ptr(); + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + const scalar_t *data_mask_ = data_mask.data_ptr(); + scalar_t *grad_offset_ = grad_offset.data_ptr(); + scalar_t *grad_mask_ = grad_mask.data_ptr(); + + modulated_deformable_col2im_coord_gpu_kernel<<< + GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0, + at::cuda::getCurrentCUDAStream()>>>( + num_kernels, data_col_, data_im_, data_offset_, data_mask_, + channels, height_im, width_im, kernel_h, kernel_w, pad_h, pad_w, + stride_h, stride_w, dilation_h, dilation_w, + channel_per_deformable_group, batch_size, + 2 * kernel_h * kernel_w * deformable_group, deformable_group, + height_col, width_col, grad_offset_, grad_mask_); + })); + AT_CUDA_CHECK(cudaGetLastError()); +} + +void ModulatedDeformConvForwardCUDAKernelLauncher( + Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, + Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w, + const int stride_h, const int stride_w, const int pad_h, const int pad_w, + const int dilation_h, const int dilation_w, const int group, + const int deformable_group, const bool with_bias) { + at::DeviceGuard guard(input.device()); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_out = weight.size(0); + const int channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR("Input shape and kernel shape wont match: (%d x %d vs %d x %d).", + kernel_h_, kernel_w, kernel_h_, kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR("Input shape and kernel channels wont match: (%d vs %d).", + channels, channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = at::ones({height_out, width_out}, input.options()); + } + + // resize output + output = output.view({batch, channels_out, height_out, width_out}).zero_(); + // resize temporary columns + columns = + at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out}, + input.options()); + + output = output.view({output.size(0), group, output.size(1) / group, + output.size(2), output.size(3)}); + + for (int b = 0; b < batch; b++) { + modulated_deformable_im2col_cuda( + input[b], offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, columns); + + // divide into group + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + + for (int g = 0; g < group; g++) { + output[b][g] = output[b][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output[b][g]); + } + + weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), + weight.size(3), weight.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + } + + output = output.view({output.size(0), output.size(1) * output.size(2), + output.size(3), output.size(4)}); + + if (with_bias) { + output += bias.view({1, bias.size(0), 1, 1}); + } +} + +void ModulatedDeformConvBackwardCUDAKernelLauncher( + Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, + Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight, + Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias) { + at::DeviceGuard guard(input.device()); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR("Input shape and kernel shape wont match: (%d x %d vs %d x %d).", + kernel_h_, kernel_w, kernel_h_, kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR("Input shape and kernel channels wont match: (%d vs %d).", + channels, channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = at::ones({height_out, width_out}, input.options()); + } + + grad_input = grad_input.view({batch, channels, height, width}); + columns = at::zeros({channels * kernel_h * kernel_w, height_out * width_out}, + input.options()); + + grad_output = + grad_output.view({grad_output.size(0), group, grad_output.size(1) / group, + grad_output.size(2), grad_output.size(3)}); + + for (int b = 0; b < batch; b++) { + // divide int group + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, weight.size(0) / group, weight.size(1), + weight.size(2), weight.size(3)}); + + for (int g = 0; g < group; g++) { + columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), + grad_output[b][g].flatten(1), 0.0f, 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), + weight.size(3), weight.size(4)}); + + // gradient w.r.t. input coordinate data + modulated_deformable_col2im_coord_cuda( + columns, input[b], offset[b], mask[b], 1, channels, height, width, + height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, + stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b], + grad_mask[b]); + // gradient w.r.t. input data + modulated_deformable_col2im_cuda( + columns, offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, grad_input[b]); + + // gradient w.r.t. weight, dWeight should accumulate across the batch and + // group + modulated_deformable_im2col_cuda( + input[b], offset[b], mask[b], 1, channels, height, width, height_out, + width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + grad_weight = grad_weight.view({group, grad_weight.size(0) / group, + grad_weight.size(1), grad_weight.size(2), + grad_weight.size(3)}); + if (with_bias) + grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); + + for (int g = 0; g < group; g++) { + grad_weight[g] = + grad_weight[g] + .flatten(1) + .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) + .view_as(grad_weight[g]); + if (with_bias) { + grad_bias[g] = + grad_bias[g] + .view({-1, 1}) + .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) + .view(-1); + } + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1), + grad_weight.size(2), grad_weight.size(3), + grad_weight.size(4)}); + if (with_bias) + grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); + } + grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1), + grad_output.size(2), grad_output.size(3), + grad_output.size(4)}); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/nms.cpp b/mmcv/mmcv/ops/csrc/pytorch/nms.cpp new file mode 100644 index 0000000..a6db461 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/nms.cpp @@ -0,0 +1,260 @@ +#include "pytorch_cpp_helper.hpp" + +#ifdef MMCV_WITH_CUDA +Tensor NMSCUDAKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold, + int offset); + +Tensor nms_cuda(Tensor boxes, Tensor scores, float iou_threshold, int offset) { + return NMSCUDAKernelLauncher(boxes, scores, iou_threshold, offset); +} +#endif + +Tensor nms_cpu(Tensor boxes, Tensor scores, float iou_threshold, int offset) { + if (boxes.numel() == 0) { + return at::empty({0}, boxes.options().dtype(at::kLong)); + } + auto x1_t = boxes.select(1, 0).contiguous(); + auto y1_t = boxes.select(1, 1).contiguous(); + auto x2_t = boxes.select(1, 2).contiguous(); + auto y2_t = boxes.select(1, 3).contiguous(); + + Tensor areas_t = (x2_t - x1_t + offset) * (y2_t - y1_t + offset); + + auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); + + auto nboxes = boxes.size(0); + Tensor select_t = at::ones({nboxes}, boxes.options().dtype(at::kBool)); + + auto select = select_t.data_ptr(); + auto order = order_t.data_ptr(); + auto x1 = x1_t.data_ptr(); + auto y1 = y1_t.data_ptr(); + auto x2 = x2_t.data_ptr(); + auto y2 = y2_t.data_ptr(); + auto areas = areas_t.data_ptr(); + + for (int64_t _i = 0; _i < nboxes; _i++) { + if (select[_i] == false) continue; + auto i = order[_i]; + auto ix1 = x1[i]; + auto iy1 = y1[i]; + auto ix2 = x2[i]; + auto iy2 = y2[i]; + auto iarea = areas[i]; + + for (int64_t _j = _i + 1; _j < nboxes; _j++) { + if (select[_j] == false) continue; + auto j = order[_j]; + auto xx1 = std::max(ix1, x1[j]); + auto yy1 = std::max(iy1, y1[j]); + auto xx2 = std::min(ix2, x2[j]); + auto yy2 = std::min(iy2, y2[j]); + + auto w = std::max(0.f, xx2 - xx1 + offset); + auto h = std::max(0.f, yy2 - yy1 + offset); + auto inter = w * h; + auto ovr = inter / (iarea + areas[j] - inter); + if (ovr >= iou_threshold) select[_j] = false; + } + } + return order_t.masked_select(select_t); +} + +Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset) { + if (boxes.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(boxes); + CHECK_CUDA_INPUT(scores); + return nms_cuda(boxes, scores, iou_threshold, offset); +#else + AT_ERROR("nms is not compiled with GPU support"); +#endif + } else { + CHECK_CPU_INPUT(boxes); + CHECK_CPU_INPUT(scores); + return nms_cpu(boxes, scores, iou_threshold, offset); + } +} + +Tensor softnms_cpu(Tensor boxes, Tensor scores, Tensor dets, + float iou_threshold, float sigma, float min_score, + int method, int offset) { + if (boxes.numel() == 0) { + return at::empty({0}, boxes.options().dtype(at::kLong)); + } + + auto x1_t = boxes.select(1, 0).contiguous(); + auto y1_t = boxes.select(1, 1).contiguous(); + auto x2_t = boxes.select(1, 2).contiguous(); + auto y2_t = boxes.select(1, 3).contiguous(); + auto scores_t = scores.clone(); + + Tensor areas_t = (x2_t - x1_t + offset) * (y2_t - y1_t + offset); + + auto nboxes = boxes.size(0); + auto x1 = x1_t.data_ptr(); + auto y1 = y1_t.data_ptr(); + auto x2 = x2_t.data_ptr(); + auto y2 = y2_t.data_ptr(); + auto sc = scores_t.data_ptr(); + auto areas = areas_t.data_ptr(); + auto de = dets.data_ptr(); + + int64_t pos = 0; + Tensor inds_t = at::arange(nboxes, boxes.options().dtype(at::kLong)); + auto inds = inds_t.data_ptr(); + + for (int64_t i = 0; i < nboxes; i++) { + auto max_score = sc[i]; + auto max_pos = i; + + pos = i + 1; + // get max box + while (pos < nboxes) { + if (max_score < sc[pos]) { + max_score = sc[pos]; + max_pos = pos; + } + pos = pos + 1; + } + // swap + auto ix1 = de[i * 5 + 0] = x1[max_pos]; + auto iy1 = de[i * 5 + 1] = y1[max_pos]; + auto ix2 = de[i * 5 + 2] = x2[max_pos]; + auto iy2 = de[i * 5 + 3] = y2[max_pos]; + auto iscore = de[i * 5 + 4] = sc[max_pos]; + auto iarea = areas[max_pos]; + auto iind = inds[max_pos]; + x1[max_pos] = x1[i]; + y1[max_pos] = y1[i]; + x2[max_pos] = x2[i]; + y2[max_pos] = y2[i]; + sc[max_pos] = sc[i]; + areas[max_pos] = areas[i]; + inds[max_pos] = inds[i]; + x1[i] = ix1; + y1[i] = iy1; + x2[i] = ix2; + y2[i] = iy2; + sc[i] = iscore; + areas[i] = iarea; + inds[i] = iind; + + pos = i + 1; + while (pos < nboxes) { + auto xx1 = std::max(ix1, x1[pos]); + auto yy1 = std::max(iy1, y1[pos]); + auto xx2 = std::min(ix2, x2[pos]); + auto yy2 = std::min(iy2, y2[pos]); + + auto w = std::max(0.f, xx2 - xx1 + offset); + auto h = std::max(0.f, yy2 - yy1 + offset); + auto inter = w * h; + auto ovr = inter / (iarea + areas[pos] - inter); + + float weight = 1.; + if (method == 0) { + if (ovr >= iou_threshold) weight = 0; + } else if (method == 1) { + if (ovr >= iou_threshold) weight = 1 - ovr; + } else if (method == 2) { + weight = std::exp(-(ovr * ovr) / sigma); + } + sc[pos] *= weight; + // if box score falls below threshold, discard the box by + // swapping with last box update N + if (sc[pos] < min_score) { + x1[pos] = x1[nboxes - 1]; + y1[pos] = y1[nboxes - 1]; + x2[pos] = x2[nboxes - 1]; + y2[pos] = y2[nboxes - 1]; + sc[pos] = sc[nboxes - 1]; + areas[pos] = areas[nboxes - 1]; + inds[pos] = inds[nboxes - 1]; + nboxes = nboxes - 1; + pos = pos - 1; + } + pos = pos + 1; + } + } + return inds_t.slice(0, 0, nboxes); +} + +Tensor softnms(Tensor boxes, Tensor scores, Tensor dets, float iou_threshold, + float sigma, float min_score, int method, int offset) { + if (boxes.device().is_cuda()) { + AT_ERROR("softnms is not implemented on GPU"); + } else { + return softnms_cpu(boxes, scores, dets, iou_threshold, sigma, min_score, + method, offset); + } +} + +std::vector > nms_match_cpu(Tensor dets, float iou_threshold) { + auto x1_t = dets.select(1, 0).contiguous(); + auto y1_t = dets.select(1, 1).contiguous(); + auto x2_t = dets.select(1, 2).contiguous(); + auto y2_t = dets.select(1, 3).contiguous(); + auto scores = dets.select(1, 4).contiguous(); + + at::Tensor areas_t = (x2_t - x1_t) * (y2_t - y1_t); + + auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); + + auto ndets = dets.size(0); + at::Tensor suppressed_t = + at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); + + auto suppressed = suppressed_t.data_ptr(); + auto order = order_t.data_ptr(); + auto x1 = x1_t.data_ptr(); + auto y1 = y1_t.data_ptr(); + auto x2 = x2_t.data_ptr(); + auto y2 = y2_t.data_ptr(); + auto areas = areas_t.data_ptr(); + + std::vector keep; + std::vector > matched; + + for (int64_t _i = 0; _i < ndets; _i++) { + auto i = order[_i]; + if (suppressed[i] == 1) continue; + keep.push_back(i); + std::vector v_i; + auto ix1 = x1[i]; + auto iy1 = y1[i]; + auto ix2 = x2[i]; + auto iy2 = y2[i]; + auto iarea = areas[i]; + + for (int64_t _j = _i + 1; _j < ndets; _j++) { + auto j = order[_j]; + if (suppressed[j] == 1) continue; + auto xx1 = std::max(ix1, x1[j]); + auto yy1 = std::max(iy1, y1[j]); + auto xx2 = std::min(ix2, x2[j]); + auto yy2 = std::min(iy2, y2[j]); + + auto w = std::max(static_cast(0), xx2 - xx1); + auto h = std::max(static_cast(0), yy2 - yy1); + auto inter = w * h; + auto ovr = inter / (iarea + areas[j] - inter); + if (ovr >= iou_threshold) { + suppressed[j] = 1; + v_i.push_back(j); + } + } + matched.push_back(v_i); + } + for (int i = 0; i < keep.size(); i++) + matched[i].insert(matched[i].begin(), keep[i]); + return matched; +} + +std::vector > nms_match(Tensor dets, float iou_threshold) { + if (dets.device().is_cuda()) { + AT_ERROR("nms_match is not implemented on GPU"); + } else { + return nms_match_cpu(dets, iou_threshold); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/nms_cuda.cu b/mmcv/mmcv/ops/csrc/pytorch/nms_cuda.cu new file mode 100644 index 0000000..893acae --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/nms_cuda.cu @@ -0,0 +1,52 @@ +#include "nms_cuda_kernel.cuh" +#include "pytorch_cuda_helper.hpp" + +Tensor NMSCUDAKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold, + int offset) { + at::cuda::CUDAGuard device_guard(boxes.device()); + + if (boxes.numel() == 0) { + return at::empty({0}, boxes.options().dtype(at::kLong)); + } + auto order_t = std::get<1>(scores.sort(0, /*descending=*/true)); + auto boxes_sorted = boxes.index_select(0, order_t); + + int boxes_num = boxes.size(0); + const int col_blocks = DIVUP(boxes_num, threadsPerBlock); + Tensor mask = + at::empty({boxes_num, col_blocks}, boxes.options().dtype(at::kLong)); + dim3 blocks(col_blocks, col_blocks); + dim3 threads(threadsPerBlock); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + nms_cuda<<>>( + boxes_num, iou_threshold, offset, boxes_sorted.data_ptr(), + (unsigned long long*)mask.data_ptr()); + + at::Tensor mask_cpu = mask.to(at::kCPU); + unsigned long long* mask_host = + (unsigned long long*)mask_cpu.data_ptr(); + + std::vector remv(col_blocks); + memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); + + at::Tensor keep_t = + at::zeros({boxes_num}, boxes.options().dtype(at::kBool).device(at::kCPU)); + bool* keep = keep_t.data_ptr(); + + for (int i = 0; i < boxes_num; i++) { + int nblock = i / threadsPerBlock; + int inblock = i % threadsPerBlock; + + if (!(remv[nblock] & (1ULL << inblock))) { + keep[i] = true; + // set every overlap box with bit 1 in remv + unsigned long long* p = mask_host + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv[j] |= p[j]; + } + } + } + + AT_CUDA_CHECK(cudaGetLastError()); + return order_t.masked_select(keep_t.to(at::kCUDA)); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/psamask.cpp b/mmcv/mmcv/ops/csrc/pytorch/psamask.cpp new file mode 100644 index 0000000..d5d0e56 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/psamask.cpp @@ -0,0 +1,255 @@ +// Modified from +// https://github.com/hszhao/semseg/blob/master/lib/psa/src +#include "pytorch_cpp_helper.hpp" + +#ifndef min +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#endif +#ifndef max +#define max(a, b) (((a) > (b)) ? (a) : (b)) +#endif + +void psamask_collect_forward(const int num_, const int h_feature, + const int w_feature, const int h_mask, + const int w_mask, const int half_h_mask, + const int half_w_mask, const Tensor mask_data, + Tensor buffer_data) { + for (int n = 0; n < num_; n++) { + for (int h = 0; h < h_feature; h++) { + for (int w = 0; w < w_feature; w++) { + // effective mask region : [hstart, hend) x [wstart, wend) with + // mask-indexed + const int hstart = max(0, half_h_mask - h); + const int hend = min(h_mask, h_feature + half_h_mask - h); + const int wstart = max(0, half_w_mask - w); + const int wend = min(w_mask, w_feature + half_w_mask - w); + // (hidx, widx ) with mask-indexed + // (hidx + h - half_h_mask, widx + w - half_w_mask) with + // feature-indexed + for (int hidx = hstart; hidx < hend; hidx++) { + for (int widx = wstart; widx < wend; widx++) { + buffer_data.view({-1})[(n * h_feature * w_feature + + (hidx + h - half_h_mask) * w_feature + + (widx + w - half_w_mask)) * + h_feature * w_feature + + h * w_feature + w] = + mask_data.view( + {-1})[((n * h_mask * w_mask + hidx * w_mask + widx) * + h_feature + + h) * + w_feature + + w]; + } + } + } + } + } +} + +void psamask_distribute_forward(const int num_, const int h_feature, + const int w_feature, const int h_mask, + const int w_mask, const int half_h_mask, + const int half_w_mask, const Tensor mask_data, + Tensor buffer_data) { + for (int n = 0; n < num_; n++) { + for (int h = 0; h < h_feature; h++) { + for (int w = 0; w < w_feature; w++) { + // effective mask region : [hstart, hend) x [wstart, wend) with + // mask-indexed + const int hstart = max(0, half_h_mask - h); + const int hend = min(h_mask, h_feature + half_h_mask - h); + const int wstart = max(0, half_w_mask - w); + const int wend = min(w_mask, w_feature + half_w_mask - w); + // (hidx, widx ) with mask-indexed + // (hidx + h - half_h_mask, widx + w - half_w_mask) with + // feature-indexed + for (int hidx = hstart; hidx < hend; hidx++) { + for (int widx = wstart; widx < wend; widx++) { + buffer_data.view( + {-1})[(n * h_feature * w_feature + h * w_feature + w) * + h_feature * w_feature + + (hidx + h - half_h_mask) * w_feature + + (widx + w - half_w_mask)] = + mask_data.view( + {-1})[((n * h_mask * w_mask + hidx * w_mask + widx) * + h_feature + + h) * + w_feature + + w]; + } + } + } + } + } +} + +void psamask_collect_backward(const int num_, const int h_feature, + const int w_feature, const int h_mask, + const int w_mask, const int half_h_mask, + const int half_w_mask, const Tensor buffer_diff, + Tensor mask_diff) { + for (int n = 0; n < num_; n++) { + for (int h = 0; h < h_feature; h++) { + for (int w = 0; w < w_feature; w++) { + // effective mask region : [hstart, hend) x [wstart, wend) with + // mask-indexed + const int hstart = max(0, half_h_mask - h); + const int hend = min(h_mask, h_feature + half_h_mask - h); + const int wstart = max(0, half_w_mask - w); + const int wend = min(w_mask, w_feature + half_w_mask - w); + // (hidx, widx ) with mask-indexed + // (hidx + h - half_h_mask, widx + w - half_w_mask) with + // feature-indexed + for (int hidx = hstart; hidx < hend; hidx++) { + for (int widx = wstart; widx < wend; widx++) { + mask_diff.view({-1})[((n * h_mask * w_mask + hidx * w_mask + widx) * + h_feature + + h) * + w_feature + + w] = + buffer_diff.view({-1})[(n * h_feature * w_feature + + (hidx + h - half_h_mask) * w_feature + + (widx + w - half_w_mask)) * + h_feature * w_feature + + h * w_feature + w]; + } + } + } + } + } +} + +void psamask_distribute_backward(const int num_, const int h_feature, + const int w_feature, const int h_mask, + const int w_mask, const int half_h_mask, + const int half_w_mask, + const Tensor buffer_diff, Tensor mask_diff) { + for (int n = 0; n < num_; n++) { + for (int h = 0; h < h_feature; h++) { + for (int w = 0; w < w_feature; w++) { + // effective mask region : [hstart, hend) x [wstart, wend) with + // mask-indexed + const int hstart = max(0, half_h_mask - h); + const int hend = min(h_mask, h_feature + half_h_mask - h); + const int wstart = max(0, half_w_mask - w); + const int wend = min(w_mask, w_feature + half_w_mask - w); + // (hidx, widx ) with mask-indexed + // (hidx + h - half_h_mask, widx + w - half_w_mask) with + // feature-indexed + for (int hidx = hstart; hidx < hend; hidx++) { + for (int widx = wstart; widx < wend; widx++) { + mask_diff.view({-1})[((n * h_mask * w_mask + hidx * w_mask + widx) * + h_feature + + h) * + w_feature + + w] = + buffer_diff.view( + {-1})[(n * h_feature * w_feature + h * w_feature + w) * + h_feature * w_feature + + (hidx + h - half_h_mask) * w_feature + + (widx + w - half_w_mask)]; + } + } + } + } + } +} + +void psamask_forward_cpu(const int psa_type, const Tensor input, Tensor output, + const int num_, const int h_feature, + const int w_feature, const int h_mask, + const int w_mask, const int half_h_mask, + const int half_w_mask) { + if (psa_type == 0) + psamask_collect_forward(num_, h_feature, w_feature, h_mask, w_mask, + half_h_mask, half_w_mask, input, output); + else + psamask_distribute_forward(num_, h_feature, w_feature, h_mask, w_mask, + half_h_mask, half_w_mask, input, output); +} + +void psamask_backward_cpu(const int psa_type, const Tensor grad_output, + Tensor grad_input, const int num_, + const int h_feature, const int w_feature, + const int h_mask, const int w_mask, + const int half_h_mask, const int half_w_mask) { + if (psa_type == 0) + psamask_collect_backward(num_, h_feature, w_feature, h_mask, w_mask, + half_h_mask, half_w_mask, grad_output, grad_input); + else + psamask_distribute_backward(num_, h_feature, w_feature, h_mask, w_mask, + half_h_mask, half_w_mask, grad_output, + grad_input); +} + +#ifdef MMCV_WITH_CUDA +void PSAMaskForwardCUDAKernelLauncher(const int psa_type, const Tensor input, + Tensor output, const int num_, + const int h_feature, const int w_feature, + const int h_mask, const int w_mask, + const int half_h_mask, + const int half_w_mask); + +void PSAMaskBackwardCUDAKernelLauncher( + const int psa_type, const Tensor grad_output, Tensor grad_input, + const int num_, const int h_feature, const int w_feature, const int h_mask, + const int w_mask, const int half_h_mask, const int half_w_mask); + +void psamask_forward_cuda(const int psa_type, const Tensor input, Tensor output, + const int num_, const int h_feature, + const int w_feature, const int h_mask, + const int w_mask, const int half_h_mask, + const int half_w_mask) { + PSAMaskForwardCUDAKernelLauncher(psa_type, input, output, num_, h_feature, + w_feature, h_mask, w_mask, half_h_mask, + half_w_mask); +} + +void psamask_backward_cuda(const int psa_type, const Tensor grad_output, + Tensor grad_input, const int num_, + const int h_feature, const int w_feature, + const int h_mask, const int w_mask, + const int half_h_mask, const int half_w_mask) { + PSAMaskBackwardCUDAKernelLauncher(psa_type, grad_output, grad_input, num_, + h_feature, w_feature, h_mask, w_mask, + half_h_mask, half_w_mask); +} +#endif + +void psamask_forward(const Tensor input, Tensor output, const int psa_type, + const int num_, const int h_feature, const int w_feature, + const int h_mask, const int w_mask, const int half_h_mask, + const int half_w_mask) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(output); + psamask_forward_cuda(psa_type, input, output, num_, h_feature, w_feature, + h_mask, w_mask, half_h_mask, half_w_mask); +#else + AT_ERROR("PSAMask is not compiled with GPU support"); +#endif + } else { + psamask_forward_cpu(psa_type, input, output, num_, h_feature, w_feature, + h_mask, w_mask, half_h_mask, half_w_mask); + } +} + +void psamask_backward(Tensor grad_output, const Tensor grad_input, + const int psa_type, const int num_, const int h_feature, + const int w_feature, const int h_mask, const int w_mask, + const int half_h_mask, const int half_w_mask) { + if (grad_input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(grad_input); + CHECK_CUDA_INPUT(grad_output); + psamask_backward_cuda(psa_type, grad_output, grad_input, num_, h_feature, + w_feature, h_mask, w_mask, half_h_mask, half_w_mask); +#else + AT_ERROR("PSAMask is not compiled with GPU support"); +#endif + } else { + psamask_backward_cpu(psa_type, grad_output, grad_input, num_, h_feature, + w_feature, h_mask, w_mask, half_h_mask, half_w_mask); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/psamask_cuda.cu b/mmcv/mmcv/ops/csrc/pytorch/psamask_cuda.cu new file mode 100644 index 0000000..7bea8aa --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/psamask_cuda.cu @@ -0,0 +1,62 @@ +// Modified from +// https://github.com/hszhao/semseg/blob/master/lib/psa/src + +#include +#include + +#include + +#include "psamask_cuda_kernel.cuh" +#include "pytorch_cuda_helper.hpp" + +void PSAMaskForwardCUDAKernelLauncher(const int psa_type, const Tensor input, + Tensor output, const int num_, + const int h_feature, const int w_feature, + const int h_mask, const int w_mask, + const int half_h_mask, + const int half_w_mask) { + int nthreads = num_ * h_feature * w_feature; + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + if (psa_type == 0) + AT_DISPATCH_FLOATING_TYPES( + input.scalar_type(), "psamask_collect_forward_cuda", [&] { + psamask_collect_forward_cuda<<>>( + nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask, + half_w_mask, input.data_ptr(), + output.data_ptr()); + }); + else + AT_DISPATCH_FLOATING_TYPES( + input.scalar_type(), "psamask_distribute_forward_cuda", [&] { + psamask_distribute_forward_cuda + <<>>( + nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask, + half_w_mask, input.data_ptr(), + output.data_ptr()); + }); +} + +void PSAMaskBackwardCUDAKernelLauncher( + const int psa_type, const Tensor grad_output, Tensor grad_input, + const int num_, const int h_feature, const int w_feature, const int h_mask, + const int w_mask, const int half_h_mask, const int half_w_mask) { + int nthreads = num_ * h_feature * w_feature; + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + if (psa_type == 0) + AT_DISPATCH_FLOATING_TYPES( + grad_input.scalar_type(), "psamask_collect_backward_cuda", [&] { + psamask_collect_backward_cuda<<>>( + nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask, + half_w_mask, grad_output.data_ptr(), + grad_input.data_ptr()); + }); + else + AT_DISPATCH_FLOATING_TYPES( + grad_input.scalar_type(), "psamask_distribute_backward_cuda", [&] { + psamask_distribute_backward_cuda + <<>>( + nthreads, h_feature, w_feature, h_mask, w_mask, half_h_mask, + half_w_mask, grad_output.data_ptr(), + grad_input.data_ptr()); + }); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/pybind.cpp b/mmcv/mmcv/ops/csrc/pytorch/pybind.cpp new file mode 100644 index 0000000..abf5ded --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/pybind.cpp @@ -0,0 +1,360 @@ +#include "pytorch_cpp_helper.hpp" + +std::string get_compiler_version(); +std::string get_compiling_cuda_version(); + +void carafe_naive_forward(Tensor features, Tensor masks, Tensor output, + int kernel_size, int group_size, int scale_factor); + +void carafe_naive_backward(Tensor top_grad, Tensor features, Tensor masks, + Tensor bottom_grad, Tensor mask_grad, + int kernel_size, int group_size, int scale_factor); + +void carafe_forward(Tensor features, Tensor masks, Tensor rfeatures, + Tensor routput, Tensor rmasks, Tensor output, + int kernel_size, int group_size, int scale_factor); + +void carafe_backward(Tensor top_grad, Tensor rfeatures, Tensor masks, + Tensor rtop_grad, Tensor rbottom_grad_hs, + Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad, + Tensor mask_grad, int kernel_size, int group_size, + int scale_factor); + +void deform_conv_forward(Tensor input, Tensor weight, Tensor offset, + Tensor output, Tensor columns, Tensor ones, int kW, + int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int group, + int deformable_group, int im2col_step); + +void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput, + Tensor gradInput, Tensor gradOffset, + Tensor weight, Tensor columns, int kW, int kH, + int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int group, + int deformable_group, int im2col_step); + +void deform_conv_backward_parameters(Tensor input, Tensor offset, + Tensor gradOutput, Tensor gradWeight, + Tensor columns, Tensor ones, int kW, + int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int group, + int deformable_group, float scale, + int im2col_step); + +void deform_roi_pool_forward(Tensor input, Tensor rois, Tensor offset, + Tensor output, int pooled_height, int pooled_width, + float spatial_scale, int sampling_ratio, + float gamma); + +void deform_roi_pool_backward(Tensor grad_output, Tensor input, Tensor rois, + Tensor offset, Tensor grad_input, + Tensor grad_offset, int pooled_height, + int pooled_width, float spatial_scale, + int sampling_ratio, float gamma); + +void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight, + Tensor output, float gamma, float alpha); + +void sigmoid_focal_loss_backward(Tensor input, Tensor target, Tensor weight, + Tensor grad_input, float gamma, float alpha); + +void softmax_focal_loss_forward(Tensor input, Tensor target, Tensor weight, + Tensor output, float gamma, float alpha); + +void softmax_focal_loss_backward(Tensor input, Tensor target, Tensor weight, + Tensor buff, Tensor grad_input, float gamma, + float alpha); + +void bbox_overlaps(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, + const int mode, const bool aligned, const int offset); + +void masked_im2col_forward(const Tensor im, const Tensor mask_h_idx, + const Tensor mask_w_idx, Tensor col, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w); + +void masked_col2im_forward(const Tensor col, const Tensor mask_h_idx, + const Tensor mask_w_idx, Tensor im, int height, + int width, int channels); + +void modulated_deform_conv_forward( + Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, + Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w, + const int stride_h, const int stride_w, const int pad_h, const int pad_w, + const int dilation_h, const int dilation_w, const int group, + const int deformable_group, const bool with_bias); + +void modulated_deform_conv_backward( + Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, + Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight, + Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, + int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, + const bool with_bias); + +Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset); + +Tensor softnms(Tensor boxes, Tensor scores, Tensor dets, float iou_threshold, + float sigma, float min_score, int method, int offset); + +std::vector > nms_match(Tensor dets, float iou_threshold); + +void roi_align_forward(Tensor input, Tensor rois, Tensor output, + Tensor argmax_y, Tensor argmax_x, int aligned_height, + int aligned_width, float spatial_scale, + int sampling_ratio, int pool_mode, bool aligned); + +void roi_align_backward(Tensor grad_output, Tensor rois, Tensor argmax_y, + Tensor argmax_x, Tensor grad_input, int aligned_height, + int aligned_width, float spatial_scale, + int sampling_ratio, int pool_mode, bool aligned); + +void roi_pool_forward(Tensor input, Tensor rois, Tensor output, Tensor argmax, + int pooled_height, int pooled_width, float spatial_scale); + +void roi_pool_backward(Tensor grad_output, Tensor rois, Tensor argmax, + Tensor grad_input, int pooled_height, int pooled_width, + float spatial_scale); + +void sync_bn_forward_mean(const Tensor input, Tensor mean); + +void sync_bn_forward_var(const Tensor input, const Tensor mean, Tensor var); + +void sync_bn_forward_output(const Tensor input, const Tensor mean, + const Tensor var, const Tensor weight, + const Tensor bias, Tensor running_mean, + Tensor running_var, Tensor norm, Tensor std, + Tensor output, float eps, float momentum, + int group_size); + +void sync_bn_backward_param(const Tensor grad_output, const Tensor norm, + Tensor grad_weight, Tensor grad_bias); + +void sync_bn_backward_data(const Tensor grad_output, const Tensor weight, + const Tensor grad_weight, const Tensor grad_bias, + const Tensor norm, const Tensor std, + Tensor grad_input); + +void ca_forward(const Tensor t, const Tensor f, Tensor weight); + +void ca_backward(const Tensor dw, const Tensor t, const Tensor f, Tensor dt, + Tensor df); + +void ca_map_forward(const Tensor weight, const Tensor g, Tensor out); + +void ca_map_backward(const Tensor dout, const Tensor weight, const Tensor g, + Tensor dw, Tensor dg); + +void psamask_forward(const Tensor input, Tensor output, const int psa_type, + const int num_, const int h_feature, const int w_feature, + const int h_mask, const int w_mask, const int half_h_mask, + const int half_w_mask); + +void psamask_backward(Tensor grad_output, const Tensor grad_input, + const int psa_type, const int num_, const int h_feature, + const int w_feature, const int h_mask, const int w_mask, + const int half_h_mask, const int half_w_mask); + +void tin_shift_forward(Tensor input, Tensor shift, Tensor output); + +void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input); + +Tensor bottom_pool_forward(Tensor input); + +Tensor bottom_pool_backward(Tensor input, Tensor grad_output); + +Tensor left_pool_forward(Tensor input); + +Tensor left_pool_backward(Tensor input, Tensor grad_output); + +Tensor right_pool_forward(Tensor input); + +Tensor right_pool_backward(Tensor input, Tensor grad_output); + +Tensor top_pool_forward(Tensor input); + +Tensor top_pool_backward(Tensor input, Tensor grad_output); + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); + m.def("get_compiling_cuda_version", &get_compiling_cuda_version, + "get_compiling_cuda_version"); + m.def("carafe_naive_forward", &carafe_naive_forward, "carafe_naive_forward", + py::arg("features"), py::arg("masks"), py::arg("output"), + py::arg("kernel_size"), py::arg("group_size"), py::arg("scale_factor")); + m.def("carafe_naive_backward", &carafe_naive_backward, + "carafe_naive_backward", py::arg("top_grad"), py::arg("features"), + py::arg("masks"), py::arg("bottom_grad"), py::arg("mask_grad"), + py::arg("kernel_size"), py::arg("group_size"), py::arg("scale_factor")); + m.def("carafe_forward", &carafe_forward, "carafe_forward", + py::arg("features"), py::arg("masks"), py::arg("rfeatures"), + py::arg("routput"), py::arg("rmasks"), py::arg("output"), + py::arg("kernel_size"), py::arg("group_size"), py::arg("scale_factor")); + m.def("carafe_backward", &carafe_backward, "carafe_backward", + py::arg("top_grad"), py::arg("rfeatures"), py::arg("masks"), + py::arg("rtop_grad"), py::arg("rbottom_grad_hs"), + py::arg("rbottom_grad"), py::arg("rmask_grad"), py::arg("bottom_grad"), + py::arg("mask_grad"), py::arg("kernel_size"), py::arg("group_size"), + py::arg("scale_factor")); + m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward", + py::arg("input"), py::arg("weight"), py::arg("offset"), + py::arg("output"), py::arg("columns"), py::arg("ones"), py::arg("kW"), + py::arg("kH"), py::arg("dW"), py::arg("dH"), py::arg("padH"), + py::arg("padW"), py::arg("dilationW"), py::arg("dilationH"), + py::arg("group"), py::arg("deformable_group"), py::arg("im2col_step")); + m.def("deform_conv_backward_input", &deform_conv_backward_input, + "deform_conv_backward_input", py::arg("input"), py::arg("offset"), + py::arg("gradOutput"), py::arg("gradInput"), py::arg("gradOffset"), + py::arg("weight"), py::arg("columns"), py::arg("kW"), py::arg("kH"), + py::arg("dW"), py::arg("dH"), py::arg("padH"), py::arg("padW"), + py::arg("dilationW"), py::arg("dilationH"), py::arg("group"), + py::arg("deformable_group"), py::arg("im2col_step")); + m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, + "deform_conv_backward_parameters", py::arg("input"), py::arg("offset"), + py::arg("gradOutput"), py::arg("gradWeight"), py::arg("columns"), + py::arg("ones"), py::arg("kW"), py::arg("kH"), py::arg("dW"), + py::arg("dH"), py::arg("padH"), py::arg("padW"), py::arg("dilationW"), + py::arg("dilationH"), py::arg("group"), py::arg("deformable_group"), + py::arg("scale"), py::arg("im2col_step")); + m.def("deform_roi_pool_forward", &deform_roi_pool_forward, + "deform roi pool forward", py::arg("input"), py::arg("rois"), + py::arg("offset"), py::arg("output"), py::arg("pooled_height"), + py::arg("pooled_width"), py::arg("spatial_scale"), + py::arg("sampling_ratio"), py::arg("gamma")); + m.def("deform_roi_pool_backward", &deform_roi_pool_backward, + "deform roi pool backward", py::arg("grad_output"), py::arg("input"), + py::arg("rois"), py::arg("offset"), py::arg("grad_input"), + py::arg("grad_offset"), py::arg("pooled_height"), + py::arg("pooled_width"), py::arg("spatial_scale"), + py::arg("sampling_ratio"), py::arg("gamma")); + m.def("sigmoid_focal_loss_forward", &sigmoid_focal_loss_forward, + "sigmoid_focal_loss_forward ", py::arg("input"), py::arg("target"), + py::arg("weight"), py::arg("output"), py::arg("gamma"), + py::arg("alpha")); + m.def("sigmoid_focal_loss_backward", &sigmoid_focal_loss_backward, + "sigmoid_focal_loss_backward", py::arg("input"), py::arg("target"), + py::arg("weight"), py::arg("grad_input"), py::arg("gamma"), + py::arg("alpha")); + m.def("softmax_focal_loss_forward", &softmax_focal_loss_forward, + "softmax_focal_loss_forward", py::arg("input"), py::arg("target"), + py::arg("weight"), py::arg("output"), py::arg("gamma"), + py::arg("alpha")); + m.def("softmax_focal_loss_backward", &softmax_focal_loss_backward, + "softmax_focal_loss_backward", py::arg("input"), py::arg("target"), + py::arg("weight"), py::arg("buff"), py::arg("grad_input"), + py::arg("gamma"), py::arg("alpha")); + m.def("bbox_overlaps", &bbox_overlaps, "bbox_overlaps", py::arg("bboxes1"), + py::arg("bboxes2"), py::arg("ious"), py::arg("mode"), + py::arg("aligned"), py::arg("offset")); + m.def("masked_im2col_forward", &masked_im2col_forward, + "masked_im2col_forward", py::arg("im"), py::arg("mask_h_idx"), + py::arg("mask_w_idx"), py::arg("col"), py::arg("kernel_h"), + py::arg("kernel_w"), py::arg("pad_h"), py::arg("pad_w")); + m.def("masked_col2im_forward", &masked_col2im_forward, + "masked_col2im_forward", py::arg("col"), py::arg("mask_h_idx"), + py::arg("mask_w_idx"), py::arg("im"), py::arg("height"), + py::arg("width"), py::arg("channels")); + m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, + "modulated deform conv forward", py::arg("input"), py::arg("weight"), + py::arg("bias"), py::arg("ones"), py::arg("offset"), py::arg("mask"), + py::arg("output"), py::arg("columns"), py::arg("kernel_h"), + py::arg("kernel_w"), py::arg("stride_h"), py::arg("stride_w"), + py::arg("pad_h"), py::arg("pad_w"), py::arg("dilation_h"), + py::arg("dilation_w"), py::arg("group"), py::arg("deformable_group"), + py::arg("with_bias")); + m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, + "modulated deform conv backward", py::arg("input"), py::arg("weight"), + py::arg("bias"), py::arg("ones"), py::arg("offset"), py::arg("mask"), + py::arg("columns"), py::arg("grad_input"), py::arg("grad_weight"), + py::arg("grad_bias"), py::arg("grad_offset"), py::arg("grad_mask"), + py::arg("grad_output"), py::arg("kernel_h"), py::arg("kernel_w"), + py::arg("stride_h"), py::arg("stride_w"), py::arg("pad_h"), + py::arg("pad_w"), py::arg("dilation_h"), py::arg("dilation_w"), + py::arg("group"), py::arg("deformable_group"), py::arg("with_bias")); + m.def("nms", &nms, "nms (CPU/CUDA) ", py::arg("boxes"), py::arg("scores"), + py::arg("iou_threshold"), py::arg("offset")); + m.def("softnms", &softnms, "softnms (CPU) ", py::arg("boxes"), + py::arg("scores"), py::arg("dets"), py::arg("iou_threshold"), + py::arg("sigma"), py::arg("min_score"), py::arg("method"), + py::arg("offset")); + m.def("nms_match", &nms_match, "nms_match (CPU) ", py::arg("dets"), + py::arg("iou_threshold")); + m.def("roi_align_forward", &roi_align_forward, "roi_align forward", + py::arg("input"), py::arg("rois"), py::arg("output"), + py::arg("argmax_y"), py::arg("argmax_x"), py::arg("aligned_height"), + py::arg("aligned_width"), py::arg("spatial_scale"), + py::arg("sampling_ratio"), py::arg("pool_mode"), py::arg("aligned")); + m.def("roi_align_backward", &roi_align_backward, "roi_align backward", + py::arg("grad_output"), py::arg("rois"), py::arg("argmax_y"), + py::arg("argmax_x"), py::arg("grad_input"), py::arg("aligned_height"), + py::arg("aligned_width"), py::arg("spatial_scale"), + py::arg("sampling_ratio"), py::arg("pool_mode"), py::arg("aligned")); + m.def("roi_pool_forward", &roi_pool_forward, "roi_pool forward", + py::arg("input"), py::arg("rois"), py::arg("output"), py::arg("argmax"), + py::arg("pooled_height"), py::arg("pooled_width"), + py::arg("spatial_scale")); + m.def("roi_pool_backward", &roi_pool_backward, "roi_pool backward", + py::arg("grad_output"), py::arg("rois"), py::arg("argmax"), + py::arg("grad_input"), py::arg("pooled_height"), + py::arg("pooled_width"), py::arg("spatial_scale")); + m.def("sync_bn_forward_mean", &sync_bn_forward_mean, "sync_bn forward_mean", + py::arg("input"), py::arg("mean")); + m.def("sync_bn_forward_var", &sync_bn_forward_var, "sync_bn forward_var", + py::arg("input"), py::arg("mean"), py::arg("var")); + m.def("sync_bn_forward_output", &sync_bn_forward_output, + "sync_bn forward_output", py::arg("input"), py::arg("mean"), + py::arg("var"), py::arg("weight"), py::arg("bias"), + py::arg("running_mean"), py::arg("running_var"), py::arg("norm"), + py::arg("std"), py::arg("output"), py::arg("eps"), py::arg("momentum"), + py::arg("group_size")); + m.def("sync_bn_backward_param", &sync_bn_backward_param, + "sync_bn backward_param", py::arg("grad_output"), py::arg("norm"), + py::arg("grad_weight"), py::arg("grad_bias")); + m.def("sync_bn_backward_data", &sync_bn_backward_data, + "sync_bn backward_data", py::arg("grad_output"), py::arg("weight"), + py::arg("grad_weight"), py::arg("grad_bias"), py::arg("norm"), + py::arg("std"), py::arg("grad_input")); + m.def("ca_forward", &ca_forward, "ccattention forward", py::arg("t"), + py::arg("f"), py::arg("weight")); + m.def("ca_backward", &ca_backward, "ccattention backward", py::arg("dw"), + py::arg("t"), py::arg("f"), py::arg("dt"), py::arg("df")); + m.def("ca_map_forward", &ca_map_forward, "ccattention map forward", + py::arg("weight"), py::arg("g"), py::arg("out")); + m.def("ca_map_backward", &ca_map_backward, "ccattention map backward", + py::arg("dout"), py::arg("weight"), py::arg("g"), py::arg("dw"), + py::arg("dg")); + m.def("psamask_forward", &psamask_forward, "PSAMASK forward (CPU/CUDA)", + py::arg("input"), py::arg("output"), py::arg("psa_type"), + py::arg("num_"), py::arg("h_feature"), py::arg("w_feature"), + py::arg("h_mask"), py::arg("w_mask"), py::arg("half_h_mask"), + py::arg("half_w_mask")); + m.def("psamask_backward", &psamask_backward, "PSAMASK backward (CPU/CUDA)", + py::arg("grad_output"), py::arg("grad_input"), py::arg("psa_type"), + py::arg("num_"), py::arg("h_feature"), py::arg("w_feature"), + py::arg("h_mask"), py::arg("w_mask"), py::arg("half_h_mask"), + py::arg("half_w_mask")); + m.def("tin_shift_forward", &tin_shift_forward, "tin_shift forward", + py::arg("input"), py::arg("shift"), py::arg("output")); + m.def("tin_shift_backward", &tin_shift_backward, "tin_shift backward", + py::arg("grad_output"), py::arg("shift"), py::arg("grad_input")); + m.def("bottom_pool_forward", &bottom_pool_forward, "Bottom Pool Forward", + py::arg("input"), py::call_guard()); + m.def("bottom_pool_backward", &bottom_pool_backward, "Bottom Pool Backward", + py::arg("input"), py::arg("grad_output"), + py::call_guard()); + m.def("left_pool_forward", &left_pool_forward, "Left Pool Forward", + py::arg("input"), py::call_guard()); + m.def("left_pool_backward", &left_pool_backward, "Left Pool Backward", + py::arg("input"), py::arg("grad_output"), + py::call_guard()); + m.def("right_pool_forward", &right_pool_forward, "Right Pool Forward", + py::arg("input"), py::call_guard()); + m.def("right_pool_backward", &right_pool_backward, "Right Pool Backward", + py::arg("input"), py::arg("grad_output"), + py::call_guard()); + m.def("top_pool_forward", &top_pool_forward, "Top Pool Forward", + py::arg("input"), py::call_guard()); + m.def("top_pool_backward", &top_pool_backward, "Top Pool Backward", + py::arg("input"), py::arg("grad_output"), + py::call_guard()); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/roi_align.cpp b/mmcv/mmcv/ops/csrc/pytorch/roi_align.cpp new file mode 100644 index 0000000..b79fafb --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/roi_align.cpp @@ -0,0 +1,129 @@ +#include "pytorch_cpp_helper.hpp" + +#ifdef MMCV_WITH_CUDA +void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, + Tensor argmax_y, Tensor argmax_x, + int aligned_height, int aligned_width, + float spatial_scale, int sampling_ratio, + int pool_mode, bool aligned); + +void ROIAlignBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois, + Tensor argmax_y, Tensor argmax_x, + Tensor grad_input, int aligned_height, + int aligned_width, float spatial_scale, + int sampling_ratio, int pool_mode, + bool aligned); + +void roi_align_forward_cuda(Tensor input, Tensor rois, Tensor output, + Tensor argmax_y, Tensor argmax_x, + int aligned_height, int aligned_width, + float spatial_scale, int sampling_ratio, + int pool_mode, bool aligned) { + ROIAlignForwardCUDAKernelLauncher( + input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, + spatial_scale, sampling_ratio, pool_mode, aligned); +} + +void roi_align_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax_y, + Tensor argmax_x, Tensor grad_input, + int aligned_height, int aligned_width, + float spatial_scale, int sampling_ratio, + int pool_mode, bool aligned) { + ROIAlignBackwardCUDAKernelLauncher( + grad_output, rois, argmax_y, argmax_x, grad_input, aligned_height, + aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); +} +#endif + +void ROIAlignForwardCPULauncher(Tensor input, Tensor rois, Tensor output, + Tensor argmax_y, Tensor argmax_x, + int aligned_height, int aligned_width, + float spatial_scale, int sampling_ratio, + int pool_mode, bool aligned); + +void ROIAlignBackwardCPULauncher(Tensor grad_output, Tensor rois, + Tensor argmax_y, Tensor argmax_x, + Tensor grad_input, int aligned_height, + int aligned_width, float spatial_scale, + int sampling_ratio, int pool_mode, + bool aligned); + +void roi_align_forward_cpu(Tensor input, Tensor rois, Tensor output, + Tensor argmax_y, Tensor argmax_x, int aligned_height, + int aligned_width, float spatial_scale, + int sampling_ratio, int pool_mode, bool aligned) { + ROIAlignForwardCPULauncher(input, rois, output, argmax_y, argmax_x, + aligned_height, aligned_width, spatial_scale, + sampling_ratio, pool_mode, aligned); +} + +void roi_align_backward_cpu(Tensor grad_output, Tensor rois, Tensor argmax_y, + Tensor argmax_x, Tensor grad_input, + int aligned_height, int aligned_width, + float spatial_scale, int sampling_ratio, + int pool_mode, bool aligned) { + ROIAlignBackwardCPULauncher(grad_output, rois, argmax_y, argmax_x, grad_input, + aligned_height, aligned_width, spatial_scale, + sampling_ratio, pool_mode, aligned); +} + +void roi_align_forward(Tensor input, Tensor rois, Tensor output, + Tensor argmax_y, Tensor argmax_x, int aligned_height, + int aligned_width, float spatial_scale, + int sampling_ratio, int pool_mode, bool aligned) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(rois); + CHECK_CUDA_INPUT(output); + CHECK_CUDA_INPUT(argmax_y); + CHECK_CUDA_INPUT(argmax_x); + + roi_align_forward_cuda(input, rois, output, argmax_y, argmax_x, + aligned_height, aligned_width, spatial_scale, + sampling_ratio, pool_mode, aligned); +#else + AT_ERROR("RoIAlign is not compiled with GPU support"); +#endif + } else { + CHECK_CPU_INPUT(input); + CHECK_CPU_INPUT(rois); + CHECK_CPU_INPUT(output); + CHECK_CPU_INPUT(argmax_y); + CHECK_CPU_INPUT(argmax_x); + roi_align_forward_cpu(input, rois, output, argmax_y, argmax_x, + aligned_height, aligned_width, spatial_scale, + sampling_ratio, pool_mode, aligned); + } +} + +void roi_align_backward(Tensor grad_output, Tensor rois, Tensor argmax_y, + Tensor argmax_x, Tensor grad_input, int aligned_height, + int aligned_width, float spatial_scale, + int sampling_ratio, int pool_mode, bool aligned) { + if (grad_output.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(grad_output); + CHECK_CUDA_INPUT(rois); + CHECK_CUDA_INPUT(argmax_y); + CHECK_CUDA_INPUT(argmax_x); + CHECK_CUDA_INPUT(grad_input); + + roi_align_backward_cuda(grad_output, rois, argmax_y, argmax_x, grad_input, + aligned_height, aligned_width, spatial_scale, + sampling_ratio, pool_mode, aligned); +#else + AT_ERROR("RoIAlign is not compiled with GPU support"); +#endif + } else { + CHECK_CPU_INPUT(grad_output); + CHECK_CPU_INPUT(rois); + CHECK_CPU_INPUT(argmax_y); + CHECK_CPU_INPUT(argmax_x); + CHECK_CPU_INPUT(grad_input); + + roi_align_backward_cpu(grad_output, rois, argmax_y, argmax_x, grad_input, + aligned_height, aligned_width, spatial_scale, + sampling_ratio, pool_mode, aligned); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/roi_align_cpu.cpp b/mmcv/mmcv/ops/csrc/pytorch/roi_align_cpu.cpp new file mode 100644 index 0000000..ac64eb3 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/roi_align_cpu.cpp @@ -0,0 +1,430 @@ +// Modified from +// https://github.com/facebookresearch/detectron2/tree/master/detectron2/layers/csrc/ROIAlign +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +#include +#include + +#include "../pytorch_cpp_helper.hpp" + +// implementation taken from Caffe2 +template +struct PreCalc { + int pos1; + int pos2; + int pos3; + int pos4; + T w1; + T w2; + T w3; + T w4; +}; + +template +void pre_calc_for_bilinear_interpolate( + const int height, const int width, const int pooled_height, + const int pooled_width, const int iy_upper, const int ix_upper, + T roi_start_h, T roi_start_w, T bin_size_h, T bin_size_w, + int roi_bin_grid_h, int roi_bin_grid_w, std::vector>& pre_calc) { + int pre_calc_index = 0; + for (int ph = 0; ph < pooled_height; ph++) { + for (int pw = 0; pw < pooled_width; pw++) { + for (int iy = 0; iy < iy_upper; iy++) { + const T yy = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < ix_upper; ix++) { + const T xx = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + T x = xx; + T y = yy; + // deal with: inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + PreCalc pc; + pc.pos1 = 0; + pc.pos2 = 0; + pc.pos3 = 0; + pc.pos4 = 0; + pc.w1 = 0; + pc.w2 = 0; + pc.w3 = 0; + pc.w4 = 0; + pre_calc[pre_calc_index] = pc; + pre_calc_index += 1; + continue; + } + + if (y <= 0) { + y = 0; + } + if (x <= 0) { + x = 0; + } + + int y_low = (int)y; + int x_low = (int)x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. - lx; + T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + // save weights and indices + PreCalc pc; + pc.pos1 = y_low * width + x_low; + pc.pos2 = y_low * width + x_high; + pc.pos3 = y_high * width + x_low; + pc.pos4 = y_high * width + x_high; + pc.w1 = w1; + pc.w2 = w2; + pc.w3 = w3; + pc.w4 = w4; + pre_calc[pre_calc_index] = pc; + + pre_calc_index += 1; + } + } + } + } +} + +template +void ROIAlignForward(const int nthreads, const T* input, const T* rois, + T* output, T* argmax_y, T* argmax_x, + const int pooled_height, const int pooled_width, + const T spatial_scale, const int sampling_ratio, + const int pool_mode, // 0 - max pool, 1 - avg pool + const bool aligned, const int channels, const int height, + const int width) { + int n_rois = nthreads / channels / pooled_width / pooled_height; + // (n, c, ph, pw) is an element in the pooled output + // can be parallelized using omp + // #pragma omp parallel for num_threads(32) + for (int n = 0; n < n_rois; n++) { + int index_n = n * channels * pooled_width * pooled_height; + + const T* offset_rois = rois + n * 5; + int roi_batch_ind = offset_rois[0]; + + // Do not use rounding; this implementation detail is critical + T offset = aligned ? (T)0.5 : (T)0.0; + T roi_start_w = offset_rois[1] * spatial_scale - offset; + T roi_start_h = offset_rois[2] * spatial_scale - offset; + T roi_end_w = offset_rois[3] * spatial_scale - offset; + T roi_end_h = offset_rois[4] * spatial_scale - offset; + + T roi_width = roi_end_w - roi_start_w; + T roi_height = roi_end_h - roi_start_h; + if (aligned) { + AT_ASSERTM(roi_width >= 0 && roi_height >= 0, + "ROIs in ROIAlign cannot have non-negative size!"); + } else { // for backward-compatibility only + roi_width = std::max(roi_width, (T)1.); + roi_height = std::max(roi_height, (T)1.); + } + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // When the grid is empty, output zeros == 0/1, instead of NaN. + const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4 + + // we want to precalculate indices and weights shared by all channels, + // this is the key point of optimization + std::vector> pre_calc(roi_bin_grid_h * roi_bin_grid_w * + pooled_width * pooled_height); + pre_calc_for_bilinear_interpolate( + height, width, pooled_height, pooled_width, roi_bin_grid_h, + roi_bin_grid_w, roi_start_h, roi_start_w, bin_size_h, bin_size_w, + roi_bin_grid_h, roi_bin_grid_w, pre_calc); + + for (int c = 0; c < channels; c++) { + int index_n_c = index_n + c * pooled_width * pooled_height; + const T* offset_input = + input + (roi_batch_ind * channels + c) * height * width; + int pre_calc_index = 0; + + for (int ph = 0; ph < pooled_height; ph++) { + for (int pw = 0; pw < pooled_width; pw++) { + int index = index_n_c + ph * pooled_width + pw; + + T output_val = 0.; + T maxval = -10000; + T maxidx_y = -1.f, maxidx_x = -1.f; + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + const T y = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T x = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + PreCalc pc = pre_calc[pre_calc_index]; + T val = pc.w1 * offset_input[pc.pos1] + + pc.w2 * offset_input[pc.pos2] + + pc.w3 * offset_input[pc.pos3] + + pc.w4 * offset_input[pc.pos4]; + if (val > maxval) { + maxval = val; + maxidx_y = y; + maxidx_x = x; + } + output_val += val; + pre_calc_index += 1; + } + } + if (pool_mode == 0) { + // We do max pooling inside a bin + output[index] = maxval; + argmax_y[index] = maxidx_y; + argmax_x[index] = maxidx_x; + } else if (pool_mode == 1) { + // We do average (integral) pooling inside a bin + output[index] = output_val / count; + } // if + } // for pw + } // for ph + } // for c + } // for n +} + +template +void bilinear_interpolate_gradient(const int height, const int width, T y, T x, + T& w1, T& w2, T& w3, T& w4, int& x_low, + int& x_high, int& y_low, int& y_high, + const int index /* index for debug only*/) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + w1 = w2 = w3 = w4 = 0.; + x_low = x_high = y_low = y_high = -1; + return; + } + + if (y <= 0) y = 0; + if (x <= 0) x = 0; + + y_low = (int)y; + x_low = (int)x; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. - lx; + + // reference in forward + // T v1 = input[y_low * width + x_low]; + // T v2 = input[y_low * width + x_high]; + // T v3 = input[y_high * width + x_low]; + // T v4 = input[y_high * width + x_high]; + // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + return; +} + +template +inline void add(T* address, const T& val) { + *address += val; +} + +template +void ROIAlignBackward(const int nthreads, const T* grad_output, const T* rois, + const T* argmax_y, const T* argmax_x, T* grad_input, + const int pooled_height, const int pooled_width, + const T spatial_scale, const int sampling_ratio, + const int pool_mode, // 0 - max pool, 1 - avg pool + const bool aligned, const int channels, const int height, + const int width, const int n_stride, const int c_stride, + const int h_stride, const int w_stride) { + for (int index = 0; index < nthreads; index++) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* offset_rois = rois + n * 5; + int roi_batch_ind = offset_rois[0]; + + // Do not use rounding; this implementation detail is critical + T offset = aligned ? (T)0.5 : (T)0.0; + T roi_start_w = offset_rois[1] * spatial_scale - offset; + T roi_start_h = offset_rois[2] * spatial_scale - offset; + T roi_end_w = offset_rois[3] * spatial_scale - offset; + T roi_end_h = offset_rois[4] * spatial_scale - offset; + + T roi_width = roi_end_w - roi_start_w; + T roi_height = roi_end_h - roi_start_h; + if (aligned) { + AT_ASSERTM(roi_width >= 0 && roi_height >= 0, + "ROIs in ROIAlign do not have non-negative size!"); + } else { // for backward-compatibility only + roi_width = std::max(roi_width, (T)1.); + roi_height = std::max(roi_height, (T)1.); + } + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + T* offset_grad_input = + grad_input + ((roi_batch_ind * channels + c) * height * width); + + int output_offset = n * n_stride + c * c_stride; + const T* offset_grad_output = grad_output + output_offset; + const T grad_output_this_bin = + offset_grad_output[ph * h_stride + pw * w_stride]; + + if (pool_mode == 0) { + // We do max pooling inside a bin + T y = argmax_y[index], x = argmax_x[index]; + if (y != -1.f) { + T w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, + x_low, x_high, y_low, y_high, index); + + T g1 = grad_output_this_bin * w1; + T g2 = grad_output_this_bin * w2; + T g3 = grad_output_this_bin * w3; + T g4 = grad_output_this_bin * w4; + + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + // atomic add is not needed for now since it is single threaded + add(offset_grad_input + y_low * width + x_low, static_cast(g1)); + add(offset_grad_input + y_low * width + x_high, static_cast(g2)); + add(offset_grad_input + y_high * width + x_low, static_cast(g3)); + add(offset_grad_input + y_high * width + x_high, static_cast(g4)); + } // if + } // mode + } else if (pool_mode == 1) { + // We do average (integral) pooling inside a bin + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_width / pooled_width); + + const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + const T y = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T x = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + T w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + + bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, + x_low, x_high, y_low, y_high, index); + + T g1 = grad_output_this_bin * w1 / count; + T g2 = grad_output_this_bin * w2 / count; + T g3 = grad_output_this_bin * w3 / count; + T g4 = grad_output_this_bin * w4 / count; + + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + // atomic add is not needed for now since it is single threaded + add(offset_grad_input + y_low * width + x_low, static_cast(g1)); + add(offset_grad_input + y_low * width + x_high, static_cast(g2)); + add(offset_grad_input + y_high * width + x_low, static_cast(g3)); + add(offset_grad_input + y_high * width + x_high, + static_cast(g4)); + } // if + } // ix + } // iy + } // mode + } // for +} // ROIAlignBackward + +void ROIAlignForwardCPULauncher(Tensor input, Tensor rois, Tensor output, + Tensor argmax_y, Tensor argmax_x, + int aligned_height, int aligned_width, + float spatial_scale, int sampling_ratio, + int pool_mode, bool aligned) { + int output_size = output.numel(); + int channels = input.size(1); + int height = input.size(2); + int width = input.size(3); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "ROIAlign_forward", [&] { + ROIAlignForward( + output_size, input.data_ptr(), rois.data_ptr(), + output.data_ptr(), argmax_y.data_ptr(), + argmax_x.data_ptr(), aligned_height, aligned_width, + static_cast(spatial_scale), sampling_ratio, pool_mode, + aligned, channels, height, width); + }); +} + +void ROIAlignBackwardCPULauncher(Tensor grad_output, Tensor rois, + Tensor argmax_y, Tensor argmax_x, + Tensor grad_input, int aligned_height, + int aligned_width, float spatial_scale, + int sampling_ratio, int pool_mode, + bool aligned) { + int output_size = grad_output.numel(); + int channels = grad_input.size(1); + int height = grad_input.size(2); + int width = grad_input.size(3); + + // get stride values to ensure indexing into gradients is correct. + int n_stride = grad_output.stride(0); + int c_stride = grad_output.stride(1); + int h_stride = grad_output.stride(2); + int w_stride = grad_output.stride(3); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_output.scalar_type(), "ROIAlign_backward", [&] { + ROIAlignBackward( + output_size, grad_output.data_ptr(), + rois.data_ptr(), argmax_y.data_ptr(), + argmax_x.data_ptr(), grad_input.data_ptr(), + aligned_height, aligned_width, static_cast(spatial_scale), + sampling_ratio, pool_mode, aligned, channels, height, width, + n_stride, c_stride, h_stride, w_stride); + }); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/roi_align_cuda.cu b/mmcv/mmcv/ops/csrc/pytorch/roi_align_cuda.cu new file mode 100644 index 0000000..822e918 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/roi_align_cuda.cu @@ -0,0 +1,57 @@ +#include "pytorch_cuda_helper.hpp" +#include "roi_align_cuda_kernel.cuh" + +void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, + Tensor argmax_y, Tensor argmax_x, + int aligned_height, int aligned_width, + float spatial_scale, int sampling_ratio, + int pool_mode, bool aligned) { + int output_size = output.numel(); + int channels = input.size(1); + int height = input.size(2); + int width = input.size(3); + + at::cuda::CUDAGuard device_guard(input.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "roi_align_forward_cuda_kernel", [&] { + roi_align_forward_cuda_kernel + <<>>( + output_size, input.data_ptr(), + rois.data_ptr(), output.data_ptr(), + argmax_y.data_ptr(), argmax_x.data_ptr(), + aligned_height, aligned_width, + static_cast(spatial_scale), sampling_ratio, pool_mode, + aligned, channels, height, width); + }); + + AT_CUDA_CHECK(cudaGetLastError()); +} + +void ROIAlignBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois, + Tensor argmax_y, Tensor argmax_x, + Tensor grad_input, int aligned_height, + int aligned_width, float spatial_scale, + int sampling_ratio, int pool_mode, + bool aligned) { + int output_size = grad_output.numel(); + int channels = grad_input.size(1); + int height = grad_input.size(2); + int width = grad_input.size(3); + + at::cuda::CUDAGuard device_guard(grad_output.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_output.scalar_type(), "roi_align_backward_cuda_kernel", [&] { + roi_align_backward_cuda_kernel + <<>>( + output_size, grad_output.data_ptr(), + rois.data_ptr(), argmax_y.data_ptr(), + argmax_x.data_ptr(), grad_input.data_ptr(), + aligned_height, aligned_width, + static_cast(spatial_scale), sampling_ratio, pool_mode, + aligned, channels, height, width); + }); + + AT_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/roi_pool.cpp b/mmcv/mmcv/ops/csrc/pytorch/roi_pool.cpp new file mode 100644 index 0000000..e412705 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/roi_pool.cpp @@ -0,0 +1,66 @@ +#include "pytorch_cpp_helper.hpp" + +#ifdef MMCV_WITH_CUDA +void ROIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, + Tensor argmax, int pooled_height, + int pooled_width, float spatial_scale); + +void ROIPoolBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois, + Tensor argmax, Tensor grad_input, + int pooled_height, int pooled_width, + float spatial_scale); + +void roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor output, + Tensor argmax, int pooled_height, int pooled_width, + float spatial_scale) { + ROIPoolForwardCUDAKernelLauncher(input, rois, output, argmax, pooled_height, + pooled_width, spatial_scale); +} + +void roi_pool_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax, + Tensor grad_input, int pooled_height, + int pooled_width, float spatial_scale) { + ROIPoolBackwardCUDAKernelLauncher(grad_output, rois, argmax, grad_input, + pooled_height, pooled_width, spatial_scale); +} +#endif + +void roi_pool_forward(Tensor input, Tensor rois, Tensor output, Tensor argmax, + int pooled_height, int pooled_width, + float spatial_scale) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(rois); + CHECK_CUDA_INPUT(output); + CHECK_CUDA_INPUT(argmax); + + roi_pool_forward_cuda(input, rois, output, argmax, pooled_height, + pooled_width, spatial_scale); +#else + AT_ERROR("RoIPool is not compiled with GPU support"); +#endif + } else { + AT_ERROR("RoIPool is not implemented on CPU"); + } +} + +void roi_pool_backward(Tensor grad_output, Tensor rois, Tensor argmax, + Tensor grad_input, int pooled_height, int pooled_width, + float spatial_scale) { + if (grad_output.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(grad_output); + CHECK_CUDA_INPUT(rois); + CHECK_CUDA_INPUT(argmax); + CHECK_CUDA_INPUT(grad_input); + + roi_pool_backward_cuda(grad_output, rois, argmax, grad_input, pooled_height, + pooled_width, spatial_scale); +#else + AT_ERROR("RoIPool is not compiled with GPU support"); +#endif + } else { + AT_ERROR("RoIPool is not implemented on CPU"); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/roi_pool_cuda.cu b/mmcv/mmcv/ops/csrc/pytorch/roi_pool_cuda.cu new file mode 100644 index 0000000..313f1d7 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/roi_pool_cuda.cu @@ -0,0 +1,49 @@ +#include "pytorch_cuda_helper.hpp" +#include "roi_pool_cuda_kernel.cuh" + +void ROIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, + Tensor argmax, int pooled_height, + int pooled_width, float spatial_scale) { + int output_size = output.numel(); + int channels = input.size(1); + int height = input.size(2); + int width = input.size(3); + + at::cuda::CUDAGuard device_guard(input.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "roi_pool_forward_cuda_kernel", [&] { + roi_pool_forward_cuda_kernel + <<>>( + output_size, input.data_ptr(), + rois.data_ptr(), output.data_ptr(), + argmax.data_ptr(), pooled_height, pooled_width, + static_cast(spatial_scale), channels, height, width); + }); + + AT_CUDA_CHECK(cudaGetLastError()); +} + +void ROIPoolBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois, + Tensor argmax, Tensor grad_input, + int pooled_height, int pooled_width, + float spatial_scale) { + int output_size = grad_output.numel(); + int channels = grad_input.size(1); + int height = grad_input.size(2); + int width = grad_input.size(3); + + at::cuda::CUDAGuard device_guard(grad_output.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_output.scalar_type(), "roi_pool_backward_cuda_kernel", [&] { + roi_pool_backward_cuda_kernel + <<>>( + output_size, grad_output.data_ptr(), + rois.data_ptr(), argmax.data_ptr(), + grad_input.data_ptr(), pooled_height, pooled_width, + channels, height, width); + }); + + AT_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/sync_bn.cpp b/mmcv/mmcv/ops/csrc/pytorch/sync_bn.cpp new file mode 100644 index 0000000..b8b29a8 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/sync_bn.cpp @@ -0,0 +1,158 @@ +#include "pytorch_cpp_helper.hpp" + +#ifdef MMCV_WITH_CUDA +void SyncBNForwardMeanCUDAKernelLauncher(const Tensor input, Tensor mean); + +void SyncBNForwardVarCUDAKernelLauncher(const Tensor input, const Tensor mean, + Tensor var); + +void SyncBNForwardOutputCUDAKernelLauncher( + const Tensor input, const Tensor mean, const Tensor var, + Tensor running_mean, Tensor running_var, const Tensor weight, + const Tensor bias, Tensor norm, Tensor std, Tensor output, float eps, + float momentum, int group_size); + +void SyncBNBackwardParamCUDAKernelLauncher(const Tensor grad_output, + const Tensor norm, + Tensor grad_weight, + Tensor grad_bias); + +void SyncBNBackwardDataCUDAKernelLauncher(const Tensor grad_output, + const Tensor weight, + const Tensor grad_weight, + const Tensor grad_bias, + const Tensor norm, const Tensor std, + Tensor grad_input); + +void sync_bn_forward_mean_cuda(const Tensor input, Tensor mean) { + SyncBNForwardMeanCUDAKernelLauncher(input, mean); +} + +void sync_bn_forward_var_cuda(const Tensor input, const Tensor mean, + Tensor var) { + SyncBNForwardVarCUDAKernelLauncher(input, mean, var); +} + +void sync_bn_forward_output_cuda(const Tensor input, const Tensor mean, + const Tensor var, Tensor running_mean, + Tensor running_var, const Tensor weight, + const Tensor bias, Tensor norm, Tensor std, + Tensor output, float eps, float momentum, + int group_size) { + SyncBNForwardOutputCUDAKernelLauncher(input, mean, var, running_mean, + running_var, weight, bias, norm, std, + output, eps, momentum, group_size); +} + +void sync_bn_backward_param_cuda(const Tensor grad_output, const Tensor norm, + Tensor grad_weight, Tensor grad_bias) { + SyncBNBackwardParamCUDAKernelLauncher(grad_output, norm, grad_weight, + grad_bias); +} + +void sync_bn_backward_data_cuda(const Tensor grad_output, const Tensor weight, + const Tensor grad_weight, + const Tensor grad_bias, const Tensor norm, + const Tensor std, Tensor grad_input) { + SyncBNBackwardDataCUDAKernelLauncher(grad_output, weight, grad_weight, + grad_bias, norm, std, grad_input); +} +#endif + +void sync_bn_forward_mean(const Tensor input, Tensor mean) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(mean); + sync_bn_forward_mean_cuda(input, mean); +#else + AT_ERROR("SyncBatchNorm is not compiled with GPU support"); +#endif + } else { + AT_ERROR("SyncBatchNorm is not implemented on CPU"); + } +} + +void sync_bn_forward_var(const Tensor input, const Tensor mean, Tensor var) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(mean); + CHECK_CUDA_INPUT(var); + sync_bn_forward_var_cuda(input, mean, var); +#else + AT_ERROR("SyncBatchNorm is not compiled with GPU support"); +#endif + } else { + AT_ERROR("SyncBatchNorm is not implemented on CPU"); + } +} + +void sync_bn_forward_output(const Tensor input, const Tensor mean, + const Tensor var, const Tensor weight, + const Tensor bias, Tensor running_mean, + Tensor running_var, Tensor norm, Tensor std, + Tensor output, float eps, float momentum, + int group_size) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(mean); + CHECK_CUDA_INPUT(var); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(bias); + CHECK_CUDA_INPUT(running_mean); + CHECK_CUDA_INPUT(running_var); + CHECK_CUDA_INPUT(norm); + CHECK_CUDA_INPUT(std); + CHECK_CUDA_INPUT(output); + sync_bn_forward_output_cuda(input, mean, var, running_mean, running_var, + weight, bias, norm, std, output, eps, momentum, + group_size); +#else + AT_ERROR("SyncBatchNorm is not compiled with GPU support"); +#endif + } else { + AT_ERROR("SyncBatchNorm is not implemented on CPU"); + } +} + +void sync_bn_backward_param(const Tensor grad_output, const Tensor norm, + Tensor grad_weight, Tensor grad_bias) { + if (grad_output.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(grad_output); + CHECK_CUDA_INPUT(norm); + CHECK_CUDA_INPUT(grad_weight); + CHECK_CUDA_INPUT(grad_bias); + sync_bn_backward_param_cuda(grad_output, norm, grad_weight, grad_bias); +#else + AT_ERROR("SyncBatchNorm is not compiled with GPU support"); +#endif + } else { + AT_ERROR("SyncBatchNorm is not implemented on CPU"); + } +} + +void sync_bn_backward_data(const Tensor grad_output, const Tensor weight, + const Tensor grad_weight, const Tensor grad_bias, + const Tensor norm, const Tensor std, + Tensor grad_input) { + if (grad_output.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(grad_output); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(grad_weight); + CHECK_CUDA_INPUT(grad_bias); + CHECK_CUDA_INPUT(norm); + CHECK_CUDA_INPUT(std); + CHECK_CUDA_INPUT(grad_input); + sync_bn_backward_data_cuda(grad_output, weight, grad_weight, grad_bias, + norm, std, grad_input); +#else + AT_ERROR("SyncBatchNorm is not compiled with GPU support"); +#endif + } else { + AT_ERROR("SyncBatchNorm is not implemented on CPU"); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/sync_bn_cuda.cu b/mmcv/mmcv/ops/csrc/pytorch/sync_bn_cuda.cu new file mode 100644 index 0000000..334d9e0 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/sync_bn_cuda.cu @@ -0,0 +1,109 @@ +#include "pytorch_cuda_helper.hpp" +#include "sync_bn_cuda_kernel.cuh" + +void SyncBNForwardMeanCUDAKernelLauncher(const Tensor input, Tensor mean) { + int num = input.size(0); + int channels = input.size(1); + int spatial = input.size(2); + + at::cuda::CUDAGuard device_guard(input.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "sync_bn_forward_mean_cuda_kernel", [&] { + sync_bn_forward_mean_cuda_kernel + <<>>( + input.data_ptr(), mean.data_ptr(), num, + channels, spatial); + }); + AT_CUDA_CHECK(cudaGetLastError()); +} + +void SyncBNForwardVarCUDAKernelLauncher(const Tensor input, const Tensor mean, + Tensor var) { + int num = input.size(0); + int channels = input.size(1); + int spatial = input.size(2); + + at::cuda::CUDAGuard device_guard(input.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "sync_bn_forward_mean_cuda_kernel", [&] { + sync_bn_forward_var_cuda_kernel + <<>>( + input.data_ptr(), mean.data_ptr(), + var.data_ptr(), num, channels, spatial); + }); + AT_CUDA_CHECK(cudaGetLastError()); +} + +void SyncBNForwardOutputCUDAKernelLauncher( + const Tensor input, const Tensor mean, const Tensor var, + Tensor running_mean, Tensor running_var, const Tensor weight, + const Tensor bias, Tensor norm, Tensor std, Tensor output, float eps, + float momentum, int group_size) { + int num = input.size(0); + int channels = input.size(1); + int spatial = input.size(2); + + at::cuda::CUDAGuard device_guard(input.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "sync_bn_forward_mean_cuda_kernel", [&] { + sync_bn_forward_output_cuda_kernel + <<>>( + input.data_ptr(), mean.data_ptr(), + var.data_ptr(), running_mean.data_ptr(), + running_var.data_ptr(), weight.data_ptr(), + bias.data_ptr(), norm.data_ptr(), + std.data_ptr(), output.data_ptr(), num, + channels, spatial, eps, momentum, group_size); + }); + AT_CUDA_CHECK(cudaGetLastError()); +} + +void SyncBNBackwardParamCUDAKernelLauncher(const Tensor grad_output, + const Tensor norm, + Tensor grad_weight, + Tensor grad_bias) { + int num = grad_output.size(0); + int channels = grad_output.size(1); + int spatial = grad_output.size(2); + + at::cuda::CUDAGuard device_guard(grad_output.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_output.scalar_type(), "sync_bn_backward_param_cuda_kernel", [&] { + sync_bn_backward_param_cuda_kernel + <<>>( + grad_output.data_ptr(), norm.data_ptr(), + grad_weight.data_ptr(), grad_bias.data_ptr(), num, + channels, spatial); + }); + AT_CUDA_CHECK(cudaGetLastError()); +} + +void SyncBNBackwardDataCUDAKernelLauncher(const Tensor grad_output, + const Tensor weight, + const Tensor grad_weight, + const Tensor grad_bias, + const Tensor norm, const Tensor std, + Tensor grad_input) { + int output_size = grad_input.numel(); + int num = grad_input.size(0); + int channels = grad_input.size(1); + int spatial = grad_input.size(2); + + at::cuda::CUDAGuard device_guard(grad_input.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_output.scalar_type(), "sync_bn_backward_data_cuda_kernel", [&] { + sync_bn_backward_data_cuda_kernel + <<>>( + output_size, grad_output.data_ptr(), + weight.data_ptr(), grad_weight.data_ptr(), + grad_bias.data_ptr(), norm.data_ptr(), + std.data_ptr(), grad_input.data_ptr(), num, + channels, spatial); + }); + AT_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/tin_shift.cpp b/mmcv/mmcv/ops/csrc/pytorch/tin_shift.cpp new file mode 100644 index 0000000..255ce4f --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/tin_shift.cpp @@ -0,0 +1,51 @@ +#include "pytorch_cpp_helper.hpp" + +#ifdef MMCV_WITH_CUDA +void TINShiftForwardCUDAKernelLauncher(Tensor input, Tensor shift, + Tensor output); + +void TINShiftBackwardCUDAKernelLauncher(Tensor grad_output, Tensor shift, + Tensor grad_input); + +void tin_shift_forward_cuda(Tensor input, Tensor shift, Tensor output) { + TINShiftForwardCUDAKernelLauncher(input, shift, output); +} + +void tin_shift_backward_cuda(Tensor grad_output, Tensor shift, + Tensor grad_input) { + TINShiftBackwardCUDAKernelLauncher(grad_output, shift, grad_input); +} + +#endif + +void tin_shift_forward(Tensor input, Tensor shift, Tensor output) { + if (input.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(input); + CHECK_CUDA_INPUT(shift); + CHECK_CUDA_INPUT(output); + + tin_shift_forward_cuda(input, shift, output); +#else + AT_ERROR("TINShift is not compiled with GPU support"); +#endif + } else { + AT_ERROR("TINShift is not implemented on CPU"); + } +} + +void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input) { + if (grad_output.device().is_cuda()) { +#ifdef MMCV_WITH_CUDA + CHECK_CUDA_INPUT(grad_output); + CHECK_CUDA_INPUT(shift); + CHECK_CUDA_INPUT(grad_input); + + tin_shift_backward_cuda(grad_output, shift, grad_input); +#else + AT_ERROR("TINShift is not compiled with GPU support"); +#endif + } else { + AT_ERROR("TINShift is not implemented on CPU"); + } +} diff --git a/mmcv/mmcv/ops/csrc/pytorch/tin_shift_cuda.cu b/mmcv/mmcv/ops/csrc/pytorch/tin_shift_cuda.cu new file mode 100644 index 0000000..996816e --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch/tin_shift_cuda.cu @@ -0,0 +1,53 @@ +#include "pytorch_cuda_helper.hpp" +#include "tin_shift_cuda_kernel.cuh" + +void TINShiftForwardCUDAKernelLauncher(Tensor input, Tensor shift, + Tensor output) { + int output_size = output.numel(); + int batch_size = input.size(0); + int t_size = input.size(1); + int channels = input.size(2); + int hw_size = input.size(3); + int group_size = shift.size(1); + int group_channel = channels / group_size; + int num_kernels = batch_size * hw_size * channels; + + at::cuda::CUDAGuard device_guard(input.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "tin_shift_forward_cuda_kernel", [&] { + tin_shift_forward_cuda_kernel + <<>>( + output_size, input.data_ptr(), shift.data_ptr(), + output.data_ptr(), batch_size, channels, t_size, + hw_size, group_size, group_channel); + }); + + AT_CUDA_CHECK(cudaGetLastError()); +} + +void TINShiftBackwardCUDAKernelLauncher(Tensor grad_output, Tensor shift, + Tensor grad_input) { + int output_size = grad_output.numel(); + int batch_size = grad_output.size(0); + int t_size = grad_output.size(1); + int channels = grad_output.size(2); + int hw_size = grad_output.size(3); + int group_size = shift.size(1); + int group_channel = channels / group_size; + int num_kernels = batch_size * hw_size * channels; + + at::cuda::CUDAGuard device_guard(grad_output.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad_output.scalar_type(), "tin_shift_backward_cuda_kernel", [&] { + tin_shift_backward_cuda_kernel + <<>>( + output_size, grad_output.data_ptr(), + shift.data_ptr(), grad_input.data_ptr(), + batch_size, channels, t_size, hw_size, group_size, + group_channel); + }); + + AT_CUDA_CHECK(cudaGetLastError()); +} diff --git a/mmcv/mmcv/ops/csrc/pytorch_cpp_helper.hpp b/mmcv/mmcv/ops/csrc/pytorch_cpp_helper.hpp new file mode 100644 index 0000000..b812e62 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch_cpp_helper.hpp @@ -0,0 +1,22 @@ +#ifndef PYTORCH_CPP_HELPER +#define PYTORCH_CPP_HELPER +#include + +#include + +using namespace at; + +#define CHECK_CUDA(x) \ + TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CPU(x) \ + TORCH_CHECK(!x.device().is_cuda(), #x " must be a CPU tensor") +#define CHECK_CONTIGUOUS(x) \ + TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") +#define CHECK_CUDA_INPUT(x) \ + CHECK_CUDA(x); \ + CHECK_CONTIGUOUS(x) +#define CHECK_CPU_INPUT(x) \ + CHECK_CPU(x); \ + CHECK_CONTIGUOUS(x) + +#endif // PYTORCH_CPP_HELPER diff --git a/mmcv/mmcv/ops/csrc/pytorch_cuda_helper.hpp b/mmcv/mmcv/ops/csrc/pytorch_cuda_helper.hpp new file mode 100644 index 0000000..bf51f79 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/pytorch_cuda_helper.hpp @@ -0,0 +1,18 @@ +#ifndef PYTORCH_CUDA_HELPER +#define PYTORCH_CUDA_HELPER + +#include +#include +#include + +#include + +#include "common_cuda_helper.hpp" + +using at::Half; +using at::Tensor; +using phalf = at::Half; + +#define __PHALF(x) (x) + +#endif // PYTORCH_CUDA_HELPER diff --git a/mmcv/mmcv/ops/csrc/roi_align_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/roi_align_cuda_kernel.cuh new file mode 100644 index 0000000..fb30074 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/roi_align_cuda_kernel.cuh @@ -0,0 +1,205 @@ +#ifndef ROI_ALIGN_CUDA_KERNEL_CUH +#define ROI_ALIGN_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +/*** Forward ***/ +template +__global__ void roi_align_forward_cuda_kernel( + const int nthreads, const T* input, const T* rois, T* output, T* argmax_y, + T* argmax_x, const int pooled_height, const int pooled_width, + const T spatial_scale, const int sampling_ratio, + const int pool_mode, // 0 - max pool, 1 - avg pool + const bool aligned, const int channels, const int height, const int width) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* offset_rois = rois + n * 5; + int roi_batch_ind = offset_rois[0]; + + // Do not using rounding; this implementation detail is critical + T offset = aligned ? (T)0.5 : (T)0.0; + T roi_start_w = offset_rois[1] * spatial_scale - offset; + T roi_start_h = offset_rois[2] * spatial_scale - offset; + T roi_end_w = offset_rois[3] * spatial_scale - offset; + T roi_end_h = offset_rois[4] * spatial_scale - offset; + + T roi_width = roi_end_w - roi_start_w; + T roi_height = roi_end_h - roi_start_h; + if (!aligned) { // for backward-compatibility only + roi_width = max(roi_width, (T)1.); + roi_height = max(roi_height, (T)1.); + } + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + const T* offset_input = + input + (roi_batch_ind * channels + c) * height * width; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = + (sampling_ratio > 0) + ? sampling_ratio + : static_cast(ceil(roi_height / pooled_height)); + int roi_bin_grid_w = (sampling_ratio > 0) + ? sampling_ratio + : static_cast(ceil(roi_width / pooled_width)); + + if (pool_mode == 0) { + // We do max pooling inside a bin + T maxval = -FLT_MAX; + T maxidx_y = -1.f, maxidx_x = -1.f; + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + const T y = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T x = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + T val = + bilinear_interpolate(offset_input, height, width, y, x, index); + if (val > maxval) { + maxval = val; + maxidx_y = y; + maxidx_x = x; + } + } + } + output[index] = maxval; + argmax_y[index] = maxidx_y; + argmax_x[index] = maxidx_x; + } else if (pool_mode == 1) { + // We do average pooling inside a bin + const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1); + T output_val = 0.; + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + const T y = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T x = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + T val = + bilinear_interpolate(offset_input, height, width, y, x, index); + output_val += val; + } + } + output[index] = output_val / count; + } + } +} + +/*** Backward ***/ +template +__global__ void roi_align_backward_cuda_kernel( + const int nthreads, const T* grad_output, const T* rois, const T* argmax_y, + const T* argmax_x, T* grad_input, const int pooled_height, + const int pooled_width, const T spatial_scale, const int sampling_ratio, + const int pool_mode, // 0 - max pool, 1 - avg pool + const bool aligned, const int channels, const int height, const int width) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T grad_output_this_bin = grad_output[index]; + + const T* offset_rois = rois + n * 5; + int roi_batch_ind = offset_rois[0]; + T* offset_grad_input = + grad_input + ((roi_batch_ind * channels + c) * height * width); + + if (pool_mode == 0) { + T y = argmax_y[index], x = argmax_x[index]; + if (y != -1.f) { + T w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, + x_low, x_high, y_low, y_high, index); + + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + atomicAdd(offset_grad_input + y_low * width + x_low, + grad_output_this_bin * w1); + atomicAdd(offset_grad_input + y_low * width + x_high, + grad_output_this_bin * w2); + atomicAdd(offset_grad_input + y_high * width + x_low, + grad_output_this_bin * w3); + atomicAdd(offset_grad_input + y_high * width + x_high, + grad_output_this_bin * w4); + } + } + } else if (pool_mode == 1) { + // Do not using rounding; this implementation detail is critical + T offset = aligned ? (T)0.5 : (T)0.0; + T roi_start_w = offset_rois[1] * spatial_scale - offset; + T roi_start_h = offset_rois[2] * spatial_scale - offset; + T roi_end_w = offset_rois[3] * spatial_scale - offset; + T roi_end_h = offset_rois[4] * spatial_scale - offset; + + T roi_width = roi_end_w - roi_start_w; + T roi_height = roi_end_h - roi_start_h; + if (!aligned) { // for backward-compatibility only + roi_width = max(roi_width, (T)1.); + roi_height = max(roi_height, (T)1.); + } + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = + (sampling_ratio > 0) + ? sampling_ratio + : static_cast(ceil(roi_height / pooled_height)); + int roi_bin_grid_w = + (sampling_ratio > 0) + ? sampling_ratio + : static_cast(ceil(roi_width / pooled_width)); + + // We do average (integral) pooling inside a bin + const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 + + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + const T y = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T x = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + T w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, + x_low, x_high, y_low, y_high, index); + + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + atomicAdd(offset_grad_input + y_low * width + x_low, + grad_output_this_bin * w1 / count); + atomicAdd(offset_grad_input + y_low * width + x_high, + grad_output_this_bin * w2 / count); + atomicAdd(offset_grad_input + y_high * width + x_low, + grad_output_this_bin * w3 / count); + atomicAdd(offset_grad_input + y_high * width + x_high, + grad_output_this_bin * w4 / count); + } + } + } + } + } +} + +#endif // ROI_ALIGN_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/csrc/roi_pool_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/roi_pool_cuda_kernel.cuh new file mode 100644 index 0000000..d499ade --- /dev/null +++ b/mmcv/mmcv/ops/csrc/roi_pool_cuda_kernel.cuh @@ -0,0 +1,92 @@ +#ifndef ROI_POOL_CUDA_KERNEL_CUH +#define ROI_POOL_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +template +__global__ void roi_pool_forward_cuda_kernel( + const int nthreads, const T* input, const T* rois, T* output, int* argmax, + const int pooled_height, const int pooled_width, const T spatial_scale, + const int channels, const int height, const int width) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* offset_rois = rois + n * 5; + int roi_batch_ind = offset_rois[0]; + // calculate the roi region on feature maps + T roi_x1 = offset_rois[1] * spatial_scale; + T roi_y1 = offset_rois[2] * spatial_scale; + T roi_x2 = (offset_rois[3] + 1) * spatial_scale; + T roi_y2 = (offset_rois[4] + 1) * spatial_scale; + + // force malformed rois to be 1x1 + T roi_w = roi_x2 - roi_x1; + T roi_h = roi_y2 - roi_y1; + if (roi_w <= 0 || roi_h <= 0) continue; + + T bin_size_w = roi_w / static_cast(pooled_width); + T bin_size_h = roi_h / static_cast(pooled_height); + + // the corresponding bin region + int bin_x1 = floor(static_cast(pw) * bin_size_w + roi_x1); + int bin_y1 = floor(static_cast(ph) * bin_size_h + roi_y1); + int bin_x2 = ceil(static_cast(pw + 1) * bin_size_w + roi_x1); + int bin_y2 = ceil(static_cast(ph + 1) * bin_size_h + roi_y1); + + // add roi offsets and clip to input boundaries + bin_x1 = min(max(bin_x1, 0), width); + bin_y1 = min(max(bin_y1, 0), height); + bin_x2 = min(max(bin_x2, 0), width); + bin_y2 = min(max(bin_y2, 0), height); + bool is_empty = (bin_y2 <= bin_y1) || (bin_x2 <= bin_x1); + + const T* offset_input = + input + (roi_batch_ind * channels + c) * height * width; + // Define an empty pooling region to be zero + // If nothing is pooled, argmax = -1 causes nothing to be backprop'd + T max_val = is_empty ? 0 : -FLT_MAX; + int max_idx = -1; + for (int h = bin_y1; h < bin_y2; ++h) { + for (int w = bin_x1; w < bin_x2; ++w) { + int offset = h * width + w; + if (offset_input[offset] > max_val) { + max_val = offset_input[offset]; + max_idx = offset; + } + } + } + output[index] = max_val; + if (argmax != NULL) argmax[index] = max_idx; + } +} + +template +__global__ void roi_pool_backward_cuda_kernel( + const int nthreads, const T* grad_output, const T* rois, const int* argmax, + T* grad_input, const int pooled_height, const int pooled_width, + const int channels, const int height, const int width) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c) is an element in the pooled output + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + int roi_batch_ind = rois[n * 5]; + T* grad_input_offset = + grad_input + ((roi_batch_ind * channels + c) * height * width); + int argmax_index = argmax[index]; + + if (argmax_index != -1) { + atomicAdd(grad_input_offset + argmax_index, grad_output[index]); + } + } +} + +#endif // ROI_POOL_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/csrc/sigmoid_focal_loss_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/sigmoid_focal_loss_cuda_kernel.cuh new file mode 100644 index 0000000..fb7c636 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/sigmoid_focal_loss_cuda_kernel.cuh @@ -0,0 +1,70 @@ +#ifndef SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH +#define SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +template +__global__ void sigmoid_focal_loss_forward_cuda_kernel( + const int nthreads, const T* input, const int64_t* target, const T* weight, + T* output, const T gamma, const T alpha, const int num_classes) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + int n = index / num_classes; + int c = index % num_classes; + + int64_t t = target[n]; + T flag_p = (t == c); + T flag_n = (t != c); + + // p = sigmoid(x) = 1. / 1. + expf(-x) + T p = (T)1. / ((T)1. + expf(-input[index])); + + // (1 - p)**gamma * log(p) + T term_p = pow(((T)1. - p), gamma) * log(max(p, (T)FLT_MIN)); + // p**gamma * log(1 - p) + T term_n = pow(p, gamma) * log(max((T)1. - p, (T)FLT_MIN)); + + output[index] = (T)0.; + output[index] += -flag_p * alpha * term_p; + output[index] += -flag_n * ((T)1. - alpha) * term_n; + if (weight != NULL) { + output[index] *= weight[t]; + } + } +} + +template +__global__ void sigmoid_focal_loss_backward_cuda_kernel( + const int nthreads, const T* input, const int64_t* target, const T* weight, + T* grad_input, const T gamma, const T alpha, const int num_classes) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + int n = index / num_classes; + int c = index % num_classes; + + int64_t t = target[n]; + T flag_p = (t == c); + T flag_n = (t != c); + + // p = sigmoid(x) = 1. / 1. + expf(-x) + T p = (T)1. / ((T)1. + exp(-input[index])); + + // (1 - p)**gamma * (1 - p - gamma*p*log(p)) + T term_p = pow(((T)1. - p), gamma) * + ((T)1. - p - (gamma * p * log(max(p, (T)FLT_MIN)))); + // p**gamma * (gamma * (1 - p) * log(1 - p) - p) + T term_n = pow(p, gamma) * + (gamma * ((T)1. - p) * log(max((T)1. - p, (T)FLT_MIN)) - p); + + grad_input[index] = (T)0.; + grad_input[index] += -flag_p * alpha * term_p; + grad_input[index] += -flag_n * ((T)1. - alpha) * term_n; + if (weight != NULL) { + grad_input[index] *= weight[t]; + } + } +} + +#endif // SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/csrc/softmax_focal_loss_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/softmax_focal_loss_cuda_kernel.cuh new file mode 100644 index 0000000..c8ff05b --- /dev/null +++ b/mmcv/mmcv/ops/csrc/softmax_focal_loss_cuda_kernel.cuh @@ -0,0 +1,71 @@ +#ifndef SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH +#define SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +template +__global__ void softmax_focal_loss_forward_cuda_kernel( + const int nthreads, const T* softmax, const int64_t* target, + const T* weight, T* output, const T gamma, const T alpha, + const int num_classes) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + int64_t label = target[index]; + T pred = softmax[index * num_classes + label]; + + if (label >= 0) { + output[index] = + -alpha * pow((T)1. - pred, gamma) * log(max(pred, (T)FLT_MIN)); + } else { + output[index] = 0; + } + if (weight != NULL) { + output[index] *= weight[label]; + } + } +} + +template +__global__ void softmax_focal_loss_backward_cuda1_kernel( + const int nthreads, const T* softmax, const int64_t* target, + const T* weight, T* buff, const T gamma, const T alpha, + const int num_classes) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + int64_t label = target[index]; + T pred = softmax[index * num_classes + label]; + + if (label >= 0) { + buff[index] = alpha * (-pow((T)1. - pred, gamma) + + gamma * pow((T)1. - pred, gamma - 1) * pred * + log(max(pred, (T)FLT_MIN))); + } else { + buff[index] = 0; + } + if (weight != NULL) { + buff[index] *= weight[label]; + } + } +} + +template +__global__ void softmax_focal_loss_backward_cuda2_kernel( + const int nthreads, const T* softmax, const int64_t* target, const T* buff, + T* grad_input, const int num_classes) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + int n = index / num_classes; + int c = index % num_classes; + int64_t label = target[n]; + + if (label >= 0) { + T flag = (label == c ? (T)1. : (T)0.); + grad_input[index] = buff[n] * (flag - softmax[index]); + } else { + grad_input[index] = 0; + } + } +} + +#endif // SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/csrc/sync_bn_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/sync_bn_cuda_kernel.cuh new file mode 100644 index 0000000..41844f8 --- /dev/null +++ b/mmcv/mmcv/ops/csrc/sync_bn_cuda_kernel.cuh @@ -0,0 +1,330 @@ +#ifndef SYNCBN_CUDA_KERNEL_CUH +#define SYNCBN_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +template +__global__ void sync_bn_forward_mean_cuda_kernel(const T *input, float *mean, + int num, int channels, + int spatial) { + __shared__ float buffer[THREADS_PER_BLOCK]; + int tid = threadIdx.x; + int c = blockIdx.x; + buffer[tid] = 0; + for (int i = tid; i < num * spatial; i += blockDim.x) { + int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; + buffer[tid] += input[index]; + } + __syncthreads(); + + for (int s = blockDim.x / 2; s > 0; s >>= 1) { + if (tid < s) { + buffer[tid] += buffer[tid + s]; + } + __syncthreads(); + } + int total = num * spatial; + if (tid == 0) { + mean[c] = buffer[0] / total; + } +} + +template <> +__global__ void sync_bn_forward_mean_cuda_kernel(const phalf *input, + float *mean, int num, + int channels, int spatial) { + __shared__ float buffer[THREADS_PER_BLOCK]; + int tid = threadIdx.x; + int c = blockIdx.x; + buffer[tid] = 0; + for (int i = tid; i < num * spatial; i += blockDim.x) { + int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; + buffer[tid] += static_cast(input[index]); + } + __syncthreads(); + + for (int s = blockDim.x / 2; s > 0; s >>= 1) { + if (tid < s) { + buffer[tid] += buffer[tid + s]; + } + __syncthreads(); + } + int total = num * spatial; + if (tid == 0) { + mean[c] = buffer[0] / total; + } +} + +template +__global__ void sync_bn_forward_var_cuda_kernel(const T *input, + const float *mean, float *var, + int num, int channels, + int spatial) { + __shared__ float buffer[THREADS_PER_BLOCK]; + int tid = threadIdx.x; + int c = blockIdx.x; + buffer[tid] = 0; + for (int i = tid; i < num * spatial; i += blockDim.x) { + int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; + float td = input[index] - mean[c]; + buffer[tid] += td * td; + } + __syncthreads(); + for (int s = blockDim.x / 2; s > 0; s >>= 1) { + if (tid < s) { + buffer[tid] += buffer[tid + s]; + } + __syncthreads(); + } + int total = num * spatial; + if (tid == 0) { + var[c] = buffer[0] / total; + } +} + +template <> +__global__ void sync_bn_forward_var_cuda_kernel(const phalf *input, + const float *mean, float *var, + int num, int channels, + int spatial) { + __shared__ float buffer[THREADS_PER_BLOCK]; + int tid = threadIdx.x; + int c = blockIdx.x; + buffer[tid] = 0; + for (int i = tid; i < num * spatial; i += blockDim.x) { + int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; + float td = static_cast(input[index]) - mean[c]; + buffer[tid] += td * td; + } + __syncthreads(); + for (int s = blockDim.x / 2; s > 0; s >>= 1) { + if (tid < s) { + buffer[tid] += buffer[tid + s]; + } + __syncthreads(); + } + int total = num * spatial; + if (tid == 0) { + var[c] = buffer[0] / total; + } +} + +template +__global__ void sync_bn_forward_output_cuda_kernel( + const T *input, const float *mean, const float *var, float *running_mean, + float *running_var, const float *weight, const float *bias, float *norm, + float *std, T *output, int num, int channels, int spatial, float eps, + float momentum, int group_size) { + int tid = threadIdx.x; + int c = blockIdx.x; + float mean_value = mean[c]; + float std_value = sqrt(var[c] + eps); + + if (weight != nullptr) { + float weight_value = weight[c]; + float bias_value = bias[c]; + if (norm != nullptr) { + for (int i = tid; i < num * spatial; i += blockDim.x) { + int index = + (i / spatial) * channels * spatial + c * spatial + i % spatial; + norm[index] = (input[index] - mean_value) / std_value; + output[index] = norm[index] * weight_value + bias_value; + } + } else { + for (int i = tid; i < num * spatial; i += blockDim.x) { + int index = + (i / spatial) * channels * spatial + c * spatial + i % spatial; + output[index] = + (input[index] - mean_value) / std_value * weight_value + bias_value; + } + } + } else { + if (norm != nullptr) { + for (int i = tid; i < num * spatial; i += blockDim.x) { + int index = + (i / spatial) * channels * spatial + c * spatial + i % spatial; + output[index] = norm[index] = (input[index] - mean_value) / std_value; + } + } else { + for (int i = tid; i < num * spatial; i += blockDim.x) { + int index = + (i / spatial) * channels * spatial + c * spatial + i % spatial; + output[index] = (input[index] - mean_value) / std_value; + } + } + } + if (tid == 0) { + if (std != nullptr) std[c] = std_value; + if (running_mean != nullptr) { + running_mean[c] = + momentum * mean_value + (1 - momentum) * running_mean[c]; + int count = num * spatial * group_size; + float var_unbias = count > 1 ? var[c] * count / (count - 1) : var[c]; + running_var[c] = momentum * var_unbias + (1 - momentum) * running_var[c]; + } + } +} + +template <> +__global__ void sync_bn_forward_output_cuda_kernel( + const phalf *input, const float *mean, const float *var, + float *running_mean, float *running_var, const float *weight, + const float *bias, float *norm, float *std, phalf *output, int num, + int channels, int spatial, float eps, float momentum, int group_size) { + int tid = threadIdx.x; + int c = blockIdx.x; + float mean_value = mean[c]; + float std_value = sqrt(var[c] + eps); + if (weight != nullptr) { + float weight_value = weight[c]; + float bias_value = bias[c]; + if (norm != nullptr) { + for (int i = tid; i < num * spatial; i += blockDim.x) { + int index = + (i / spatial) * channels * spatial + c * spatial + i % spatial; + norm[index] = + (static_cast(input[index]) - mean_value) / std_value; + output[index] = + static_cast(norm[index] * weight_value + bias_value); + } + } else { + for (int i = tid; i < num * spatial; i += blockDim.x) { + int index = + (i / spatial) * channels * spatial + c * spatial + i % spatial; + output[index] = + static_cast((static_cast(input[index]) - mean_value) / + std_value * weight_value + + bias_value); + } + } + } else { + if (norm != nullptr) { + for (int i = tid; i < num * spatial; i += blockDim.x) { + int index = + (i / spatial) * channels * spatial + c * spatial + i % spatial; + norm[index] = + (static_cast(input[index]) - mean_value) / std_value; + output[index] = static_cast(norm[index]); + } + } else { + for (int i = tid; i < num * spatial; i += blockDim.x) { + int index = + (i / spatial) * channels * spatial + c * spatial + i % spatial; + output[index] = static_cast( + (static_cast(input[index]) - mean_value) / std_value); + } + } + } + if (tid == 0) { + if (std != nullptr) std[c] = std_value; + if (running_mean != nullptr) { + running_mean[c] = + momentum * mean_value + (1 - momentum) * running_mean[c]; + int count = num * spatial * group_size; + float var_unbias = count > 1 ? var[c] * count / (count - 1) : var[c]; + running_var[c] = momentum * var_unbias + (1 - momentum) * running_var[c]; + } + } +} + +template +__global__ void sync_bn_backward_param_cuda_kernel(const T *grad_output, + const float *norm, + float *grad_weight, + float *grad_bias, int num, + int channels, int spatial) { + __shared__ float buffer1[THREADS_PER_BLOCK]; + __shared__ float buffer2[THREADS_PER_BLOCK]; + + int tid = threadIdx.x; + int c = blockIdx.x; + buffer1[tid] = buffer2[tid] = 0; + for (int i = tid; i < num * spatial; i += blockDim.x) { + int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; + buffer1[tid] += grad_output[index] * norm[index]; + buffer2[tid] += grad_output[index]; + } + __syncthreads(); + + for (int s = blockDim.x / 2; s > 0; s >>= 1) { + if (tid < s) { + buffer1[tid] += buffer1[tid + s]; + buffer2[tid] += buffer2[tid + s]; + } + __syncthreads(); + } + if (tid == 0) { + grad_weight[c] = buffer1[0]; + grad_bias[c] = buffer2[0]; + } +} + +template <> +__global__ void sync_bn_backward_param_cuda_kernel(const phalf *grad_output, + const float *norm, + float *grad_weight, + float *grad_bias, int num, + int channels, int spatial) { + __shared__ float buffer1[THREADS_PER_BLOCK]; + __shared__ float buffer2[THREADS_PER_BLOCK]; + + int tid = threadIdx.x; + int c = blockIdx.x; + buffer1[tid] = buffer2[tid] = 0; + for (int i = tid; i < num * spatial; i += blockDim.x) { + int index = (i / spatial) * channels * spatial + c * spatial + i % spatial; + buffer1[tid] += static_cast(grad_output[index]) * norm[index]; + buffer2[tid] += static_cast(grad_output[index]); + } + __syncthreads(); + + for (int s = blockDim.x / 2; s > 0; s >>= 1) { + if (tid < s) { + buffer1[tid] += buffer1[tid + s]; + buffer2[tid] += buffer2[tid + s]; + } + __syncthreads(); + } + if (tid == 0) { + grad_weight[c] = buffer1[0]; + grad_bias[c] = buffer2[0]; + } +} + +template +__global__ void sync_bn_backward_data_cuda_kernel( + int output_size, const T *grad_output, const float *weight, + const float *grad_weight, const float *grad_bias, const float *norm, + const float *std, T *grad_input, int num, int channels, int spatial) { + int factor = num * spatial; + CUDA_1D_KERNEL_LOOP(index, output_size) { + int c = (index / spatial) % channels; + grad_input[index] = + weight[c] * + (grad_output[index] - + (grad_weight[c] * norm[index] + grad_bias[c]) / factor) / + std[c]; + } +} + +template <> +__global__ void sync_bn_backward_data_cuda_kernel( + int output_size, const phalf *grad_output, const float *weight, + const float *grad_weight, const float *grad_bias, const float *norm, + const float *std, phalf *grad_input, int num, int channels, int spatial) { + int factor = num * spatial; + CUDA_1D_KERNEL_LOOP(index, output_size) { + int c = (index / spatial) % channels; + grad_input[index] = static_cast( + weight[c] * + (static_cast(grad_output[index]) - + (grad_weight[c] * norm[index] + grad_bias[c]) / factor) / + std[c]); + } +} + +#endif // SYNCBN_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/csrc/tin_shift_cuda_kernel.cuh b/mmcv/mmcv/ops/csrc/tin_shift_cuda_kernel.cuh new file mode 100644 index 0000000..352244b --- /dev/null +++ b/mmcv/mmcv/ops/csrc/tin_shift_cuda_kernel.cuh @@ -0,0 +1,60 @@ +#ifndef TIN_SHIFT_CUDA_KERNEL_CUH +#define TIN_SHIFT_CUDA_KERNEL_CUH + +#ifdef MMCV_USE_PARROTS +#include "parrots_cuda_helper.hpp" +#else +#include "pytorch_cuda_helper.hpp" +#endif + +template +__global__ void tin_shift_forward_cuda_kernel( + const int nthreads, const T* input, const int* shift, T* output, + const int batch_size, const int channels, const int t_size, + const int hw_size, const int group_size, const int group_channel) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + const int hw_index = index % hw_size; + const int j = (index / hw_size) % channels; + + const int n_index = (index / hw_size / channels) % batch_size; + int group_id = j / group_channel; + int t_shift = shift[n_index * group_size + group_id]; + int offset = n_index * t_size * hw_size * channels + hw_size * j + hw_index; + for (int i = 0; i < t_size; i++) { + int now_t = i + t_shift; + int data_id = i * hw_size * channels + offset; + if (now_t < 0 || now_t >= t_size) { + continue; + } + int out_id = now_t * hw_size * channels + offset; + output[out_id] = input[data_id]; + } + } +} + +template +__global__ void tin_shift_backward_cuda_kernel( + const int nthreads, const T* input, const int* shift, T* output, + const int batch_size, const int channels, const int t_size, + const int hw_size, const int group_size, const int group_channel) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + const int hw_index = index % hw_size; + const int j = (index / hw_size) % channels; + + const int n_index = (index / hw_size / channels) % batch_size; + int group_id = j / group_channel; + int t_shift = shift[n_index * group_size + group_id]; + int offset = n_index * t_size * hw_size * channels + hw_size * j + hw_index; + for (int i = 0; i < t_size; i++) { + int now_t = i + t_shift; + int data_id = i * hw_size * channels + offset; + if (now_t < 0 || now_t >= t_size) { + continue; + } + int out_id = now_t * hw_size * channels + offset; + output[out_id] = input[data_id]; + } + } +} + +#endif // TIN_SHIFT_CUDA_KERNEL_CUH diff --git a/mmcv/mmcv/ops/deform_conv.py b/mmcv/mmcv/ops/deform_conv.py new file mode 100644 index 0000000..250e096 --- /dev/null +++ b/mmcv/mmcv/ops/deform_conv.py @@ -0,0 +1,324 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair, _single + +from mmcv.utils import deprecated_api_warning +from ..cnn import CONV_LAYERS +from ..utils import ext_loader, print_log + +ext_module = ext_loader.load_ext('_ext', [ + 'deform_conv_forward', 'deform_conv_backward_input', + 'deform_conv_backward_parameters' +]) + + +class DeformConv2dFunction(Function): + + @staticmethod + def symbolic(g, + input, + offset, + weight, + stride, + padding, + dilation, + groups, + deform_groups, + bias=False, + im2col_step=32): + return g.op( + 'MMCVDeformConv2d', + input, + offset, + weight, + stride_i=stride, + padding_i=padding, + dilation_i=dilation, + groups_i=groups, + deform_groups_i=deform_groups, + bias_i=bias, + im2col_step_i=im2col_step) + + @staticmethod + def forward(ctx, + input, + offset, + weight, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1, + bias=False, + im2col_step=32): + if input is not None and input.dim() != 4: + raise ValueError( + f'Expected 4D tensor as input, got {input.dim()}D tensor \ + instead.') + assert bias is False, 'Only support bias is False.' + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deform_groups = deform_groups + ctx.im2col_step = im2col_step + + ctx.save_for_backward(input, offset, weight) + + output = input.new_empty( + DeformConv2dFunction._output_size(ctx, input, weight)) + + ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones + + cur_im2col_step = min(ctx.im2col_step, input.size(0)) + assert (input.size(0) % + cur_im2col_step) == 0, 'im2col step must divide batchsize' + ext_module.deform_conv_forward( + input, + weight, + offset, + output, + ctx.bufs_[0], + ctx.bufs_[1], + kW=weight.size(3), + kH=weight.size(2), + dW=ctx.stride[1], + dH=ctx.stride[0], + padW=ctx.padding[1], + padH=ctx.padding[0], + dilationW=ctx.dilation[1], + dilationH=ctx.dilation[0], + group=ctx.groups, + deformable_group=ctx.deform_groups, + im2col_step=cur_im2col_step) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, weight = ctx.saved_tensors + + grad_input = grad_offset = grad_weight = None + + cur_im2col_step = min(ctx.im2col_step, input.size(0)) + assert (input.size(0) % + cur_im2col_step) == 0, 'im2col step must divide batchsize' + + grad_output = grad_output.contiguous() + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + ext_module.deform_conv_backward_input( + input, + offset, + grad_output, + grad_input, + grad_offset, + weight, + ctx.bufs_[0], + kW=weight.size(3), + kH=weight.size(2), + dW=ctx.stride[1], + dH=ctx.stride[0], + padW=ctx.padding[1], + padH=ctx.padding[0], + dilationW=ctx.dilation[1], + dilationH=ctx.dilation[0], + group=ctx.groups, + deformable_group=ctx.deform_groups, + im2col_step=cur_im2col_step) + + if ctx.needs_input_grad[2]: + grad_weight = torch.zeros_like(weight) + ext_module.deform_conv_backward_parameters( + input, + offset, + grad_output, + grad_weight, + ctx.bufs_[0], + ctx.bufs_[1], + kW=weight.size(3), + kH=weight.size(2), + dW=ctx.stride[1], + dH=ctx.stride[0], + padW=ctx.padding[1], + padH=ctx.padding[0], + dilationW=ctx.dilation[1], + dilationH=ctx.dilation[0], + group=ctx.groups, + deformable_group=ctx.deform_groups, + scale=1, + im2col_step=cur_im2col_step) + + return grad_input, grad_offset, grad_weight, \ + None, None, None, None, None, None, None + + @staticmethod + def _output_size(ctx, input, weight): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = ctx.padding[d] + kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = ctx.stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, ) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError( + 'convolution input is too small (output would be ' + + 'x'.join(map(str, output_size)) + ')') + return output_size + + +deform_conv2d = DeformConv2dFunction.apply + + +class DeformConv2d(nn.Module): + + @deprecated_api_warning({'deformable_groups': 'deform_groups'}, + cls_name='DeformConv2d') + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1, + bias=False): + super(DeformConv2d, self).__init__() + + assert not bias, \ + f'bias={bias} is not supported in DeformConv2d.' + assert in_channels % groups == 0, \ + f'in_channels {in_channels} cannot be divisible by groups {groups}' + assert out_channels % groups == 0, \ + f'out_channels {out_channels} cannot be divisible by groups \ + {groups}' + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deform_groups = deform_groups + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + # only weight, no bias + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // self.groups, + *self.kernel_size)) + + self.reset_parameters() + + def reset_parameters(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + + def forward(self, x, offset): + # To fix an assert error in deform_conv_cuda.cpp:128 + # input image is smaller than kernel + input_pad = (x.size(2) < self.kernel_size[0]) or (x.size(3) < + self.kernel_size[1]) + if input_pad: + pad_h = max(self.kernel_size[0] - x.size(2), 0) + pad_w = max(self.kernel_size[1] - x.size(3), 0) + x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() + offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0) + offset = offset.contiguous() + out = deform_conv2d(x, offset, self.weight, self.stride, self.padding, + self.dilation, self.groups, self.deform_groups) + if input_pad: + out = out[:, :, :out.size(2) - pad_h, :out.size(3) - + pad_w].contiguous() + return out + + +@CONV_LAYERS.register_module('DCN') +class DeformConv2dPack(DeformConv2d): + """A Deformable Conv Encapsulation that acts as normal Conv layers. + + The offset tensor is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`. + The spatial arrangement is like: + + .. code:: text + + (x0, y0) (x1, y1) (x2, y2) + (x3, y3) (x4, y4) (x5, y5) + (x6, y6) (x7, y7) (x8, y8) + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int or tuple[int]): Same as nn.Conv2d. + padding (int or tuple[int]): Same as nn.Conv2d. + dilation (int or tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. + """ + + _version = 2 + + def __init__(self, *args, **kwargs): + super(DeformConv2dPack, self).__init__(*args, **kwargs) + self.conv_offset = nn.Conv2d( + self.in_channels, + self.deform_groups * 2 * self.kernel_size[0] * self.kernel_size[1], + kernel_size=self.kernel_size, + stride=_pair(self.stride), + padding=_pair(self.padding), + dilation=_pair(self.dilation), + bias=True) + self.init_offset() + + def init_offset(self): + self.conv_offset.weight.data.zero_() + self.conv_offset.bias.data.zero_() + + def forward(self, x): + offset = self.conv_offset(x) + return deform_conv2d(x, offset, self.weight, self.stride, self.padding, + self.dilation, self.groups, self.deform_groups) + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + version = local_metadata.get('version', None) + + if version is None or version < 2: + # the key is different in early versions + # In version < 2, DeformConvPack loads previous benchmark models. + if (prefix + 'conv_offset.weight' not in state_dict + and prefix[:-1] + '_offset.weight' in state_dict): + state_dict[prefix + 'conv_offset.weight'] = state_dict.pop( + prefix[:-1] + '_offset.weight') + if (prefix + 'conv_offset.bias' not in state_dict + and prefix[:-1] + '_offset.bias' in state_dict): + state_dict[prefix + + 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + + '_offset.bias') + + if version is not None and version > 1: + print_log( + f'DeformConv2dPack {prefix.rstrip(".")} is upgraded to ' + 'version 2.', + logger='root') + + super()._load_from_state_dict(state_dict, prefix, local_metadata, + strict, missing_keys, unexpected_keys, + error_msgs) diff --git a/mmcv/mmcv/ops/deform_roi_pool.py b/mmcv/mmcv/ops/deform_roi_pool.py new file mode 100644 index 0000000..8ec9dd7 --- /dev/null +++ b/mmcv/mmcv/ops/deform_roi_pool.py @@ -0,0 +1,203 @@ +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['deform_roi_pool_forward', 'deform_roi_pool_backward']) + + +class DeformRoIPoolFunction(Function): + + @staticmethod + def symbolic(g, input, rois, offset, output_size, spatial_scale, + sampling_ratio, gamma): + return g.op( + 'MMCVDeformRoIPool', + input, + rois, + offset, + pooled_height=output_size[0], + pooled_width=output_size[1], + spatial_scale=spatial_scale, + sampling_ratio=sampling_ratio, + gamma=gamma) + + @staticmethod + def forward(ctx, + input, + rois, + offset, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + if offset is None: + offset = input.new_zeros(0) + ctx.output_size = _pair(output_size) + ctx.spatial_scale = float(spatial_scale) + ctx.sampling_ratio = int(sampling_ratio) + ctx.gamma = float(gamma) + + assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' + + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], + ctx.output_size[1]) + output = input.new_zeros(output_shape) + + ext_module.deform_roi_pool_forward( + input, + rois, + offset, + output, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + gamma=ctx.gamma) + + ctx.save_for_backward(input, rois, offset) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, rois, offset = ctx.saved_tensors + grad_input = grad_output.new_zeros(input.shape) + grad_offset = grad_output.new_zeros(offset.shape) + + ext_module.deform_roi_pool_backward( + grad_output, + input, + rois, + offset, + grad_input, + grad_offset, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + gamma=ctx.gamma) + if grad_offset.numel() == 0: + grad_offset = None + return grad_input, None, grad_offset, None, None, None, None + + +deform_roi_pool = DeformRoIPoolFunction.apply + + +class DeformRoIPool(nn.Module): + + def __init__(self, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + super(DeformRoIPool, self).__init__() + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + self.sampling_ratio = int(sampling_ratio) + self.gamma = float(gamma) + + def forward(self, input, rois, offset=None): + return deform_roi_pool(input, rois, offset, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + + +class DeformRoIPoolPack(DeformRoIPool): + + def __init__(self, + output_size, + output_channels, + deform_fc_channels=1024, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + super(DeformRoIPoolPack, self).__init__(output_size, spatial_scale, + sampling_ratio, gamma) + + self.output_channels = output_channels + self.deform_fc_channels = deform_fc_channels + + self.offset_fc = nn.Sequential( + nn.Linear( + self.output_size[0] * self.output_size[1] * + self.output_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, + self.output_size[0] * self.output_size[1] * 2)) + self.offset_fc[-1].weight.data.zero_() + self.offset_fc[-1].bias.data.zero_() + + def forward(self, input, rois): + assert input.size(1) == self.output_channels + x = deform_roi_pool(input, rois, None, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + rois_num = rois.size(0) + offset = self.offset_fc(x.view(rois_num, -1)) + offset = offset.view(rois_num, 2, self.output_size[0], + self.output_size[1]) + return deform_roi_pool(input, rois, offset, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + + +class ModulatedDeformRoIPoolPack(DeformRoIPool): + + def __init__(self, + output_size, + output_channels, + deform_fc_channels=1024, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + super(ModulatedDeformRoIPoolPack, + self).__init__(output_size, spatial_scale, sampling_ratio, gamma) + + self.output_channels = output_channels + self.deform_fc_channels = deform_fc_channels + + self.offset_fc = nn.Sequential( + nn.Linear( + self.output_size[0] * self.output_size[1] * + self.output_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, + self.output_size[0] * self.output_size[1] * 2)) + self.offset_fc[-1].weight.data.zero_() + self.offset_fc[-1].bias.data.zero_() + + self.mask_fc = nn.Sequential( + nn.Linear( + self.output_size[0] * self.output_size[1] * + self.output_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, + self.output_size[0] * self.output_size[1] * 1), + nn.Sigmoid()) + self.mask_fc[2].weight.data.zero_() + self.mask_fc[2].bias.data.zero_() + + def forward(self, input, rois): + assert input.size(1) == self.output_channels + x = deform_roi_pool(input, rois, None, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + rois_num = rois.size(0) + offset = self.offset_fc(x.view(rois_num, -1)) + offset = offset.view(rois_num, 2, self.output_size[0], + self.output_size[1]) + mask = self.mask_fc(x.view(rois_num, -1)) + mask = mask.view(rois_num, 1, self.output_size[0], self.output_size[1]) + d = deform_roi_pool(input, rois, offset, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + return d * mask diff --git a/mmcv/mmcv/ops/deprecated_wrappers.py b/mmcv/mmcv/ops/deprecated_wrappers.py new file mode 100644 index 0000000..863611b --- /dev/null +++ b/mmcv/mmcv/ops/deprecated_wrappers.py @@ -0,0 +1,42 @@ +# This file is for backward compatibility. +# Module wrappers for empty tensor have been moved to mmcv.cnn.bricks. +import warnings + +from ..cnn.bricks.wrappers import Conv2d, ConvTranspose2d, Linear, MaxPool2d + + +class Conv2d_deprecated(Conv2d): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing Conv2d wrapper from "mmcv.ops" will be deprecated in' + ' the future. Please import them from "mmcv.cnn" instead') + + +class ConvTranspose2d_deprecated(ConvTranspose2d): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing ConvTranspose2d wrapper from "mmcv.ops" will be ' + 'deprecated in the future. Please import them from "mmcv.cnn" ' + 'instead') + + +class MaxPool2d_deprecated(MaxPool2d): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing MaxPool2d wrapper from "mmcv.ops" will be deprecated in' + ' the future. Please import them from "mmcv.cnn" instead') + + +class Linear_deprecated(Linear): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing Linear wrapper from "mmcv.ops" will be deprecated in' + ' the future. Please import them from "mmcv.cnn" instead') diff --git a/mmcv/mmcv/ops/focal_loss.py b/mmcv/mmcv/ops/focal_loss.py new file mode 100644 index 0000000..9e05b16 --- /dev/null +++ b/mmcv/mmcv/ops/focal_loss.py @@ -0,0 +1,211 @@ +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'sigmoid_focal_loss_forward', 'sigmoid_focal_loss_backward', + 'softmax_focal_loss_forward', 'softmax_focal_loss_backward' +]) + + +class SigmoidFocalLossFunction(Function): + + @staticmethod + def symbolic(g, input, target, gamma, alpha, weight, reduction): + return g.op( + 'MMCVSigmoidFocalLoss', + input, + target, + gamma=gamma, + alpha=alpha, + weight=weight, + reduction=reduction) + + @staticmethod + def forward(ctx, + input, + target, + gamma=2.0, + alpha=0.25, + weight=None, + reduction='mean'): + + assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor)) + assert input.dim() == 2 + assert target.dim() == 1 + assert input.size(0) == target.size(0) + if weight is None: + weight = input.new_empty(0) + else: + assert weight.dim() == 1 + assert input.size(1) == weight.size(0) + ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2} + assert reduction in ctx.reduction_dict.keys() + + ctx.gamma = float(gamma) + ctx.alpha = float(alpha) + ctx.reduction = ctx.reduction_dict[reduction] + + output = input.new_zeros(input.size()) + + ext_module.sigmoid_focal_loss_forward( + input, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha) + if ctx.reduction == ctx.reduction_dict['mean']: + output = output.sum() / input.size(0) + elif ctx.reduction == ctx.reduction_dict['sum']: + output = output.sum() + ctx.save_for_backward(input, target, weight) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, target, weight = ctx.saved_tensors + + grad_input = input.new_zeros(input.size()) + + ext_module.sigmoid_focal_loss_backward( + input, + target, + weight, + grad_input, + gamma=ctx.gamma, + alpha=ctx.alpha) + + grad_input *= grad_output + if ctx.reduction == ctx.reduction_dict['mean']: + grad_input /= input.size(0) + return grad_input, None, None, None, None, None + + +sigmoid_focal_loss = SigmoidFocalLossFunction.apply + + +class SigmoidFocalLoss(nn.Module): + + def __init__(self, gamma, alpha, weight=None, reduction='mean'): + super(SigmoidFocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + self.register_buffer('weight', weight) + self.reduction = reduction + + def forward(self, input, target): + return sigmoid_focal_loss(input, target, self.gamma, self.alpha, + self.weight, self.reduction) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(gamma={self.gamma}, ' + s += f'alpha={self.alpha}, ' + s += f'reduction={self.reduction})' + return s + + +class SoftmaxFocalLossFunction(Function): + + @staticmethod + def symbolic(g, input, target, gamma, alpha, weight, reduction): + return g.op( + 'MMCVSoftmaxFocalLoss', + input, + target, + gamma=gamma, + alpha=alpha, + weight=weight, + reduction=reduction) + + @staticmethod + def forward(ctx, + input, + target, + gamma=2.0, + alpha=0.25, + weight=None, + reduction='mean'): + + assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor)) + assert input.dim() == 2 + assert target.dim() == 1 + assert input.size(0) == target.size(0) + if weight is None: + weight = input.new_empty(0) + else: + assert weight.dim() == 1 + assert input.size(1) == weight.size(0) + ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2} + assert reduction in ctx.reduction_dict.keys() + + ctx.gamma = float(gamma) + ctx.alpha = float(alpha) + ctx.reduction = ctx.reduction_dict[reduction] + + channel_stats, _ = torch.max(input, dim=1) + input_softmax = input - channel_stats.unsqueeze(1).expand_as(input) + input_softmax.exp_() + + channel_stats = input_softmax.sum(dim=1) + input_softmax /= channel_stats.unsqueeze(1).expand_as(input) + + output = input.new_zeros(input.size(0)) + ext_module.softmax_focal_loss_forward( + input_softmax, + target, + weight, + output, + gamma=ctx.gamma, + alpha=ctx.alpha) + + if ctx.reduction == ctx.reduction_dict['mean']: + output = output.sum() / input.size(0) + elif ctx.reduction == ctx.reduction_dict['sum']: + output = output.sum() + ctx.save_for_backward(input_softmax, target, weight) + return output + + @staticmethod + def backward(ctx, grad_output): + input_softmax, target, weight = ctx.saved_tensors + buff = input_softmax.new_zeros(input_softmax.size(0)) + grad_input = input_softmax.new_zeros(input_softmax.size()) + + ext_module.softmax_focal_loss_backward( + input_softmax, + target, + weight, + buff, + grad_input, + gamma=ctx.gamma, + alpha=ctx.alpha) + + grad_input *= grad_output + if ctx.reduction == ctx.reduction_dict['mean']: + grad_input /= input_softmax.size(0) + return grad_input, None, None, None, None, None + + +softmax_focal_loss = SoftmaxFocalLossFunction.apply + + +class SoftmaxFocalLoss(nn.Module): + + def __init__(self, gamma, alpha, weight=None, reduction='mean'): + super(SoftmaxFocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + self.register_buffer('weight', weight) + self.reduction = reduction + + def forward(self, input, target): + return softmax_focal_loss(input, target, self.gamma, self.alpha, + self.weight, self.reduction) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(gamma={self.gamma}, ' + s += f'alpha={self.alpha}, ' + s += f'reduction={self.reduction})' + return s diff --git a/mmcv/mmcv/ops/info.py b/mmcv/mmcv/ops/info.py new file mode 100644 index 0000000..912acf4 --- /dev/null +++ b/mmcv/mmcv/ops/info.py @@ -0,0 +1,20 @@ +import torch + +if torch.__version__ == 'parrots': + import parrots + + def get_compiler_version(): + return 'GCC ' + parrots.version.compiler + + def get_compiling_cuda_version(): + return parrots.version.cuda +else: + from ..utils import ext_loader + ext_module = ext_loader.load_ext( + '_ext', ['get_compiler_version', 'get_compiling_cuda_version']) + + def get_compiler_version(): + return ext_module.get_compiler_version() + + def get_compiling_cuda_version(): + return ext_module.get_compiling_cuda_version() diff --git a/mmcv/mmcv/ops/masked_conv.py b/mmcv/mmcv/ops/masked_conv.py new file mode 100644 index 0000000..88b536c --- /dev/null +++ b/mmcv/mmcv/ops/masked_conv.py @@ -0,0 +1,110 @@ +import math + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['masked_im2col_forward', 'masked_col2im_forward']) + + +class MaskedConv2dFunction(Function): + + @staticmethod + def symbolic(g, features, mask, weight, bias, padding, stride): + return g.op( + 'MMCVMaskedConv2d', + features, + mask, + weight, + bias, + padding=padding, + stride=stride) + + @staticmethod + def forward(ctx, features, mask, weight, bias, padding=0, stride=1): + assert mask.dim() == 3 and mask.size(0) == 1 + assert features.dim() == 4 and features.size(0) == 1 + assert features.size()[2:] == mask.size()[1:] + pad_h, pad_w = _pair(padding) + stride_h, stride_w = _pair(stride) + if stride_h != 1 or stride_w != 1: + raise ValueError( + 'Stride could not only be 1 in masked_conv2d currently.') + out_channel, in_channel, kernel_h, kernel_w = weight.size() + + batch_size = features.size(0) + out_h = int( + math.floor((features.size(2) + 2 * pad_h - + (kernel_h - 1) - 1) / stride_h + 1)) + out_w = int( + math.floor((features.size(3) + 2 * pad_w - + (kernel_h - 1) - 1) / stride_w + 1)) + mask_inds = torch.nonzero(mask[0] > 0, as_tuple=False) + output = features.new_zeros(batch_size, out_channel, out_h, out_w) + if mask_inds.numel() > 0: + mask_h_idx = mask_inds[:, 0].contiguous() + mask_w_idx = mask_inds[:, 1].contiguous() + data_col = features.new_zeros(in_channel * kernel_h * kernel_w, + mask_inds.size(0)) + ext_module.masked_im2col_forward( + features, + mask_h_idx, + mask_w_idx, + data_col, + kernel_h=kernel_h, + kernel_w=kernel_w, + pad_h=pad_h, + pad_w=pad_w) + + masked_output = torch.addmm(1, bias[:, None], 1, + weight.view(out_channel, -1), data_col) + ext_module.masked_col2im_forward( + masked_output, + mask_h_idx, + mask_w_idx, + output, + height=out_h, + width=out_w, + channels=out_channel) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + return (None, ) * 5 + + +masked_conv2d = MaskedConv2dFunction.apply + + +class MaskedConv2d(nn.Conv2d): + """A MaskedConv2d which inherits the official Conv2d. + + The masked forward doesn't implement the backward function and only + supports the stride parameter to be 1 currently. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True): + super(MaskedConv2d, + self).__init__(in_channels, out_channels, kernel_size, stride, + padding, dilation, groups, bias) + + def forward(self, input, mask=None): + if mask is None: # fallback to the normal Conv2d + return super(MaskedConv2d, self).forward(input) + else: + return masked_conv2d(input, mask, self.weight, self.bias, + self.padding) diff --git a/mmcv/mmcv/ops/merge_cells.py b/mmcv/mmcv/ops/merge_cells.py new file mode 100644 index 0000000..b881026 --- /dev/null +++ b/mmcv/mmcv/ops/merge_cells.py @@ -0,0 +1,148 @@ +from abc import abstractmethod + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..cnn import ConvModule + + +class BaseMergeCell(nn.Module): + """The basic class for cells used in NAS-FPN and NAS-FCOS. + + BaseMergeCell takes 2 inputs. After applying concolution + on them, they are resized to the target size. Then, + they go through binary_op, which depends on the type of cell. + If with_out_conv is True, the result of output will go through + another convolution layer. + + Args: + in_channels (int): number of input channels in out_conv layer. + out_channels (int): number of output channels in out_conv layer. + with_out_conv (bool): Whether to use out_conv layer + out_conv_cfg (dict): Config dict for convolution layer, which should + contain "groups", "kernel_size", "padding", "bias" to build + out_conv layer. + out_norm_cfg (dict): Config dict for normalization layer in out_conv. + out_conv_order (tuple): The order of conv/norm/activation layers in + out_conv. + with_input1_conv (bool): Whether to use convolution on input1. + with_input2_conv (bool): Whether to use convolution on input2. + input_conv_cfg (dict): Config dict for building input1_conv layer and + input2_conv layer, which is expected to contain the type of + convolution. + Default: None, which means using conv2d. + input_norm_cfg (dict): Config dict for normalization layer in + input1_conv and input2_conv layer. Default: None. + upsample_mode (str): Interpolation method used to resize the output + of input1_conv and input2_conv to target size. Currently, we + support ['nearest', 'bilinear']. Default: 'nearest'. + """ + + def __init__(self, + fused_channels=256, + out_channels=256, + with_out_conv=True, + out_conv_cfg=dict( + groups=1, kernel_size=3, padding=1, bias=True), + out_norm_cfg=None, + out_conv_order=('act', 'conv', 'norm'), + with_input1_conv=False, + with_input2_conv=False, + input_conv_cfg=None, + input_norm_cfg=None, + upsample_mode='nearest'): + super(BaseMergeCell, self).__init__() + assert upsample_mode in ['nearest', 'bilinear'] + self.with_out_conv = with_out_conv + self.with_input1_conv = with_input1_conv + self.with_input2_conv = with_input2_conv + self.upsample_mode = upsample_mode + + if self.with_out_conv: + self.out_conv = ConvModule( + fused_channels, + out_channels, + **out_conv_cfg, + norm_cfg=out_norm_cfg, + order=out_conv_order) + + self.input1_conv = self._build_input_conv( + out_channels, input_conv_cfg, + input_norm_cfg) if with_input1_conv else nn.Sequential() + self.input2_conv = self._build_input_conv( + out_channels, input_conv_cfg, + input_norm_cfg) if with_input2_conv else nn.Sequential() + + def _build_input_conv(self, channel, conv_cfg, norm_cfg): + return ConvModule( + channel, + channel, + 3, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + bias=True) + + @abstractmethod + def _binary_op(self, x1, x2): + pass + + def _resize(self, x, size): + if x.shape[-2:] == size: + return x + elif x.shape[-2:] < size: + return F.interpolate(x, size=size, mode=self.upsample_mode) + else: + assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0 + kernel_size = x.shape[-1] // size[-1] + x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size) + return x + + def forward(self, x1, x2, out_size=None): + assert x1.shape[:2] == x2.shape[:2] + assert out_size is None or len(out_size) == 2 + if out_size is None: # resize to larger one + out_size = max(x1.size()[2:], x2.size()[2:]) + + x1 = self.input1_conv(x1) + x2 = self.input2_conv(x2) + + x1 = self._resize(x1, out_size) + x2 = self._resize(x2, out_size) + + x = self._binary_op(x1, x2) + if self.with_out_conv: + x = self.out_conv(x) + return x + + +class SumCell(BaseMergeCell): + + def __init__(self, in_channels, out_channels, **kwargs): + super(SumCell, self).__init__(in_channels, out_channels, **kwargs) + + def _binary_op(self, x1, x2): + return x1 + x2 + + +class ConcatCell(BaseMergeCell): + + def __init__(self, in_channels, out_channels, **kwargs): + super(ConcatCell, self).__init__(in_channels * 2, out_channels, + **kwargs) + + def _binary_op(self, x1, x2): + ret = torch.cat([x1, x2], dim=1) + return ret + + +class GlobalPoolingCell(BaseMergeCell): + + def __init__(self, in_channels=None, out_channels=None, **kwargs): + super().__init__(in_channels, out_channels, **kwargs) + self.global_pool = nn.AdaptiveAvgPool2d((1, 1)) + + def _binary_op(self, x1, x2): + x2_att = self.global_pool(x2).sigmoid() + return x2 + x2_att * x1 diff --git a/mmcv/mmcv/ops/modulated_deform_conv.py b/mmcv/mmcv/ops/modulated_deform_conv.py new file mode 100644 index 0000000..b8ff1ad --- /dev/null +++ b/mmcv/mmcv/ops/modulated_deform_conv.py @@ -0,0 +1,273 @@ +import math + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair, _single + +from mmcv.utils import deprecated_api_warning +from ..cnn import CONV_LAYERS +from ..utils import ext_loader, print_log + +ext_module = ext_loader.load_ext( + '_ext', + ['modulated_deform_conv_forward', 'modulated_deform_conv_backward']) + + +class ModulatedDeformConv2dFunction(Function): + + @staticmethod + def symbolic(g, input, offset, mask, weight, bias, stride, padding, + dilation, groups, deform_groups): + return g.op( + 'MMCVModulatedDeformConv2d', + input, + offset, + mask, + weight, + bias, + stride_i=stride, + padding_i=padding, + dilation_i=dilation, + groups_i=groups, + deform_groups_i=deform_groups) + + @staticmethod + def forward(ctx, + input, + offset, + mask, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1): + if input is not None and input.dim() != 4: + raise ValueError( + f'Expected 4D tensor as input, got {input.dim()}D tensor \ + instead.') + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deform_groups = deform_groups + ctx.with_bias = bias is not None + if not ctx.with_bias: + bias = input.new_empty(0) # fake tensor + ctx.save_for_backward(input, offset, mask, weight, bias) + output = input.new_empty( + ModulatedDeformConv2dFunction._output_size(ctx, input, weight)) + ctx._bufs = [input.new_empty(0), input.new_empty(0)] + ext_module.modulated_deform_conv_forward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + output, + ctx._bufs[1], + kernel_h=weight.size(2), + kernel_w=weight.size(3), + stride_h=ctx.stride[0], + stride_w=ctx.stride[1], + pad_h=ctx.padding[0], + pad_w=ctx.padding[1], + dilation_h=ctx.dilation[0], + dilation_w=ctx.dilation[1], + group=ctx.groups, + deformable_group=ctx.deform_groups, + with_bias=ctx.with_bias) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, mask, weight, bias = ctx.saved_tensors + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + grad_mask = torch.zeros_like(mask) + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(bias) + grad_output = grad_output.contiguous() + ext_module.modulated_deform_conv_backward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + ctx._bufs[1], + grad_input, + grad_weight, + grad_bias, + grad_offset, + grad_mask, + grad_output, + kernel_h=weight.size(2), + kernel_w=weight.size(3), + stride_h=ctx.stride[0], + stride_w=ctx.stride[1], + pad_h=ctx.padding[0], + pad_w=ctx.padding[1], + dilation_h=ctx.dilation[0], + dilation_w=ctx.dilation[1], + group=ctx.groups, + deformable_group=ctx.deform_groups, + with_bias=ctx.with_bias) + if not ctx.with_bias: + grad_bias = None + + return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, + None, None, None, None, None) + + @staticmethod + def _output_size(ctx, input, weight): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = ctx.padding[d] + kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = ctx.stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, ) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError( + 'convolution input is too small (output would be ' + + 'x'.join(map(str, output_size)) + ')') + return output_size + + +modulated_deform_conv2d = ModulatedDeformConv2dFunction.apply + + +class ModulatedDeformConv2d(nn.Module): + + @deprecated_api_warning({'deformable_groups': 'deform_groups'}, + cls_name='ModulatedDeformConv2d') + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1, + bias=True): + super(ModulatedDeformConv2d, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deform_groups = deform_groups + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // groups, + *self.kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.register_parameter('bias', None) + self.init_weights() + + def init_weights(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.zero_() + + def forward(self, x, offset, mask): + return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias, + self.stride, self.padding, + self.dilation, self.groups, + self.deform_groups) + + +@CONV_LAYERS.register_module('DCNv2') +class ModulatedDeformConv2dPack(ModulatedDeformConv2d): + """A ModulatedDeformable Conv Encapsulation that acts as normal Conv + layers. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int): Same as nn.Conv2d, while tuple is not supported. + padding (int): Same as nn.Conv2d, while tuple is not supported. + dilation (int): Same as nn.Conv2d, while tuple is not supported. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. + """ + + _version = 2 + + def __init__(self, *args, **kwargs): + super(ModulatedDeformConv2dPack, self).__init__(*args, **kwargs) + self.conv_offset = nn.Conv2d( + self.in_channels, + self.deform_groups * 3 * self.kernel_size[0] * self.kernel_size[1], + kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + bias=True) + self.init_weights() + + def init_weights(self): + super(ModulatedDeformConv2dPack, self).init_weights() + if hasattr(self, 'conv_offset'): + self.conv_offset.weight.data.zero_() + self.conv_offset.bias.data.zero_() + + def forward(self, x): + out = self.conv_offset(x) + o1, o2, mask = torch.chunk(out, 3, dim=1) + offset = torch.cat((o1, o2), dim=1) + mask = torch.sigmoid(mask) + return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias, + self.stride, self.padding, + self.dilation, self.groups, + self.deform_groups) + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + version = local_metadata.get('version', None) + + if version is None or version < 2: + # the key is different in early versions + # In version < 2, ModulatedDeformConvPack + # loads previous benchmark models. + if (prefix + 'conv_offset.weight' not in state_dict + and prefix[:-1] + '_offset.weight' in state_dict): + state_dict[prefix + 'conv_offset.weight'] = state_dict.pop( + prefix[:-1] + '_offset.weight') + if (prefix + 'conv_offset.bias' not in state_dict + and prefix[:-1] + '_offset.bias' in state_dict): + state_dict[prefix + + 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + + '_offset.bias') + + if version is not None and version > 1: + print_log( + f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to ' + 'version 2.', + logger='root') + + super()._load_from_state_dict(state_dict, prefix, local_metadata, + strict, missing_keys, unexpected_keys, + error_msgs) diff --git a/mmcv/mmcv/ops/nms.py b/mmcv/mmcv/ops/nms.py new file mode 100644 index 0000000..e225ca1 --- /dev/null +++ b/mmcv/mmcv/ops/nms.py @@ -0,0 +1,307 @@ +import sys + +import numpy as np +import torch + +from mmcv.utils import deprecated_api_warning +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['nms', 'softnms', 'nms_match']) + + +# This function is modified from: https://github.com/pytorch/vision/ +class NMSop(torch.autograd.Function): + + @staticmethod + def forward(ctx, bboxes, scores, iou_threshold, offset): + inds = ext_module.nms( + bboxes, scores, iou_threshold=float(iou_threshold), offset=offset) + return inds + + @staticmethod + def symbolic(g, bboxes, scores, iou_threshold, offset): + from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze + + boxes = unsqueeze(g, bboxes, 0) + scores = unsqueeze(g, unsqueeze(g, scores, 0), 0) + max_output_per_class = g.op( + 'Constant', value_t=torch.tensor([sys.maxsize], dtype=torch.long)) + iou_threshold = g.op( + 'Constant', + value_t=torch.tensor([iou_threshold], dtype=torch.float)) + nms_out = g.op('NonMaxSuppression', boxes, scores, + max_output_per_class, iou_threshold) + return squeeze( + g, + select( + g, nms_out, 1, + g.op('Constant', value_t=torch.tensor([2], dtype=torch.long))), + 1) + + +@deprecated_api_warning({'iou_thr': 'iou_threshold'}) +def nms(boxes, scores, iou_threshold, offset=0): + """Dispatch to either CPU or GPU NMS implementations. + + The input can be either torch tensor or numpy array. GPU NMS will be used + if the input is gpu tensor, otherwise CPU NMS + will be used. The returned type will always be the same as inputs. + + Arguments: + boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4). + scores (torch.Tensor or np.ndarray): scores in shape (N, ). + iou_threshold (float): IoU threshold for NMS. + offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset). + + Returns: + tuple: kept dets(boxes and scores) and indice, which is always the \ + same data type as the input. + + Example: + >>> boxes = np.array([[49.1, 32.4, 51.0, 35.9], + >>> [49.3, 32.9, 51.0, 35.3], + >>> [49.2, 31.8, 51.0, 35.4], + >>> [35.1, 11.5, 39.1, 15.7], + >>> [35.6, 11.8, 39.3, 14.2], + >>> [35.3, 11.5, 39.9, 14.5], + >>> [35.2, 11.7, 39.7, 15.7]], dtype=np.float32) + >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.5, 0.4, 0.3],\ + dtype=np.float32) + >>> iou_threshold = 0.6 + >>> dets, inds = nms(boxes, scores, iou_threshold) + >>> assert len(inds) == len(dets) == 3 + """ + assert isinstance(boxes, (torch.Tensor, np.ndarray)) + assert isinstance(scores, (torch.Tensor, np.ndarray)) + is_numpy = False + if isinstance(boxes, np.ndarray): + is_numpy = True + boxes = torch.from_numpy(boxes) + if isinstance(scores, np.ndarray): + scores = torch.from_numpy(scores) + assert boxes.size(1) == 4 + assert boxes.size(0) == scores.size(0) + assert offset in (0, 1) + + if torch.__version__ == 'parrots': + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + areas = (x2 - x1 + offset) * (y2 - y1 + offset) + _, order = scores.sort(0, descending=True) + if boxes.device == 'cpu': + indata_list = [boxes, order, areas] + indata_dict = { + 'iou_threshold': float(iou_threshold), + 'offset': int(offset) + } + select = ext_module.nms(*indata_list, **indata_dict).byte() + else: + boxes_sorted = boxes.index_select(0, order) + indata_list = [boxes_sorted, order, areas] + indata_dict = { + 'iou_threshold': float(iou_threshold), + 'offset': int(offset) + } + select = ext_module.nms(*indata_list, **indata_dict) + inds = order.masked_select(select) + else: + if torch.onnx.is_in_onnx_export() and offset == 0: + # ONNX only support offset == 1 + boxes[:, -2:] -= 1 + inds = NMSop.apply(boxes, scores, iou_threshold, offset) + if torch.onnx.is_in_onnx_export() and offset == 0: + # ONNX only support offset == 1 + boxes[:, -2:] += 1 + dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1) + if is_numpy: + dets = dets.cpu().numpy() + inds = inds.cpu().numpy() + return dets, inds + + +@deprecated_api_warning({'iou_thr': 'iou_threshold'}) +def soft_nms(boxes, + scores, + iou_threshold=0.3, + sigma=0.5, + min_score=1e-3, + method='linear', + offset=0): + """Dispatch to only CPU Soft NMS implementations. + + The input can be either a torch tensor or numpy array. + The returned type will always be the same as inputs. + + Arguments: + boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4). + scores (torch.Tensor or np.ndarray): scores in shape (N, ). + iou_threshold (float): IoU threshold for NMS. + sigma (float): hyperparameter for gaussian method + min_score (float): score filter threshold + method (str): either 'linear' or 'gaussian' + offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset). + + Returns: + tuple: kept dets(boxes and scores) and indice, which is always the \ + same data type as the input. + + Example: + >>> boxes = np.array([[4., 3., 5., 3.], + >>> [4., 3., 5., 4.], + >>> [3., 1., 3., 1.], + >>> [3., 1., 3., 1.], + >>> [3., 1., 3., 1.], + >>> [3., 1., 3., 1.]], dtype=np.float32) + >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.4, 0.0], dtype=np.float32) + >>> iou_threshold = 0.6 + >>> dets, inds = soft_nms(boxes, scores, iou_threshold, sigma=0.5) + >>> assert len(inds) == len(dets) == 5 + """ + + assert isinstance(boxes, (torch.Tensor, np.ndarray)) + assert isinstance(scores, (torch.Tensor, np.ndarray)) + is_numpy = False + if isinstance(boxes, np.ndarray): + is_numpy = True + boxes = torch.from_numpy(boxes) + if isinstance(scores, np.ndarray): + scores = torch.from_numpy(scores) + assert boxes.size(1) == 4 + assert boxes.size(0) == scores.size(0) + assert offset in (0, 1) + method_dict = {'naive': 0, 'linear': 1, 'gaussian': 2} + assert method in method_dict.keys() + + if torch.__version__ == 'parrots': + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + areas = (x2 - x1 + offset) * (y2 - y1 + offset) + indata_list = [boxes.cpu(), scores.cpu(), areas.cpu()] + indata_dict = { + 'iou_threshold': float(iou_threshold), + 'sigma': float(sigma), + 'min_score': min_score, + 'method': method_dict[method], + 'offset': int(offset) + } + dets, inds, num_out = ext_module.softnms(*indata_list, **indata_dict) + inds = inds[:num_out] + else: + dets = boxes.new_empty((boxes.size(0), 5), device='cpu') + inds = ext_module.softnms( + boxes.cpu(), + scores.cpu(), + dets.cpu(), + iou_threshold=float(iou_threshold), + sigma=float(sigma), + min_score=float(min_score), + method=method_dict[method], + offset=int(offset)) + dets = dets[:inds.size(0)] + if is_numpy: + dets = dets.cpu().numpy() + inds = inds.cpu().numpy() + return dets, inds + else: + return dets.to(device=boxes.device), inds.to(device=boxes.device) + + +def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): + """Performs non-maximum suppression in a batched fashion. + + Modified from https://github.com/pytorch/vision/blob + /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39. + In order to perform NMS independently per class, we add an offset to all + the boxes. The offset is dependent only on the class idx, and is large + enough so that boxes from different classes do not overlap. + + Arguments: + boxes (torch.Tensor): boxes in shape (N, 4). + scores (torch.Tensor): scores in shape (N, ). + idxs (torch.Tensor): each index value correspond to a bbox cluster, + and NMS will not be applied between elements of different idxs, + shape (N, ). + nms_cfg (dict): specify nms type and other parameters like iou_thr. + Possible keys includes the following. + + - iou_thr (float): IoU threshold used for NMS. + - split_thr (float): threshold number of boxes. In some cases the + number of boxes is large (e.g., 200k). To avoid OOM during + training, the users could set `split_thr` to a small value. + If the number of boxes is greater than the threshold, it will + perform NMS on each group of boxes separately and sequentially. + Defaults to 10000. + class_agnostic (bool): if true, nms is class agnostic, + i.e. IoU thresholding happens over all boxes, + regardless of the predicted class. + + Returns: + tuple: kept dets and indice. + """ + nms_cfg_ = nms_cfg.copy() + class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic) + if class_agnostic: + boxes_for_nms = boxes + else: + max_coordinate = boxes.max() + offsets = idxs.to(boxes) * (max_coordinate + 1) + boxes_for_nms = boxes + offsets[:, None] + + nms_type = nms_cfg_.pop('type', 'nms') + nms_op = eval(nms_type) + + split_thr = nms_cfg_.pop('split_thr', 10000) + if len(boxes_for_nms) < split_thr: + dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_) + boxes = boxes[keep] + scores = dets[:, -1] + else: + total_mask = scores.new_zeros(scores.size(), dtype=torch.bool) + for id in torch.unique(idxs): + mask = (idxs == id).nonzero(as_tuple=False).view(-1) + dets, keep = nms_op(boxes_for_nms[mask], scores[mask], **nms_cfg_) + total_mask[mask[keep]] = True + + keep = total_mask.nonzero(as_tuple=False).view(-1) + keep = keep[scores[keep].argsort(descending=True)] + boxes = boxes[keep] + scores = scores[keep] + + return torch.cat([boxes, scores[:, None]], -1), keep + + +def nms_match(dets, iou_threshold): + """Matched dets into different groups by NMS. + + NMS match is Similar to NMS but when a bbox is suppressed, nms match will + record the indice of suppressed bbox and form a group with the indice of + kept bbox. In each group, indice is sorted as score order. + + Arguments: + dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5). + iou_thr (float): IoU thresh for NMS. + + Returns: + List[torch.Tensor | np.ndarray]: The outer list corresponds different + matched group, the inner Tensor corresponds the indices for a group + in score order. + """ + if dets.shape[0] == 0: + matched = [] + else: + assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' \ + f'but get {dets.shape}' + if isinstance(dets, torch.Tensor): + dets_t = dets.detach().cpu() + else: + dets_t = torch.from_numpy(dets) + matched = ext_module.nms_match(dets_t, float(iou_threshold)) + + if isinstance(dets, torch.Tensor): + return [dets.new_tensor(m, dtype=torch.long) for m in matched] + else: + return [np.array(m, dtype=np.int) for m in matched] diff --git a/mmcv/mmcv/ops/point_sample.py b/mmcv/mmcv/ops/point_sample.py new file mode 100644 index 0000000..c5f59d3 --- /dev/null +++ b/mmcv/mmcv/ops/point_sample.py @@ -0,0 +1,214 @@ +# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend # noqa + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.utils import _pair + + +def normalize(grid): + """Normalize input grid from [-1, 1] to [0, 1] + Args: + grid (Tensor): The grid to be normalize, range [-1, 1]. + Returns: + Tensor: Normalized grid, range [0, 1]. + """ + + return (grid + 1.0) / 2.0 + + +def denormalize(grid): + """Denormalize input grid from range [0, 1] to [-1, 1] + Args: + grid (Tensor): The grid to be denormalize, range [0, 1]. + Returns: + Tensor: Denormalized grid, range [-1, 1]. + """ + + return grid * 2.0 - 1.0 + + +def generate_grid(num_grid, size, device): + """Generate regular square grid of points in [0, 1] x [0, 1] coordinate + space. + + Args: + num_grid (int): The number of grids to sample, one for each region. + size (tuple(int, int)): The side size of the regular grid. + device (torch.device): Desired device of returned tensor. + + Returns: + (torch.Tensor): A tensor of shape (num_grid, size[0]*size[1], 2) that + contains coordinates for the regular grids. + """ + + affine_trans = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device) + grid = F.affine_grid( + affine_trans, torch.Size((1, 1, *size)), align_corners=False) + grid = normalize(grid) + return grid.view(1, -1, 2).expand(num_grid, -1, -1) + + +def rel_roi_point_to_abs_img_point(rois, rel_roi_points): + """Convert roi based relative point coordinates to image based absolute + point coordinates. + + Args: + rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) + rel_roi_points (Tensor): Point coordinates inside RoI, relative to + RoI, location, range (0, 1), shape (N, P, 2) + Returns: + Tensor: Image based absolute point coordinates, shape (N, P, 2) + """ + + with torch.no_grad(): + assert rel_roi_points.size(0) == rois.size(0) + assert rois.dim() == 2 + assert rel_roi_points.dim() == 3 + assert rel_roi_points.size(2) == 2 + # remove batch idx + if rois.size(1) == 5: + rois = rois[:, 1:] + abs_img_points = rel_roi_points.clone() + abs_img_points[:, :, 0] = abs_img_points[:, :, 0] * ( + rois[:, None, 2] - rois[:, None, 0]) + abs_img_points[:, :, 1] = abs_img_points[:, :, 1] * ( + rois[:, None, 3] - rois[:, None, 1]) + abs_img_points[:, :, 0] += rois[:, None, 0] + abs_img_points[:, :, 1] += rois[:, None, 1] + return abs_img_points + + +def abs_img_point_to_rel_img_point(abs_img_points, + img_shape, + spatial_scale=1.): + """Convert image based absolute point coordinates to image based relative + coordinates for sampling. + + Args: + abs_img_points (Tensor): Image based absolute point coordinates, + shape (N, P, 2) + img_shape (tuple): (height, width) of image or feature map. + spatial_scale (float): Scale points by this factor. Default: 1. + + Returns: + Tensor: Image based relative point coordinates for sampling, + shape (N, P, 2) + """ + + assert isinstance(img_shape, tuple) and len(img_shape) == 2 + h, w = img_shape + scale = torch.tensor([w, h], + dtype=torch.float, + device=abs_img_points.device) + scale = scale.view(1, 1, 2) + rel_img_points = abs_img_points / scale * spatial_scale + + return rel_img_points + + +def rel_roi_point_to_rel_img_point(rois, + rel_roi_points, + img_shape, + spatial_scale=1.): + """Convert roi based relative point coordinates to image based absolute + point coordinates. + + Args: + rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) + rel_roi_points (Tensor): Point coordinates inside RoI, relative to + RoI, location, range (0, 1), shape (N, P, 2) + img_shape (tuple): (height, width) of image or feature map. + spatial_scale (float): Scale points by this factor. Default: 1. + + Returns: + Tensor: Image based relative point coordinates for sampling, + shape (N, P, 2) + """ + + abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points) + rel_img_point = abs_img_point_to_rel_img_point(abs_img_point, img_shape, + spatial_scale) + + return rel_img_point + + +def point_sample(input, points, align_corners=False, **kwargs): + """A wrapper around :func:`grid_sample` to support 3D point_coords tensors + Unlike :func:`torch.nn.functional.grid_sample` it assumes point_coords to + lie inside ``[0, 1] x [0, 1]`` square. + + Args: + input (Tensor): Feature map, shape (N, C, H, W). + points (Tensor): Image based absolute point coordinates (normalized), + range [0, 1] x [0, 1], shape (N, P, 2) or (N, Hgrid, Wgrid, 2). + align_corners (bool): Whether align_corners. Default: False + + Returns: + Tensor: Features of `point` on `input`, shape (N, C, P) or + (N, C, Hgrid, Wgrid). + """ + + add_dim = False + if points.dim() == 3: + add_dim = True + points = points.unsqueeze(2) + output = F.grid_sample( + input, denormalize(points), align_corners=align_corners, **kwargs) + if add_dim: + output = output.squeeze(3) + return output + + +class SimpleRoIAlign(nn.Module): + + def __init__(self, output_size, spatial_scale, aligned=True): + """Simple RoI align in PointRend, faster than standard RoIAlign. + + Args: + output_size (tuple[int]): h, w + spatial_scale (float): scale the input boxes by this number + aligned (bool): if False, use the legacy implementation in + MMDetection, align_corners=True will be used in F.grid_sample. + If True, align the results more perfectly. + """ + + super(SimpleRoIAlign, self).__init__() + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + # to be consistent with other RoI ops + self.use_torchvision = False + self.aligned = aligned + + def forward(self, features, rois): + + num_imgs = features.size(0) + num_rois = rois.size(0) + rel_roi_points = generate_grid( + num_rois, self.output_size, device=rois.device) + + point_feats = [] + for batch_ind in range(num_imgs): + # unravel batch dim + feat = features[batch_ind].unsqueeze(0) + inds = (rois[:, 0].long() == batch_ind) + if inds.any(): + rel_img_points = rel_roi_point_to_rel_img_point( + rois[inds], rel_roi_points[inds], feat.shape[2:], + self.spatial_scale).unsqueeze(0) + point_feat = point_sample( + feat, rel_img_points, align_corners=not self.aligned) + point_feat = point_feat.squeeze(0).transpose(0, 1) + point_feats.append(point_feat) + + channels = features.size(1) + roi_feats = torch.cat(point_feats, dim=0) + roi_feats = roi_feats.reshape(num_rois, channels, *self.output_size) + + return roi_feats + + def __repr__(self): + format_str = self.__class__.__name__ + format_str += '(output_size={}, spatial_scale={}'.format( + self.output_size, self.spatial_scale) + return format_str diff --git a/mmcv/mmcv/ops/psa_mask.py b/mmcv/mmcv/ops/psa_mask.py new file mode 100644 index 0000000..0652594 --- /dev/null +++ b/mmcv/mmcv/ops/psa_mask.py @@ -0,0 +1,89 @@ +# Modified from https://github.com/hszhao/semseg/blob/master/lib/psa +from torch import nn +from torch.autograd import Function +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['psamask_forward', 'psamask_backward']) + + +class PSAMaskFunction(Function): + + @staticmethod + def symbolic(g, input, psa_type, mask_size): + return g.op( + 'MMCVPSAMask', input, psa_type=psa_type, mask_size=mask_size) + + @staticmethod + def forward(ctx, input, psa_type, mask_size): + ctx.psa_type = psa_type + ctx.mask_size = _pair(mask_size) + ctx.save_for_backward(input) + + h_mask, w_mask = ctx.mask_size + batch_size, channels, h_feature, w_feature = input.size() + assert channels == h_mask * w_mask + output = input.new_zeros( + (batch_size, h_feature * w_feature, h_feature, w_feature)) + + ext_module.psamask_forward( + input, + output, + psa_type=psa_type, + num_=batch_size, + h_feature=h_feature, + w_feature=w_feature, + h_mask=h_mask, + w_mask=w_mask, + half_h_mask=(h_mask - 1) // 2, + half_w_mask=(w_mask - 1) // 2) + return output + + @staticmethod + def backward(ctx, grad_output): + input = ctx.saved_tensors[0] + psa_type = ctx.psa_type + h_mask, w_mask = ctx.mask_size + batch_size, channels, h_feature, w_feature = input.size() + grad_input = grad_output.new_zeros( + (batch_size, channels, h_feature, w_feature)) + ext_module.psamask_backward( + grad_output, + grad_input, + psa_type=psa_type, + num_=batch_size, + h_feature=h_feature, + w_feature=w_feature, + h_mask=h_mask, + w_mask=w_mask, + half_h_mask=(h_mask - 1) // 2, + half_w_mask=(w_mask - 1) // 2) + return grad_input, None, None, None + + +psa_mask = PSAMaskFunction.apply + + +class PSAMask(nn.Module): + + def __init__(self, psa_type, mask_size=None): + super(PSAMask, self).__init__() + assert psa_type in ['collect', 'distribute'] + if psa_type == 'collect': + psa_type_enum = 0 + else: + psa_type_enum = 1 + self.psa_type_enum = psa_type_enum + self.mask_size = mask_size + self.psa_type = psa_type + + def forward(self, input): + return psa_mask(input, self.psa_type_enum, self.mask_size) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(psa_type={self.psa_type}, ' + s += f'mask_size={self.mask_size})' + return s diff --git a/mmcv/mmcv/ops/roi_align.py b/mmcv/mmcv/ops/roi_align.py new file mode 100644 index 0000000..dd62543 --- /dev/null +++ b/mmcv/mmcv/ops/roi_align.py @@ -0,0 +1,187 @@ +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import deprecated_api_warning, ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['roi_align_forward', 'roi_align_backward']) + + +class RoIAlignFunction(Function): + + @staticmethod + def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio, + pool_mode, aligned): + return g.op( + 'MMCVRoIAlign', + input, + rois, + aligned_height=output_size[0], + aligned_weight=output_size[1], + spatial_scale=spatial_scale, + sampling_ratio=sampling_ratio, + pool_mode=pool_mode, + aligned=aligned) + + @staticmethod + def forward(ctx, + input, + rois, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + pool_mode='avg', + aligned=True): + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.sampling_ratio = sampling_ratio + assert pool_mode in ('max', 'avg') + ctx.pool_mode = 0 if pool_mode == 'max' else 1 + ctx.aligned = aligned + ctx.input_shape = input.size() + + assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' + + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], + ctx.output_size[1]) + output = input.new_zeros(output_shape) + if ctx.pool_mode == 0: + argmax_y = input.new_zeros(output_shape) + argmax_x = input.new_zeros(output_shape) + else: + argmax_y = input.new_zeros(0) + argmax_x = input.new_zeros(0) + + ext_module.roi_align_forward( + input, + rois, + output, + argmax_y, + argmax_x, + aligned_height=ctx.output_size[0], + aligned_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + pool_mode=ctx.pool_mode, + aligned=ctx.aligned) + + ctx.save_for_backward(rois, argmax_y, argmax_x) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + rois, argmax_y, argmax_x = ctx.saved_tensors + grad_input = grad_output.new_zeros(ctx.input_shape) + + ext_module.roi_align_backward( + grad_output, + rois, + argmax_y, + argmax_x, + grad_input, + aligned_height=ctx.output_size[0], + aligned_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + pool_mode=ctx.pool_mode, + aligned=ctx.aligned) + return grad_input, None, None, None, None, None, None + + +roi_align = RoIAlignFunction.apply + + +class RoIAlign(nn.Module): + """RoI align pooling layer. + + Args: + output_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sampling_ratio (int): number of inputs samples to take for each + output sample. 0 to take samples densely for current models. + pool_mode (str, 'avg' or 'max'): pooling mode in each bin. + aligned (bool): if False, use the legacy implementation in + MMDetection. If True, align the results more perfectly. + use_torchvision (bool): whether to use roi_align from torchvision. + + Note: + The implementation of RoIAlign when aligned=True is modified from + https://github.com/facebookresearch/detectron2/ + + The meaning of aligned=True: + + Given a continuous coordinate c, its two neighboring pixel + indices (in our pixel model) are computed by floor(c - 0.5) and + ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete + indices [0] and [1] (which are sampled from the underlying signal + at continuous coordinates 0.5 and 1.5). But the original roi_align + (aligned=False) does not subtract the 0.5 when computing + neighboring pixel indices and therefore it uses pixels with a + slightly incorrect alignment (relative to our pixel model) when + performing bilinear interpolation. + + With `aligned=True`, + we first appropriately scale the ROI and then shift it by -0.5 + prior to calling roi_align. This produces the correct neighbors; + + The difference does not make a difference to the model's + performance if ROIAlign is used together with conv layers. + """ + + @deprecated_api_warning( + { + 'out_size': 'output_size', + 'sample_num': 'sampling_ratio' + }, + cls_name='RoIAlign') + def __init__(self, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + pool_mode='avg', + aligned=True, + use_torchvision=False): + super(RoIAlign, self).__init__() + + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + self.sampling_ratio = int(sampling_ratio) + self.pool_mode = pool_mode + self.aligned = aligned + self.use_torchvision = use_torchvision + + def forward(self, input, rois): + """ + Args: + input: NCHW images + rois: Bx5 boxes. First column is the index into N.\ + The other 4 columns are xyxy. + """ + if self.use_torchvision: + from torchvision.ops import roi_align as tv_roi_align + if 'aligned' in tv_roi_align.__code__.co_varnames: + return tv_roi_align(input, rois, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.aligned) + else: + if self.aligned: + rois -= rois.new_tensor([0.] + + [0.5 / self.spatial_scale] * 4) + return tv_roi_align(input, rois, self.output_size, + self.spatial_scale, self.sampling_ratio) + else: + return roi_align(input, rois, self.output_size, self.spatial_scale, + self.sampling_ratio, self.pool_mode, self.aligned) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(output_size={self.output_size}, ' + s += f'spatial_scale={self.spatial_scale}, ' + s += f'sampling_ratio={self.sampling_ratio}, ' + s += f'pool_mode={self.pool_mode}, ' + s += f'aligned={self.aligned}, ' + s += f'use_torchvision={self.use_torchvision})' + return s diff --git a/mmcv/mmcv/ops/roi_pool.py b/mmcv/mmcv/ops/roi_pool.py new file mode 100644 index 0000000..bb3122f --- /dev/null +++ b/mmcv/mmcv/ops/roi_pool.py @@ -0,0 +1,86 @@ +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['roi_pool_forward', 'roi_pool_backward']) + + +class RoIPoolFunction(Function): + + @staticmethod + def symbolic(g, input, rois, output_size, spatial_scale): + return g.op( + 'MMCVRoIPool', + input, + rois, + pooled_height=output_size[0], + pooled_width=output_size[1], + spatial_scale=spatial_scale) + + @staticmethod + def forward(ctx, input, rois, output_size, spatial_scale=1.0): + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.input_shape = input.size() + + assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' + + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], + ctx.output_size[1]) + output = input.new_zeros(output_shape) + argmax = input.new_zeros(output_shape, dtype=torch.int) + + ext_module.roi_pool_forward( + input, + rois, + output, + argmax, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale) + + ctx.save_for_backward(rois, argmax) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + rois, argmax = ctx.saved_tensors + grad_input = grad_output.new_zeros(ctx.input_shape) + + ext_module.roi_pool_backward( + grad_output, + rois, + argmax, + grad_input, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale) + + return grad_input, None, None, None + + +roi_pool = RoIPoolFunction.apply + + +class RoIPool(nn.Module): + + def __init__(self, output_size, spatial_scale=1.0): + super(RoIPool, self).__init__() + + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + + def forward(self, input, rois): + return roi_pool(input, rois, self.output_size, self.spatial_scale) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(output_size={self.output_size}, ' + s += f'spatial_scale={self.spatial_scale})' + return s diff --git a/mmcv/mmcv/ops/saconv.py b/mmcv/mmcv/ops/saconv.py new file mode 100644 index 0000000..cd7eea1 --- /dev/null +++ b/mmcv/mmcv/ops/saconv.py @@ -0,0 +1,132 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from mmcv.cnn import CONV_LAYERS, ConvAWS2d, constant_init +from mmcv.ops.deform_conv import deform_conv2d +from mmcv.utils import TORCH_VERSION + + +@CONV_LAYERS.register_module(name='SAC') +class SAConv2d(ConvAWS2d): + """SAC (Switchable Atrous Convolution) + + This is an implementation of SAC in DetectoRS + (https://arxiv.org/pdf/2006.02334.pdf). + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. Default: 0 + padding_mode (string, optional): ``'zeros'``, ``'reflect'``, + ``'replicate'`` or ``'circular'``. Default: ``'zeros'`` + dilation (int or tuple, optional): Spacing between kernel elements. + Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the + output. Default: ``True`` + use_deform: If ``True``, replace convolution with deformable + convolution. Default: ``False``. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + use_deform=False): + super().__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias) + self.use_deform = use_deform + self.switch = nn.Conv2d( + self.in_channels, 1, kernel_size=1, stride=stride, bias=True) + self.weight_diff = nn.Parameter(torch.Tensor(self.weight.size())) + self.pre_context = nn.Conv2d( + self.in_channels, self.in_channels, kernel_size=1, bias=True) + self.post_context = nn.Conv2d( + self.out_channels, self.out_channels, kernel_size=1, bias=True) + if self.use_deform: + self.offset_s = nn.Conv2d( + self.in_channels, + 18, + kernel_size=3, + padding=1, + stride=stride, + bias=True) + self.offset_l = nn.Conv2d( + self.in_channels, + 18, + kernel_size=3, + padding=1, + stride=stride, + bias=True) + self.init_weights() + + def init_weights(self): + constant_init(self.switch, 0, bias=1) + self.weight_diff.data.zero_() + constant_init(self.pre_context, 0) + constant_init(self.post_context, 0) + if self.use_deform: + constant_init(self.offset_s, 0) + constant_init(self.offset_l, 0) + + def forward(self, x): + # pre-context + avg_x = F.adaptive_avg_pool2d(x, output_size=1) + avg_x = self.pre_context(avg_x) + avg_x = avg_x.expand_as(x) + x = x + avg_x + # switch + avg_x = F.pad(x, pad=(2, 2, 2, 2), mode='reflect') + avg_x = F.avg_pool2d(avg_x, kernel_size=5, stride=1, padding=0) + switch = self.switch(avg_x) + # sac + weight = self._get_weight(self.weight) + if self.use_deform: + offset = self.offset_s(avg_x) + out_s = deform_conv2d(x, offset, weight, self.stride, self.padding, + self.dilation, self.groups, 1) + else: + if TORCH_VERSION < '1.5.0' or TORCH_VERSION == 'parrots': + out_s = super().conv2d_forward(x, weight) + else: + out_s = super()._conv_forward(x, weight) + ori_p = self.padding + ori_d = self.dilation + self.padding = tuple(3 * p for p in self.padding) + self.dilation = tuple(3 * d for d in self.dilation) + weight = weight + self.weight_diff + if self.use_deform: + offset = self.offset_l(avg_x) + out_l = deform_conv2d(x, offset, weight, self.stride, self.padding, + self.dilation, self.groups, 1) + else: + if TORCH_VERSION < '1.5.0' or TORCH_VERSION == 'parrots': + out_l = super().conv2d_forward(x, weight) + else: + out_l = super()._conv_forward(x, weight) + out = switch * out_s + (1 - switch) * out_l + self.padding = ori_p + self.dilation = ori_d + # post-context + avg_x = F.adaptive_avg_pool2d(out, output_size=1) + avg_x = self.post_context(avg_x) + avg_x = avg_x.expand_as(out) + out = out + avg_x + return out diff --git a/mmcv/mmcv/ops/sync_bn.py b/mmcv/mmcv/ops/sync_bn.py new file mode 100644 index 0000000..b2499f9 --- /dev/null +++ b/mmcv/mmcv/ops/sync_bn.py @@ -0,0 +1,195 @@ +import torch +import torch.distributed as dist +import torch.nn.functional as F +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.module import Module +from torch.nn.parameter import Parameter + +from mmcv.cnn import NORM_LAYERS +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'sync_bn_forward_mean', 'sync_bn_forward_var', 'sync_bn_forward_output', + 'sync_bn_backward_param', 'sync_bn_backward_data' +]) + + +class SyncBatchNormFunction(Function): + + @staticmethod + def symbolic(g, input, running_mean, running_var, weight, bias, momentum, + eps, group, group_size): + return g.op( + 'MMCVSyncBatchNorm', + input, + running_mean, + running_var, + weight, + bias, + momentum=momentum, + eps=eps, + group=group, + group_size=group_size) + + @staticmethod + def forward(self, input, running_mean, running_var, weight, bias, momentum, + eps, group, group_size): + self.momentum = momentum + self.eps = eps + self.group = group + self.group_size = group_size + + assert isinstance( + input, (torch.HalfTensor, torch.FloatTensor, + torch.cuda.HalfTensor, torch.cuda.FloatTensor)), \ + f'only support Half or Float Tensor, but {input.type()}' + output = torch.empty_like(input) + input3d = input.view(input.size(0), input.size(1), -1) + output3d = output.view_as(input3d) + + mean = torch.empty( + input3d.size(1), dtype=torch.float, device=input3d.device) + var = torch.empty( + input3d.size(1), dtype=torch.float, device=input3d.device) + norm = torch.empty_like( + input3d, dtype=torch.float, device=input3d.device) + std = torch.empty( + input3d.size(1), dtype=torch.float, device=input3d.device) + + ext_module.sync_bn_forward_mean(input3d, mean) + if self.group_size > 1: + dist.all_reduce(mean, group=self.group) + mean /= self.group_size + ext_module.sync_bn_forward_var(input3d, mean, var) + if self.group_size > 1: + dist.all_reduce(var, group=self.group) + var /= self.group_size + ext_module.sync_bn_forward_output( + input3d, + mean, + var, + weight, + bias, + running_mean, + running_var, + norm, + std, + output3d, + eps=self.eps, + momentum=self.momentum, + group_size=self.group_size) + self.save_for_backward(norm, std, weight) + return output + + @staticmethod + @once_differentiable + def backward(self, grad_output): + norm, std, weight = self.saved_tensors + grad_weight = torch.empty_like(weight) + grad_bias = torch.empty_like(weight) + grad_input = torch.empty_like(grad_output) + grad_output3d = grad_output.view( + grad_output.size(0), grad_output.size(1), -1) + grad_input3d = grad_input.view_as(grad_output3d) + ext_module.sync_bn_backward_param(grad_output3d, norm, grad_weight, + grad_bias) + # all reduce + if self.group_size > 1: + dist.all_reduce(grad_weight, group=self.group) + dist.all_reduce(grad_bias, group=self.group) + grad_weight /= self.group_size + grad_bias /= self.group_size + ext_module.sync_bn_backward_data(grad_output3d, weight, grad_weight, + grad_bias, norm, std, grad_input3d) + return grad_input, None, None, grad_weight, grad_bias, \ + None, None, None, None + + +@NORM_LAYERS.register_module(name='MMSyncBN') +class SyncBatchNorm(Module): + + def __init__(self, + num_features, + eps=1e-5, + momentum=0.1, + affine=True, + track_running_stats=True, + group=None): + super(SyncBatchNorm, self).__init__() + self.num_features = num_features + self.eps = eps + self.momentum = momentum + self.affine = affine + self.track_running_stats = track_running_stats + group = dist.group.WORLD if group is None else group + self.group = group + self.group_size = dist.get_world_size(group) + if self.affine: + self.weight = Parameter(torch.Tensor(num_features)) + self.bias = Parameter(torch.Tensor(num_features)) + else: + self.register_parameter('weight', None) + self.register_parameter('bias', None) + if self.track_running_stats: + self.register_buffer('running_mean', torch.zeros(num_features)) + self.register_buffer('running_var', torch.ones(num_features)) + self.register_buffer('num_batches_tracked', + torch.tensor(0, dtype=torch.long)) + else: + self.register_buffer('running_mean', None) + self.register_buffer('running_var', None) + self.register_buffer('num_batches_tracked', None) + self.reset_parameters() + + def reset_running_stats(self): + if self.track_running_stats: + self.running_mean.zero_() + self.running_var.fill_(1) + self.num_batches_tracked.zero_() + + def reset_parameters(self): + self.reset_running_stats() + if self.affine: + self.weight.data.uniform_() # pytorch use ones_() + self.bias.data.zero_() + + def forward(self, input): + if input.dim() < 2: + raise ValueError( + f'expected at least 2D input, got {input.dim()}D input') + if self.momentum is None: + exponential_average_factor = 0.0 + else: + exponential_average_factor = self.momentum + + if self.training and self.track_running_stats: + if self.num_batches_tracked is not None: + self.num_batches_tracked += 1 + if self.momentum is None: # use cumulative moving average + exponential_average_factor = 1.0 / float( + self.num_batches_tracked) + else: # use exponential moving average + exponential_average_factor = self.momentum + + if self.training or not self.track_running_stats: + return SyncBatchNormFunction.apply(input, self.running_mean, + self.running_var, self.weight, + self.bias, + exponential_average_factor, + self.eps, self.group, + self.group_size) + else: + return F.batch_norm(input, self.running_mean, self.running_var, + self.weight, self.bias, False, + exponential_average_factor, self.eps) + + def __repr__(self): + s = self.__class__.__name__ + s += f'({self.num_features}, ' + s += f'eps={self.eps}, ' + s += f'momentum={self.momentum}, ' + s += f'affine={self.affine}, ' + s += f'track_running_stats={self.track_running_stats}, ' + s += f'group_size={self.group_size})' + return s diff --git a/mmcv/mmcv/ops/tin_shift.py b/mmcv/mmcv/ops/tin_shift.py new file mode 100644 index 0000000..5560af4 --- /dev/null +++ b/mmcv/mmcv/ops/tin_shift.py @@ -0,0 +1,62 @@ +# Code reference from "Temporal Interlacing Network" +# https://github.com/deepcs233/TIN/blob/master/cuda_shift/rtc_wrap.py +# Hao Shao, Shengju Qian, Yu Liu +# shaoh19@mails.tsinghua.edu.cn, sjqian@cse.cuhk.edu.hk, yuliu@ee.cuhk.edu.hk + +import torch +import torch.nn as nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['tin_shift_forward', 'tin_shift_backward']) + + +class TINShiftFunction(Function): + + @staticmethod + def forward(ctx, input, shift): + + ctx.save_for_backward(shift) + + out = torch.zeros_like(input) + ext_module.tin_shift_forward(input, shift, out) + + return out + + @staticmethod + def backward(ctx, grad_output): + + shift = ctx.saved_tensors[0] + data_grad_input = grad_output.new(*grad_output.size()).zero_() + shift_grad_input = shift.new(*shift.size()).zero_() + ext_module.tin_shift_backward(grad_output, shift, data_grad_input) + + return data_grad_input, shift_grad_input + + +tin_shift = TINShiftFunction.apply + + +class TINShift(nn.Module): + """Temporal Interlace Shift. + + Temporal Interlace shift is a differentiable temporal-wise frame shifting + which is proposed in "Temporal Interlacing Network" + + Please refer to https://arxiv.org/abs/2001.06499 for more details. + Code is modified from https://github.com/mit-han-lab/temporal-shift-module + """ + + def forward(self, input, shift): + """Perform temporal interlace shift. + + Args: + input (Tensor): Feature map with shape [N, num_segments, C, H * W]. + shift (Tensor): Shift tensor with shape [N, num_segments]. + + Returns: + Feature map after temporal interlace shift. + """ + return tin_shift(input, shift) diff --git a/mmcv/mmcv/parallel/__init__.py b/mmcv/mmcv/parallel/__init__.py new file mode 100644 index 0000000..98c2974 --- /dev/null +++ b/mmcv/mmcv/parallel/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from .collate import collate +from .data_container import DataContainer +from .data_parallel import MMDataParallel +from .distributed import MMDistributedDataParallel +from .registry import MODULE_WRAPPERS +from .scatter_gather import scatter, scatter_kwargs +from .utils import is_module_wrapper + +__all__ = [ + 'collate', 'DataContainer', 'MMDataParallel', 'MMDistributedDataParallel', + 'scatter', 'scatter_kwargs', 'is_module_wrapper', 'MODULE_WRAPPERS' +] diff --git a/mmcv/mmcv/parallel/_functions.py b/mmcv/mmcv/parallel/_functions.py new file mode 100644 index 0000000..4cd02fb --- /dev/null +++ b/mmcv/mmcv/parallel/_functions.py @@ -0,0 +1,79 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import torch +from torch.nn.parallel._functions import _get_stream + + +def scatter(input, devices, streams=None): + """Scatters tensor across multiple GPUs.""" + if streams is None: + streams = [None] * len(devices) + + if isinstance(input, list): + chunk_size = (len(input) - 1) // len(devices) + 1 + outputs = [ + scatter(input[i], [devices[i // chunk_size]], + [streams[i // chunk_size]]) for i in range(len(input)) + ] + return outputs + elif isinstance(input, torch.Tensor): + output = input.contiguous() + # TODO: copy to a pinned buffer first (if copying from CPU) + stream = streams[0] if output.numel() > 0 else None + if devices != [-1]: + with torch.cuda.device(devices[0]), torch.cuda.stream(stream): + output = output.cuda(devices[0], non_blocking=True) + else: + # unsquzee the first dimension thus the tensor's shape is the + # same as those scattered with GPU. + output = output.unsqueeze(0) + return output + else: + raise Exception(f'Unknown type {type(input)}.') + + +def synchronize_stream(output, devices, streams): + if isinstance(output, list): + chunk_size = len(output) // len(devices) + for i in range(len(devices)): + for j in range(chunk_size): + synchronize_stream(output[i * chunk_size + j], [devices[i]], + [streams[i]]) + elif isinstance(output, torch.Tensor): + if output.numel() != 0: + with torch.cuda.device(devices[0]): + main_stream = torch.cuda.current_stream() + main_stream.wait_stream(streams[0]) + output.record_stream(main_stream) + else: + raise Exception(f'Unknown type {type(output)}.') + + +def get_input_device(input): + if isinstance(input, list): + for item in input: + input_device = get_input_device(item) + if input_device != -1: + return input_device + return -1 + elif isinstance(input, torch.Tensor): + return input.get_device() if input.is_cuda else -1 + else: + raise Exception(f'Unknown type {type(input)}.') + + +class Scatter: + + @staticmethod + def forward(target_gpus, input): + input_device = get_input_device(input) + streams = None + if input_device == -1 and target_gpus != [-1]: + # Perform CPU to GPU copies in a background stream + streams = [_get_stream(device) for device in target_gpus] + + outputs = scatter(input, target_gpus, streams) + # Synchronize with the copy stream + if streams is not None: + synchronize_stream(outputs, target_gpus, streams) + + return tuple(outputs) diff --git a/mmcv/mmcv/parallel/collate.py b/mmcv/mmcv/parallel/collate.py new file mode 100644 index 0000000..21155cb --- /dev/null +++ b/mmcv/mmcv/parallel/collate.py @@ -0,0 +1,84 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from collections.abc import Mapping, Sequence + +import torch +import torch.nn.functional as F +from torch.utils.data.dataloader import default_collate + +from .data_container import DataContainer + + +def collate(batch, samples_per_gpu=1): + """Puts each data field into a tensor/DataContainer with outer dimension + batch size. + + Extend default_collate to add support for + :type:`~mmcv.parallel.DataContainer`. There are 3 cases. + + 1. cpu_only = True, e.g., meta data + 2. cpu_only = False, stack = True, e.g., images tensors + 3. cpu_only = False, stack = False, e.g., gt bboxes + """ + + if not isinstance(batch, Sequence): + raise TypeError(f'{batch.dtype} is not supported.') + + if isinstance(batch[0], DataContainer): + stacked = [] + if batch[0].cpu_only: + for i in range(0, len(batch), samples_per_gpu): + stacked.append( + [sample.data for sample in batch[i:i + samples_per_gpu]]) + return DataContainer( + stacked, batch[0].stack, batch[0].padding_value, cpu_only=True) + elif batch[0].stack: + for i in range(0, len(batch), samples_per_gpu): + assert isinstance(batch[i].data, torch.Tensor) + + if batch[i].pad_dims is not None: + ndim = batch[i].dim() + assert ndim > batch[i].pad_dims + max_shape = [0 for _ in range(batch[i].pad_dims)] + for dim in range(1, batch[i].pad_dims + 1): + max_shape[dim - 1] = batch[i].size(-dim) + for sample in batch[i:i + samples_per_gpu]: + for dim in range(0, ndim - batch[i].pad_dims): + assert batch[i].size(dim) == sample.size(dim) + for dim in range(1, batch[i].pad_dims + 1): + max_shape[dim - 1] = max(max_shape[dim - 1], + sample.size(-dim)) + padded_samples = [] + for sample in batch[i:i + samples_per_gpu]: + pad = [0 for _ in range(batch[i].pad_dims * 2)] + for dim in range(1, batch[i].pad_dims + 1): + pad[2 * dim - + 1] = max_shape[dim - 1] - sample.size(-dim) + padded_samples.append( + F.pad( + sample.data, pad, value=sample.padding_value)) + stacked.append(default_collate(padded_samples)) + elif batch[i].pad_dims is None: + stacked.append( + default_collate([ + sample.data + for sample in batch[i:i + samples_per_gpu] + ])) + else: + raise ValueError( + 'pad_dims should be either None or integers (1-3)') + + else: + for i in range(0, len(batch), samples_per_gpu): + stacked.append( + [sample.data for sample in batch[i:i + samples_per_gpu]]) + return DataContainer(stacked, batch[0].stack, batch[0].padding_value) + elif isinstance(batch[0], Sequence): + transposed = zip(*batch) + return [collate(samples, samples_per_gpu) for samples in transposed] + elif isinstance(batch[0], Mapping): + return { + key: collate([d[key] for d in batch], samples_per_gpu) + for key in batch[0] + } + else: + return default_collate(batch) diff --git a/mmcv/mmcv/parallel/data_container.py b/mmcv/mmcv/parallel/data_container.py new file mode 100644 index 0000000..7511c04 --- /dev/null +++ b/mmcv/mmcv/parallel/data_container.py @@ -0,0 +1,89 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import functools + +import torch + + +def assert_tensor_type(func): + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not isinstance(args[0].data, torch.Tensor): + raise AttributeError( + f'{args[0].__class__.__name__} has no attribute ' + f'{func.__name__} for type {args[0].datatype}') + return func(*args, **kwargs) + + return wrapper + + +class DataContainer: + """A container for any type of objects. + + Typically tensors will be stacked in the collate function and sliced along + some dimension in the scatter function. This behavior has some limitations. + 1. All tensors have to be the same size. + 2. Types are limited (numpy array or Tensor). + + We design `DataContainer` and `MMDataParallel` to overcome these + limitations. The behavior can be either of the following. + + - copy to GPU, pad all tensors to the same size and stack them + - copy to GPU without stacking + - leave the objects as is and pass it to the model + - pad_dims specifies the number of last few dimensions to do padding + """ + + def __init__(self, + data, + stack=False, + padding_value=0, + cpu_only=False, + pad_dims=2): + self._data = data + self._cpu_only = cpu_only + self._stack = stack + self._padding_value = padding_value + assert pad_dims in [None, 1, 2, 3] + self._pad_dims = pad_dims + + def __repr__(self): + return f'{self.__class__.__name__}({repr(self.data)})' + + def __len__(self): + return len(self._data) + + @property + def data(self): + return self._data + + @property + def datatype(self): + if isinstance(self.data, torch.Tensor): + return self.data.type() + else: + return type(self.data) + + @property + def cpu_only(self): + return self._cpu_only + + @property + def stack(self): + return self._stack + + @property + def padding_value(self): + return self._padding_value + + @property + def pad_dims(self): + return self._pad_dims + + @assert_tensor_type + def size(self, *args, **kwargs): + return self.data.size(*args, **kwargs) + + @assert_tensor_type + def dim(self): + return self.data.dim() diff --git a/mmcv/mmcv/parallel/data_parallel.py b/mmcv/mmcv/parallel/data_parallel.py new file mode 100644 index 0000000..c209bcf --- /dev/null +++ b/mmcv/mmcv/parallel/data_parallel.py @@ -0,0 +1,89 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from itertools import chain + +from torch.nn.parallel import DataParallel + +from .scatter_gather import scatter_kwargs + + +class MMDataParallel(DataParallel): + """The DataParallel module that supports DataContainer. + + MMDataParallel has two main differences with PyTorch DataParallel: + + - It supports a custom type :class:`DataContainer` which allows more + flexible control of input data during both GPU and CPU inference. + - It implement two more APIs ``train_step()`` and ``val_step()``. + + Args: + module (:class:`nn.Module`): Module to be encapsulated. + device_ids (list[int]): Device IDS of modules to be scattered to. + Defaults to None when GPU is not available. + output_device (str | int): Device ID for output. Defaults to None. + dim (int): Dimension used to scatter the data. Defaults to 0. + """ + + def __init__(self, *args, dim=0, **kwargs): + super(MMDataParallel, self).__init__(*args, dim=dim, **kwargs) + self.dim = dim + + def forward(self, *inputs, **kwargs): + """Override the original forward function. + + The main difference lies in the CPU inference where the datas in + :class:`DataContainers` will still be gathered. + """ + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module(*inputs[0], **kwargs[0]) + else: + return super().forward(*inputs, **kwargs) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def train_step(self, *inputs, **kwargs): + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module.train_step(*inputs, **kwargs) + + assert len(self.device_ids) == 1, \ + ('MMDataParallel only supports single GPU training, if you need to' + ' train with multiple GPUs, please use MMDistributedDataParallel' + 'instead.') + + for t in chain(self.module.parameters(), self.module.buffers()): + if t.device != self.src_device_obj: + raise RuntimeError( + 'module must have its parameters and buffers ' + f'on device {self.src_device_obj} (device_ids[0]) but ' + f'found one of them on device: {t.device}') + + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + return self.module.train_step(*inputs[0], **kwargs[0]) + + def val_step(self, *inputs, **kwargs): + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module.val_step(*inputs, **kwargs) + + assert len(self.device_ids) == 1, \ + ('MMDataParallel only supports single GPU training, if you need to' + ' train with multiple GPUs, please use MMDistributedDataParallel' + ' instead.') + + for t in chain(self.module.parameters(), self.module.buffers()): + if t.device != self.src_device_obj: + raise RuntimeError( + 'module must have its parameters and buffers ' + f'on device {self.src_device_obj} (device_ids[0]) but ' + f'found one of them on device: {t.device}') + + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + return self.module.val_step(*inputs[0], **kwargs[0]) diff --git a/mmcv/mmcv/parallel/distributed.py b/mmcv/mmcv/parallel/distributed.py new file mode 100644 index 0000000..07771a3 --- /dev/null +++ b/mmcv/mmcv/parallel/distributed.py @@ -0,0 +1,85 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import torch +from torch.nn.parallel.distributed import (DistributedDataParallel, + _find_tensors) + +from mmcv.utils import TORCH_VERSION +from .scatter_gather import scatter_kwargs + + +class MMDistributedDataParallel(DistributedDataParallel): + """The DDP module that supports DataContainer. + + MMDDP has two main differences with PyTorch DDP: + + - It supports a custom type :class:`DataContainer` which allows more + flexible control of input data. + - It implement two APIs ``train_step()`` and ``val_step()``. + """ + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def train_step(self, *inputs, **kwargs): + """train_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.train_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.train_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply( + self._module_copies[:len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + output = self.module.train_step(*inputs, **kwargs) + + if torch.is_grad_enabled() and getattr( + self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if TORCH_VERSION > '1.2': + self.require_forward_param_sync = False + return output + + def val_step(self, *inputs, **kwargs): + """val_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.val_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.val_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply( + self._module_copies[:len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + output = self.module.val_step(*inputs, **kwargs) + + if torch.is_grad_enabled() and getattr( + self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if TORCH_VERSION > '1.2': + self.require_forward_param_sync = False + return output diff --git a/mmcv/mmcv/parallel/distributed_deprecated.py b/mmcv/mmcv/parallel/distributed_deprecated.py new file mode 100644 index 0000000..2a49fa9 --- /dev/null +++ b/mmcv/mmcv/parallel/distributed_deprecated.py @@ -0,0 +1,69 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import torch +import torch.distributed as dist +import torch.nn as nn +from torch._utils import (_flatten_dense_tensors, _take_tensors, + _unflatten_dense_tensors) + +from mmcv.utils import TORCH_VERSION +from .registry import MODULE_WRAPPERS +from .scatter_gather import scatter_kwargs + + +@MODULE_WRAPPERS.register_module() +class MMDistributedDataParallel(nn.Module): + + def __init__(self, + module, + dim=0, + broadcast_buffers=True, + bucket_cap_mb=25): + super(MMDistributedDataParallel, self).__init__() + self.module = module + self.dim = dim + self.broadcast_buffers = broadcast_buffers + + self.broadcast_bucket_size = bucket_cap_mb * 1024 * 1024 + self._sync_params() + + def _dist_broadcast_coalesced(self, tensors, buffer_size): + for tensors in _take_tensors(tensors, buffer_size): + flat_tensors = _flatten_dense_tensors(tensors) + dist.broadcast(flat_tensors, 0) + for tensor, synced in zip( + tensors, _unflatten_dense_tensors(flat_tensors, tensors)): + tensor.copy_(synced) + + def _sync_params(self): + module_states = list(self.module.state_dict().values()) + if len(module_states) > 0: + self._dist_broadcast_coalesced(module_states, + self.broadcast_bucket_size) + if self.broadcast_buffers: + if TORCH_VERSION < '1.0': + buffers = [b.data for b in self.module._all_buffers()] + else: + buffers = [b.data for b in self.module.buffers()] + if len(buffers) > 0: + self._dist_broadcast_coalesced(buffers, + self.broadcast_bucket_size) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def forward(self, *inputs, **kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, + [torch.cuda.current_device()]) + return self.module(*inputs[0], **kwargs[0]) + + def train_step(self, *inputs, **kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, + [torch.cuda.current_device()]) + output = self.module.train_step(*inputs[0], **kwargs[0]) + return output + + def val_step(self, *inputs, **kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, + [torch.cuda.current_device()]) + output = self.module.val_step(*inputs[0], **kwargs[0]) + return output diff --git a/mmcv/mmcv/parallel/registry.py b/mmcv/mmcv/parallel/registry.py new file mode 100644 index 0000000..6e592c7 --- /dev/null +++ b/mmcv/mmcv/parallel/registry.py @@ -0,0 +1,7 @@ +from torch.nn.parallel import DataParallel, DistributedDataParallel + +from mmcv.utils import Registry + +MODULE_WRAPPERS = Registry('module wrapper') +MODULE_WRAPPERS.register_module(module=DataParallel) +MODULE_WRAPPERS.register_module(module=DistributedDataParallel) diff --git a/mmcv/mmcv/parallel/scatter_gather.py b/mmcv/mmcv/parallel/scatter_gather.py new file mode 100644 index 0000000..78dba14 --- /dev/null +++ b/mmcv/mmcv/parallel/scatter_gather.py @@ -0,0 +1,59 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import torch +from torch.nn.parallel._functions import Scatter as OrigScatter + +from ._functions import Scatter +from .data_container import DataContainer + + +def scatter(inputs, target_gpus, dim=0): + """Scatter inputs to target gpus. + + The only difference from original :func:`scatter` is to add support for + :type:`~mmcv.parallel.DataContainer`. + """ + + def scatter_map(obj): + if isinstance(obj, torch.Tensor): + if target_gpus != [-1]: + return OrigScatter.apply(target_gpus, None, dim, obj) + else: + # for CPU inference we use self-implemented scatter + return Scatter.forward(target_gpus, obj) + if isinstance(obj, DataContainer): + if obj.cpu_only: + return obj.data + else: + return Scatter.forward(target_gpus, obj.data) + if isinstance(obj, tuple) and len(obj) > 0: + return list(zip(*map(scatter_map, obj))) + if isinstance(obj, list) and len(obj) > 0: + out = list(map(list, zip(*map(scatter_map, obj)))) + return out + if isinstance(obj, dict) and len(obj) > 0: + out = list(map(type(obj), zip(*map(scatter_map, obj.items())))) + return out + return [obj for targets in target_gpus] + + # After scatter_map is called, a scatter_map cell will exist. This cell + # has a reference to the actual function scatter_map, which has references + # to a closure that has a reference to the scatter_map cell (because the + # fn is recursive). To avoid this reference cycle, we set the function to + # None, clearing the cell + try: + return scatter_map(inputs) + finally: + scatter_map = None + + +def scatter_kwargs(inputs, kwargs, target_gpus, dim=0): + """Scatter with support for kwargs dictionary.""" + inputs = scatter(inputs, target_gpus, dim) if inputs else [] + kwargs = scatter(kwargs, target_gpus, dim) if kwargs else [] + if len(inputs) < len(kwargs): + inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) + elif len(kwargs) < len(inputs): + kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) + inputs = tuple(inputs) + kwargs = tuple(kwargs) + return inputs, kwargs diff --git a/mmcv/mmcv/parallel/utils.py b/mmcv/mmcv/parallel/utils.py new file mode 100644 index 0000000..ac7a0e4 --- /dev/null +++ b/mmcv/mmcv/parallel/utils.py @@ -0,0 +1,20 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from .registry import MODULE_WRAPPERS + + +def is_module_wrapper(module): + """Check if a module is a module wrapper. + + The following 3 modules in MMCV (and their subclasses) are regarded as + module wrappers: DataParallel, DistributedDataParallel, + MMDistributedDataParallel (the deprecated version). You may add you own + module wrapper by registering it to mmcv.parallel.MODULE_WRAPPERS. + + Args: + module (nn.Module): The module to be checked. + + Returns: + bool: True if the input module is a module wrapper. + """ + module_wrappers = tuple(MODULE_WRAPPERS.module_dict.values()) + return isinstance(module, module_wrappers) diff --git a/mmcv/mmcv/runner/__init__.py b/mmcv/mmcv/runner/__init__.py new file mode 100644 index 0000000..d05116e --- /dev/null +++ b/mmcv/mmcv/runner/__init__.py @@ -0,0 +1,37 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from .base_runner import BaseRunner +from .builder import RUNNERS, build_runner +from .checkpoint import (_load_checkpoint, load_checkpoint, load_state_dict, + save_checkpoint, weights_to_cpu) +from .dist_utils import (allreduce_grads, allreduce_params, get_dist_info, + init_dist, master_only) +from .epoch_based_runner import EpochBasedRunner, Runner +from .fp16_utils import LossScaler, auto_fp16, force_fp32, wrap_fp16_model +from .hooks import (HOOKS, CheckpointHook, ClosureHook, DistSamplerSeedHook, + EMAHook, Fp16OptimizerHook, Hook, IterTimerHook, + LoggerHook, LrUpdaterHook, MlflowLoggerHook, OptimizerHook, + PaviLoggerHook, SyncBuffersHook, TensorboardLoggerHook, + TextLoggerHook, WandbLoggerHook) +from .iter_based_runner import IterBasedRunner, IterLoader +from .log_buffer import LogBuffer +from .optimizer import (OPTIMIZER_BUILDERS, OPTIMIZERS, + DefaultOptimizerConstructor, build_optimizer, + build_optimizer_constructor) +from .priority import Priority, get_priority +from .utils import get_host_info, get_time_str, obj_from_dict, set_random_seed + +__all__ = [ + 'BaseRunner', 'Runner', 'EpochBasedRunner', 'IterBasedRunner', 'LogBuffer', + 'HOOKS', 'Hook', 'CheckpointHook', 'ClosureHook', 'LrUpdaterHook', + 'OptimizerHook', 'IterTimerHook', 'DistSamplerSeedHook', 'LoggerHook', + 'PaviLoggerHook', 'TextLoggerHook', 'TensorboardLoggerHook', + 'WandbLoggerHook', 'MlflowLoggerHook', '_load_checkpoint', + 'load_state_dict', 'load_checkpoint', 'weights_to_cpu', 'save_checkpoint', + 'Priority', 'get_priority', 'get_host_info', 'get_time_str', + 'obj_from_dict', 'init_dist', 'get_dist_info', 'master_only', + 'OPTIMIZER_BUILDERS', 'OPTIMIZERS', 'DefaultOptimizerConstructor', + 'build_optimizer', 'build_optimizer_constructor', 'IterLoader', + 'set_random_seed', 'auto_fp16', 'force_fp32', 'wrap_fp16_model', + 'Fp16OptimizerHook', 'SyncBuffersHook', 'EMAHook', 'build_runner', + 'RUNNERS', 'allreduce_grads', 'allreduce_params', 'LossScaler' +] diff --git a/mmcv/mmcv/runner/base_runner.py b/mmcv/mmcv/runner/base_runner.py new file mode 100644 index 0000000..82cb76a --- /dev/null +++ b/mmcv/mmcv/runner/base_runner.py @@ -0,0 +1,437 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import logging +import os.path as osp +import warnings +from abc import ABCMeta, abstractmethod + +import torch +from torch.optim import Optimizer + +import mmcv +from ..parallel import is_module_wrapper +from .checkpoint import load_checkpoint +from .dist_utils import get_dist_info +from .hooks import HOOKS, Hook, IterTimerHook +from .log_buffer import LogBuffer +from .priority import get_priority +from .utils import get_time_str + + +class BaseRunner(metaclass=ABCMeta): + """The base class of Runner, a training helper for PyTorch. + + All subclasses should implement the following APIs: + + - ``run()`` + - ``train()`` + - ``val()`` + - ``save_checkpoint()`` + + Args: + model (:obj:`torch.nn.Module`): The model to be run. + batch_processor (callable): A callable method that process a data + batch. The interface of this method should be + `batch_processor(model, data, train_mode) -> dict` + optimizer (dict or :obj:`torch.optim.Optimizer`): It can be either an + optimizer (in most cases) or a dict of optimizers (in models that + requires more than one optimizer, e.g., GAN). + work_dir (str, optional): The working directory to save checkpoints + and logs. Defaults to None. + logger (:obj:`logging.Logger`): Logger used during training. + Defaults to None. (The default value is just for backward + compatibility) + meta (dict | None): A dict records some import information such as + environment info and seed, which will be logged in logger hook. + Defaults to None. + max_epochs (int, optional): Total training epochs. + max_iters (int, optional): Total training iterations. + """ + + def __init__(self, + model, + batch_processor=None, + optimizer=None, + work_dir=None, + logger=None, + meta=None, + max_iters=None, + max_epochs=None): + if batch_processor is not None: + if not callable(batch_processor): + raise TypeError('batch_processor must be callable, ' + f'but got {type(batch_processor)}') + warnings.warn('batch_processor is deprecated, please implement ' + 'train_step() and val_step() in the model instead.') + # raise an error is `batch_processor` is not None and + # `model.train_step()` exists. + if is_module_wrapper(model): + _model = model.module + else: + _model = model + if hasattr(_model, 'train_step') or hasattr(_model, 'val_step'): + raise RuntimeError( + 'batch_processor and model.train_step()/model.val_step() ' + 'cannot be both available.') + else: + assert hasattr(model, 'train_step') + + # check the type of `optimizer` + if isinstance(optimizer, dict): + for name, optim in optimizer.items(): + if not isinstance(optim, Optimizer): + raise TypeError( + f'optimizer must be a dict of torch.optim.Optimizers, ' + f'but optimizer["{name}"] is a {type(optim)}') + elif not isinstance(optimizer, Optimizer) and optimizer is not None: + raise TypeError( + f'optimizer must be a torch.optim.Optimizer object ' + f'or dict or None, but got {type(optimizer)}') + + # check the type of `logger` + if not isinstance(logger, logging.Logger): + raise TypeError(f'logger must be a logging.Logger object, ' + f'but got {type(logger)}') + + # check the type of `meta` + if meta is not None and not isinstance(meta, dict): + raise TypeError( + f'meta must be a dict or None, but got {type(meta)}') + + self.model = model + self.batch_processor = batch_processor + self.optimizer = optimizer + self.logger = logger + self.meta = meta + + # create work_dir + if mmcv.is_str(work_dir): + self.work_dir = osp.abspath(work_dir) + mmcv.mkdir_or_exist(self.work_dir) + elif work_dir is None: + self.work_dir = None + else: + raise TypeError('"work_dir" must be a str or None') + + # get model name from the model class + if hasattr(self.model, 'module'): + self._model_name = self.model.module.__class__.__name__ + else: + self._model_name = self.model.__class__.__name__ + + self._rank, self._world_size = get_dist_info() + self.timestamp = get_time_str() + self.mode = None + self._hooks = [] + self._epoch = 0 + self._iter = 0 + self._inner_iter = 0 + + if max_epochs is not None and max_iters is not None: + raise ValueError( + 'Only one of `max_epochs` or `max_iters` can be set.') + + self._max_epochs = max_epochs + self._max_iters = max_iters + # TODO: Redesign LogBuffer, it is not flexible and elegant enough + self.log_buffer = LogBuffer() + + @property + def model_name(self): + """str: Name of the model, usually the module class name.""" + return self._model_name + + @property + def rank(self): + """int: Rank of current process. (distributed training)""" + return self._rank + + @property + def world_size(self): + """int: Number of processes participating in the job. + (distributed training)""" + return self._world_size + + @property + def hooks(self): + """list[:obj:`Hook`]: A list of registered hooks.""" + return self._hooks + + @property + def epoch(self): + """int: Current epoch.""" + return self._epoch + + @property + def iter(self): + """int: Current iteration.""" + return self._iter + + @property + def inner_iter(self): + """int: Iteration in an epoch.""" + return self._inner_iter + + @property + def max_epochs(self): + """int: Maximum training epochs.""" + return self._max_epochs + + @property + def max_iters(self): + """int: Maximum training iterations.""" + return self._max_iters + + @abstractmethod + def train(self): + pass + + @abstractmethod + def val(self): + pass + + @abstractmethod + def run(self, data_loaders, workflow, **kwargs): + pass + + @abstractmethod + def save_checkpoint(self, + out_dir, + filename_tmpl, + save_optimizer=True, + meta=None, + create_symlink=True): + pass + + def current_lr(self): + """Get current learning rates. + + Returns: + list[float] | dict[str, list[float]]: Current learning rates of all + param groups. If the runner has a dict of optimizers, this + method will return a dict. + """ + if isinstance(self.optimizer, torch.optim.Optimizer): + lr = [group['lr'] for group in self.optimizer.param_groups] + elif isinstance(self.optimizer, dict): + lr = dict() + for name, optim in self.optimizer.items(): + lr[name] = [group['lr'] for group in optim.param_groups] + else: + raise RuntimeError( + 'lr is not applicable because optimizer does not exist.') + return lr + + def current_momentum(self): + """Get current momentums. + + Returns: + list[float] | dict[str, list[float]]: Current momentums of all + param groups. If the runner has a dict of optimizers, this + method will return a dict. + """ + + def _get_momentum(optimizer): + momentums = [] + for group in optimizer.param_groups: + if 'momentum' in group.keys(): + momentums.append(group['momentum']) + elif 'betas' in group.keys(): + momentums.append(group['betas'][0]) + else: + momentums.append(0) + return momentums + + if self.optimizer is None: + raise RuntimeError( + 'momentum is not applicable because optimizer does not exist.') + elif isinstance(self.optimizer, torch.optim.Optimizer): + momentums = _get_momentum(self.optimizer) + elif isinstance(self.optimizer, dict): + momentums = dict() + for name, optim in self.optimizer.items(): + momentums[name] = _get_momentum(optim) + return momentums + + def register_hook(self, hook, priority='NORMAL'): + """Register a hook into the hook list. + + The hook will be inserted into a priority queue, with the specified + priority (See :class:`Priority` for details of priorities). + For hooks with the same priority, they will be triggered in the same + order as they are registered. + + Args: + hook (:obj:`Hook`): The hook to be registered. + priority (int or str or :obj:`Priority`): Hook priority. + Lower value means higher priority. + """ + assert isinstance(hook, Hook) + if hasattr(hook, 'priority'): + raise ValueError('"priority" is a reserved attribute for hooks') + priority = get_priority(priority) + hook.priority = priority + # insert the hook to a sorted list + inserted = False + for i in range(len(self._hooks) - 1, -1, -1): + if priority >= self._hooks[i].priority: + self._hooks.insert(i + 1, hook) + inserted = True + break + if not inserted: + self._hooks.insert(0, hook) + + def register_hook_from_cfg(self, hook_cfg): + """Register a hook from its cfg. + + Args: + hook_cfg (dict): Hook config. It should have at least keys 'type' + and 'priority' indicating its type and priority. + + Notes: + The specific hook class to register should not use 'type' and + 'priority' arguments during initialization. + """ + hook_cfg = hook_cfg.copy() + priority = hook_cfg.pop('priority', 'NORMAL') + hook = mmcv.build_from_cfg(hook_cfg, HOOKS) + self.register_hook(hook, priority=priority) + + def call_hook(self, fn_name): + """Call all hooks. + + Args: + fn_name (str): The function name in each hook to be called, such as + "before_train_epoch". + """ + for hook in self._hooks: + getattr(hook, fn_name)(self) + + def load_checkpoint(self, filename, map_location='cpu', strict=False): + self.logger.info('load checkpoint from %s', filename) + return load_checkpoint(self.model, filename, map_location, strict, + self.logger) + + def resume(self, + checkpoint, + resume_optimizer=True, + map_location='default'): + if map_location == 'default': + if torch.cuda.is_available(): + device_id = torch.cuda.current_device() + checkpoint = self.load_checkpoint( + checkpoint, + map_location=lambda storage, loc: storage.cuda(device_id)) + else: + checkpoint = self.load_checkpoint(checkpoint) + else: + checkpoint = self.load_checkpoint( + checkpoint, map_location=map_location) + + self._epoch = checkpoint['meta']['epoch'] + self._iter = checkpoint['meta']['iter'] + if 'optimizer' in checkpoint and resume_optimizer: + if isinstance(self.optimizer, Optimizer): + self.optimizer.load_state_dict(checkpoint['optimizer']) + elif isinstance(self.optimizer, dict): + for k in self.optimizer.keys(): + self.optimizer[k].load_state_dict( + checkpoint['optimizer'][k]) + else: + raise TypeError( + 'Optimizer should be dict or torch.optim.Optimizer ' + f'but got {type(self.optimizer)}') + + self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) + + def register_lr_hook(self, lr_config): + if isinstance(lr_config, dict): + assert 'policy' in lr_config + policy_type = lr_config.pop('policy') + # If the type of policy is all in lower case, e.g., 'cyclic', + # then its first letter will be capitalized, e.g., to be 'Cyclic'. + # This is for the convenient usage of Lr updater. + # Since this is not applicable for ` + # CosineAnnealingLrUpdater`, + # the string will not be changed if it contains capital letters. + if policy_type == policy_type.lower(): + policy_type = policy_type.title() + hook_type = policy_type + 'LrUpdaterHook' + lr_config['type'] = hook_type + hook = mmcv.build_from_cfg(lr_config, HOOKS) + else: + hook = lr_config + self.register_hook(hook) + + def register_momentum_hook(self, momentum_config): + if momentum_config is None: + return + if isinstance(momentum_config, dict): + assert 'policy' in momentum_config + policy_type = momentum_config.pop('policy') + # If the type of policy is all in lower case, e.g., 'cyclic', + # then its first letter will be capitalized, e.g., to be 'Cyclic'. + # This is for the convenient usage of momentum updater. + # Since this is not applicable for + # `CosineAnnealingMomentumUpdater`, + # the string will not be changed if it contains capital letters. + if policy_type == policy_type.lower(): + policy_type = policy_type.title() + hook_type = policy_type + 'MomentumUpdaterHook' + momentum_config['type'] = hook_type + hook = mmcv.build_from_cfg(momentum_config, HOOKS) + else: + hook = momentum_config + self.register_hook(hook) + + def register_optimizer_hook(self, optimizer_config): + if optimizer_config is None: + return + if isinstance(optimizer_config, dict): + optimizer_config.setdefault('type', 'OptimizerHook') + hook = mmcv.build_from_cfg(optimizer_config, HOOKS) + else: + hook = optimizer_config + self.register_hook(hook) + + def register_checkpoint_hook(self, checkpoint_config): + if checkpoint_config is None: + return + if isinstance(checkpoint_config, dict): + checkpoint_config.setdefault('type', 'CheckpointHook') + hook = mmcv.build_from_cfg(checkpoint_config, HOOKS) + else: + hook = checkpoint_config + self.register_hook(hook) + + def register_logger_hooks(self, log_config): + if log_config is None: + return + log_interval = log_config['interval'] + for info in log_config['hooks']: + logger_hook = mmcv.build_from_cfg( + info, HOOKS, default_args=dict(interval=log_interval)) + self.register_hook(logger_hook, priority='VERY_LOW') + + def register_training_hooks(self, + lr_config, + optimizer_config=None, + checkpoint_config=None, + log_config=None, + momentum_config=None): + """Register default hooks for training. + + Default hooks include: + + - LrUpdaterHook + - MomentumUpdaterHook + - OptimizerStepperHook + - CheckpointSaverHook + - IterTimerHook + - LoggerHook(s) + """ + self.register_lr_hook(lr_config) + self.register_momentum_hook(momentum_config) + self.register_optimizer_hook(optimizer_config) + self.register_checkpoint_hook(checkpoint_config) + self.register_hook(IterTimerHook()) + self.register_logger_hooks(log_config) diff --git a/mmcv/mmcv/runner/builder.py b/mmcv/mmcv/runner/builder.py new file mode 100644 index 0000000..e9989b0 --- /dev/null +++ b/mmcv/mmcv/runner/builder.py @@ -0,0 +1,7 @@ +from ..utils import Registry, build_from_cfg + +RUNNERS = Registry('runner') + + +def build_runner(cfg, default_args=None): + return build_from_cfg(cfg, RUNNERS, default_args=default_args) diff --git a/mmcv/mmcv/runner/checkpoint.py b/mmcv/mmcv/runner/checkpoint.py new file mode 100644 index 0000000..ddb9f23 --- /dev/null +++ b/mmcv/mmcv/runner/checkpoint.py @@ -0,0 +1,380 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import os +import os.path as osp +import pkgutil +import time +import warnings +from collections import OrderedDict +from importlib import import_module + +import torch +import torchvision +from torch.optim import Optimizer +from torch.utils import model_zoo + +import mmcv +from ..fileio import load as load_file +from ..parallel import is_module_wrapper +from ..utils import mkdir_or_exist +from .dist_utils import get_dist_info + +ENV_MMCV_HOME = 'MMCV_HOME' +ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' +DEFAULT_CACHE_DIR = '~/.cache' + + +def _get_mmcv_home(): + mmcv_home = os.path.expanduser( + os.getenv( + ENV_MMCV_HOME, + os.path.join( + os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv'))) + + mkdir_or_exist(mmcv_home) + return mmcv_home + + +def load_state_dict(module, state_dict, strict=False, logger=None): + """Load state_dict to a module. + + This method is modified from :meth:`torch.nn.Module.load_state_dict`. + Default value for ``strict`` is set to ``False`` and the message for + param mismatch will be shown even if strict is False. + + Args: + module (Module): Module that receives the state_dict. + state_dict (OrderedDict): Weights. + strict (bool): whether to strictly enforce that the keys + in :attr:`state_dict` match the keys returned by this module's + :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. + logger (:obj:`logging.Logger`, optional): Logger to log the error + message. If not specified, print function will be used. + """ + unexpected_keys = [] + all_missing_keys = [] + err_msg = [] + + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + # use _load_from_state_dict to enable checkpoint version control + def load(module, prefix=''): + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + local_metadata = {} if metadata is None else metadata.get( + prefix[:-1], {}) + module._load_from_state_dict(state_dict, prefix, local_metadata, True, + all_missing_keys, unexpected_keys, + err_msg) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(module) + load = None # break load->load reference cycle + + # ignore "num_batches_tracked" of BN layers + missing_keys = [ + key for key in all_missing_keys if 'num_batches_tracked' not in key + ] + + if unexpected_keys: + err_msg.append('unexpected key in source ' + f'state_dict: {", ".join(unexpected_keys)}\n') + if missing_keys: + err_msg.append( + f'missing keys in source state_dict: {", ".join(missing_keys)}\n') + + rank, _ = get_dist_info() + if len(err_msg) > 0 and rank == 0: + err_msg.insert( + 0, 'The model and loaded state dict do not match exactly\n') + err_msg = '\n'.join(err_msg) + if strict: + raise RuntimeError(err_msg) + elif logger is not None: + logger.warning(err_msg) + else: + print(err_msg) + + +def load_url_dist(url, model_dir=None): + """In distributed setting, this function only download checkpoint at local + rank 0.""" + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + if rank == 0: + checkpoint = model_zoo.load_url(url, model_dir=model_dir) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + checkpoint = model_zoo.load_url(url, model_dir=model_dir) + return checkpoint + + +def get_torchvision_models(): + model_urls = dict() + for _, name, ispkg in pkgutil.walk_packages(torchvision.models.__path__): + if ispkg: + continue + _zoo = import_module(f'torchvision.models.{name}') + if hasattr(_zoo, 'model_urls'): + _urls = getattr(_zoo, 'model_urls') + model_urls.update(_urls) + return model_urls + + +def get_external_models(): + mmcv_home = _get_mmcv_home() + default_json_path = osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json') + default_urls = load_file(default_json_path) + assert isinstance(default_urls, dict) + external_json_path = osp.join(mmcv_home, 'open_mmlab.json') + if osp.exists(external_json_path): + external_urls = load_file(external_json_path) + assert isinstance(external_urls, dict) + default_urls.update(external_urls) + + return default_urls + + +def get_mmcls_models(): + mmcls_json_path = osp.join(mmcv.__path__[0], 'model_zoo/mmcls.json') + mmcls_urls = load_file(mmcls_json_path) + + return mmcls_urls + + +def get_deprecated_model_names(): + deprecate_json_path = osp.join(mmcv.__path__[0], + 'model_zoo/deprecated.json') + deprecate_urls = load_file(deprecate_json_path) + assert isinstance(deprecate_urls, dict) + + return deprecate_urls + + +def _process_mmcls_checkpoint(checkpoint): + state_dict = checkpoint['state_dict'] + new_state_dict = OrderedDict() + for k, v in state_dict.items(): + if k.startswith('backbone.'): + new_state_dict[k[9:]] = v + new_checkpoint = dict(state_dict=new_state_dict) + + return new_checkpoint + + +def _load_checkpoint(filename, map_location=None): + """Load checkpoint from somewhere (modelzoo, file, url). + + Args: + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str | None): Same as :func:`torch.load`. Default: None. + + Returns: + dict | OrderedDict: The loaded checkpoint. It can be either an + OrderedDict storing model weights or a dict containing other + information, which depends on the checkpoint. + """ + if filename.startswith('modelzoo://'): + warnings.warn('The URL scheme of "modelzoo://" is deprecated, please ' + 'use "torchvision://" instead') + model_urls = get_torchvision_models() + model_name = filename[11:] + checkpoint = load_url_dist(model_urls[model_name]) + elif filename.startswith('torchvision://'): + model_urls = get_torchvision_models() + model_name = filename[14:] + checkpoint = load_url_dist(model_urls[model_name]) + elif filename.startswith('open-mmlab://'): + model_urls = get_external_models() + model_name = filename[13:] + deprecated_urls = get_deprecated_model_names() + if model_name in deprecated_urls: + warnings.warn(f'open-mmlab://{model_name} is deprecated in favor ' + f'of open-mmlab://{deprecated_urls[model_name]}') + model_name = deprecated_urls[model_name] + model_url = model_urls[model_name] + # check if is url + if model_url.startswith(('http://', 'https://')): + checkpoint = load_url_dist(model_url) + else: + filename = osp.join(_get_mmcv_home(), model_url) + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + elif filename.startswith('mmcls://'): + model_urls = get_mmcls_models() + model_name = filename[8:] + checkpoint = load_url_dist(model_urls[model_name]) + checkpoint = _process_mmcls_checkpoint(checkpoint) + elif filename.startswith(('http://', 'https://')): + checkpoint = load_url_dist(filename) + else: + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + return checkpoint + + +def load_checkpoint(model, + filename, + map_location=None, + strict=False, + logger=None): + """Load checkpoint from a file or URI. + + Args: + model (Module): Module to load checkpoint. + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str): Same as :func:`torch.load`. + strict (bool): Whether to allow different params for the model and + checkpoint. + logger (:mod:`logging.Logger` or None): The logger for error message. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + checkpoint = _load_checkpoint(filename, map_location) + # OrderedDict is a subclass of dict + if not isinstance(checkpoint, dict): + raise RuntimeError( + f'No state_dict found in checkpoint file {filename}') + # get state_dict from checkpoint + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + # strip prefix of state_dict + if list(state_dict.keys())[0].startswith('module.'): + state_dict = {k[7:]: v for k, v in checkpoint['state_dict'].items()} + # load state_dict + load_state_dict(model, state_dict, strict, logger) + return checkpoint + + +def weights_to_cpu(state_dict): + """Copy a model state_dict to cpu. + + Args: + state_dict (OrderedDict): Model weights on GPU. + + Returns: + OrderedDict: Model weights on GPU. + """ + state_dict_cpu = OrderedDict() + for key, val in state_dict.items(): + state_dict_cpu[key] = val.cpu() + return state_dict_cpu + + +def _save_to_state_dict(module, destination, prefix, keep_vars): + """Saves module state to `destination` dictionary. + + This method is modified from :meth:`torch.nn.Module._save_to_state_dict`. + + Args: + module (nn.Module): The module to generate state_dict. + destination (dict): A dict where state will be stored. + prefix (str): The prefix for parameters and buffers used in this + module. + """ + for name, param in module._parameters.items(): + if param is not None: + destination[prefix + name] = param if keep_vars else param.detach() + for name, buf in module._buffers.items(): + # remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d + if buf is not None: + destination[prefix + name] = buf if keep_vars else buf.detach() + + +def get_state_dict(module, destination=None, prefix='', keep_vars=False): + """Returns a dictionary containing a whole state of the module. + + Both parameters and persistent buffers (e.g. running averages) are + included. Keys are corresponding parameter and buffer names. + + This method is modified from :meth:`torch.nn.Module.state_dict` to + recursively check parallel module in case that the model has a complicated + structure, e.g., nn.Module(nn.Module(DDP)). + + Args: + module (nn.Module): The module to generate state_dict. + destination (OrderedDict): Returned dict for the state of the + module. + prefix (str): Prefix of the key. + keep_vars (bool): Whether to keep the variable property of the + parameters. Default: False. + + Returns: + dict: A dictionary containing a whole state of the module. + """ + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + + # below is the same as torch.nn.Module.state_dict() + if destination is None: + destination = OrderedDict() + destination._metadata = OrderedDict() + destination._metadata[prefix[:-1]] = local_metadata = dict( + version=module._version) + _save_to_state_dict(module, destination, prefix, keep_vars) + for name, child in module._modules.items(): + if child is not None: + get_state_dict( + child, destination, prefix + name + '.', keep_vars=keep_vars) + for hook in module._state_dict_hooks.values(): + hook_result = hook(module, destination, prefix, local_metadata) + if hook_result is not None: + destination = hook_result + return destination + + +def save_checkpoint(model, filename, optimizer=None, meta=None): + """Save checkpoint to file. + + The checkpoint will have 3 fields: ``meta``, ``state_dict`` and + ``optimizer``. By default ``meta`` will contain version and time info. + + Args: + model (Module): Module whose params are to be saved. + filename (str): Checkpoint filename. + optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. + meta (dict, optional): Metadata to be saved in checkpoint. + """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError(f'meta must be a dict or None, but got {type(meta)}') + meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) + + mmcv.mkdir_or_exist(osp.dirname(filename)) + if is_module_wrapper(model): + model = model.module + + checkpoint = { + 'meta': meta, + 'state_dict': weights_to_cpu(get_state_dict(model)) + } + # save optimizer state dict in the checkpoint + if isinstance(optimizer, Optimizer): + checkpoint['optimizer'] = optimizer.state_dict() + elif isinstance(optimizer, dict): + checkpoint['optimizer'] = {} + for name, optim in optimizer.items(): + checkpoint['optimizer'][name] = optim.state_dict() + # immediately flush buffer + with open(filename, 'wb') as f: + torch.save(checkpoint, f) + f.flush() diff --git a/mmcv/mmcv/runner/dist_utils.py b/mmcv/mmcv/runner/dist_utils.py new file mode 100644 index 0000000..1e428ca --- /dev/null +++ b/mmcv/mmcv/runner/dist_utils.py @@ -0,0 +1,167 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import functools +import os +import subprocess +from collections import OrderedDict + +import torch +import torch.multiprocessing as mp +from torch import distributed as dist +from torch._utils import (_flatten_dense_tensors, _take_tensors, + _unflatten_dense_tensors) + +from mmcv.utils import TORCH_VERSION + + +def init_dist(launcher, backend='nccl', **kwargs): + if mp.get_start_method(allow_none=True) is None: + mp.set_start_method('spawn') + if launcher == 'pytorch': + _init_dist_pytorch(backend, **kwargs) + elif launcher == 'mpi': + _init_dist_mpi(backend, **kwargs) + elif launcher == 'slurm': + _init_dist_slurm(backend, **kwargs) + else: + raise ValueError(f'Invalid launcher type: {launcher}') + + +def _init_dist_pytorch(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['RANK']) + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(rank % num_gpus) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_mpi(backend, **kwargs): + raise NotImplementedError + + +def _init_dist_slurm(backend, port=None): + """Initialize slurm distributed training environment. + + If argument ``port`` is not specified, then the master port will be system + environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system + environment variable, then a default port ``29500`` will be used. + + Args: + backend (str): Backend of torch.distributed. + port (int, optional): Master port. Defaults to None. + """ + proc_id = int(os.environ['SLURM_PROCID']) + ntasks = int(os.environ['SLURM_NTASKS']) + node_list = os.environ['SLURM_NODELIST'] + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(proc_id % num_gpus) + addr = subprocess.getoutput( + f'scontrol show hostname {node_list} | head -n1') + # specify master port + if port is not None: + os.environ['MASTER_PORT'] = str(port) + elif 'MASTER_PORT' in os.environ: + pass # use MASTER_PORT in the environment variable + else: + # 29500 is torch.distributed default port + os.environ['MASTER_PORT'] = '29500' + os.environ['MASTER_ADDR'] = addr + os.environ['WORLD_SIZE'] = str(ntasks) + os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) + os.environ['RANK'] = str(proc_id) + dist.init_process_group(backend=backend) + + +def get_dist_info(): + if TORCH_VERSION < '1.0': + initialized = dist._initialized + else: + if dist.is_available(): + initialized = dist.is_initialized() + else: + initialized = False + if initialized: + rank = dist.get_rank() + world_size = dist.get_world_size() + else: + rank = 0 + world_size = 1 + return rank, world_size + + +def master_only(func): + + @functools.wraps(func) + def wrapper(*args, **kwargs): + rank, _ = get_dist_info() + if rank == 0: + return func(*args, **kwargs) + + return wrapper + + +def allreduce_params(params, coalesce=True, bucket_size_mb=-1): + """Allreduce parameters. + + Args: + params (list[torch.Parameters]): List of parameters or buffers of a + model. + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. + """ + _, world_size = get_dist_info() + if world_size == 1: + return + params = [param.data for param in params] + if coalesce: + _allreduce_coalesced(params, world_size, bucket_size_mb) + else: + for tensor in params: + dist.all_reduce(tensor.div_(world_size)) + + +def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): + """Allreduce gradients. + + Args: + params (list[torch.Parameters]): List of parameters of a model + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. + """ + grads = [ + param.grad.data for param in params + if param.requires_grad and param.grad is not None + ] + _, world_size = get_dist_info() + if world_size == 1: + return + if coalesce: + _allreduce_coalesced(grads, world_size, bucket_size_mb) + else: + for tensor in grads: + dist.all_reduce(tensor.div_(world_size)) + + +def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): + if bucket_size_mb > 0: + bucket_size_bytes = bucket_size_mb * 1024 * 1024 + buckets = _take_tensors(tensors, bucket_size_bytes) + else: + buckets = OrderedDict() + for tensor in tensors: + tp = tensor.type() + if tp not in buckets: + buckets[tp] = [] + buckets[tp].append(tensor) + buckets = buckets.values() + + for bucket in buckets: + flat_tensors = _flatten_dense_tensors(bucket) + dist.all_reduce(flat_tensors) + flat_tensors.div_(world_size) + for tensor, synced in zip( + bucket, _unflatten_dense_tensors(flat_tensors, bucket)): + tensor.copy_(synced) diff --git a/mmcv/mmcv/runner/epoch_based_runner.py b/mmcv/mmcv/runner/epoch_based_runner.py new file mode 100644 index 0000000..2cd12d1 --- /dev/null +++ b/mmcv/mmcv/runner/epoch_based_runner.py @@ -0,0 +1,182 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import os.path as osp +import platform +import shutil +import time +import warnings + +import torch + +import mmcv +from .base_runner import BaseRunner +from .builder import RUNNERS +from .checkpoint import save_checkpoint +from .utils import get_host_info + + +@RUNNERS.register_module() +class EpochBasedRunner(BaseRunner): + """Epoch-based Runner. + + This runner train models epoch by epoch. + """ + + def run_iter(self, data_batch, train_mode, **kwargs): + if self.batch_processor is not None: + outputs = self.batch_processor( + self.model, data_batch, train_mode=train_mode, **kwargs) + elif train_mode: + outputs = self.model.train_step(data_batch, self.optimizer, + **kwargs) + else: + outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) + if not isinstance(outputs, dict): + raise TypeError('"batch_processor()" or "model.train_step()"' + 'and "model.val_step()" must return a dict') + if 'log_vars' in outputs: + self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) + self.outputs = outputs + + def train(self, data_loader, **kwargs): + self.model.train() + self.mode = 'train' + self.data_loader = data_loader + self._max_iters = self._max_epochs * len(self.data_loader) + self.call_hook('before_train_epoch') + time.sleep(2) # Prevent possible deadlock during epoch transition + for i, data_batch in enumerate(self.data_loader): + self._inner_iter = i + self.call_hook('before_train_iter') + self.run_iter(data_batch, train_mode=True) + self.call_hook('after_train_iter') + self._iter += 1 + + self.call_hook('after_train_epoch') + self._epoch += 1 + + def val(self, data_loader, **kwargs): + self.model.eval() + self.mode = 'val' + self.data_loader = data_loader + self.call_hook('before_val_epoch') + time.sleep(2) # Prevent possible deadlock during epoch transition + for i, data_batch in enumerate(self.data_loader): + self._inner_iter = i + self.call_hook('before_val_iter') + with torch.no_grad(): + self.run_iter(data_batch, train_mode=False) + self.call_hook('after_val_iter') + + self.call_hook('after_val_epoch') + + def run(self, data_loaders, workflow, max_epochs=None, **kwargs): + """Start running. + + Args: + data_loaders (list[:obj:`DataLoader`]): Dataloaders for training + and validation. + workflow (list[tuple]): A list of (phase, epochs) to specify the + running order and epochs. E.g, [('train', 2), ('val', 1)] means + running 2 epochs for training and 1 epoch for validation, + iteratively. + """ + assert isinstance(data_loaders, list) + assert mmcv.is_list_of(workflow, tuple) + assert len(data_loaders) == len(workflow) + if max_epochs is not None: + warnings.warn( + 'setting max_epochs in run is deprecated, ' + 'please set max_epochs in runner_config', DeprecationWarning) + self._max_epochs = max_epochs + + assert self._max_epochs is not None, ( + 'max_epochs must be specified during instantiation') + + for i, flow in enumerate(workflow): + mode, epochs = flow + if mode == 'train': + self._max_iters = self._max_epochs * len(data_loaders[i]) + break + + work_dir = self.work_dir if self.work_dir is not None else 'NONE' + self.logger.info('Start running, host: %s, work_dir: %s', + get_host_info(), work_dir) + self.logger.info('workflow: %s, max: %d epochs', workflow, + self._max_epochs) + self.call_hook('before_run') + + while self.epoch < self._max_epochs: + for i, flow in enumerate(workflow): + mode, epochs = flow + if isinstance(mode, str): # self.train() + if not hasattr(self, mode): + raise ValueError( + f'runner has no method named "{mode}" to run an ' + 'epoch') + epoch_runner = getattr(self, mode) + else: + raise TypeError( + 'mode in workflow must be a str, but got {}'.format( + type(mode))) + + for _ in range(epochs): + if mode == 'train' and self.epoch >= self._max_epochs: + break + epoch_runner(data_loaders[i], **kwargs) + + time.sleep(1) # wait for some hooks like loggers to finish + self.call_hook('after_run') + + def save_checkpoint(self, + out_dir, + filename_tmpl='epoch_{}.pth', + save_optimizer=True, + meta=None, + create_symlink=True): + """Save the checkpoint. + + Args: + out_dir (str): The directory that checkpoints are saved. + filename_tmpl (str, optional): The checkpoint filename template, + which contains a placeholder for the epoch number. + Defaults to 'epoch_{}.pth'. + save_optimizer (bool, optional): Whether to save the optimizer to + the checkpoint. Defaults to True. + meta (dict, optional): The meta information to be saved in the + checkpoint. Defaults to None. + create_symlink (bool, optional): Whether to create a symlink + "latest.pth" to point to the latest checkpoint. + Defaults to True. + """ + if meta is None: + meta = dict(epoch=self.epoch + 1, iter=self.iter) + elif isinstance(meta, dict): + meta.update(epoch=self.epoch + 1, iter=self.iter) + else: + raise TypeError( + f'meta should be a dict or None, but got {type(meta)}') + if self.meta is not None: + meta.update(self.meta) + + filename = filename_tmpl.format(self.epoch + 1) + filepath = osp.join(out_dir, filename) + optimizer = self.optimizer if save_optimizer else None + save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) + # in some environments, `os.symlink` is not supported, you may need to + # set `create_symlink` to False + if create_symlink: + dst_file = osp.join(out_dir, 'latest.pth') + if platform.system() != 'Windows': + mmcv.symlink(filename, dst_file) + else: + shutil.copy(filepath, dst_file) + + +@RUNNERS.register_module() +class Runner(EpochBasedRunner): + """Deprecated name of EpochBasedRunner.""" + + def __init__(self, *args, **kwargs): + warnings.warn( + 'Runner was deprecated, please use EpochBasedRunner instead') + super().__init__(*args, **kwargs) diff --git a/mmcv/mmcv/runner/fp16_utils.py b/mmcv/mmcv/runner/fp16_utils.py new file mode 100644 index 0000000..b9ab9a1 --- /dev/null +++ b/mmcv/mmcv/runner/fp16_utils.py @@ -0,0 +1,350 @@ +import functools +import warnings +from collections import abc +from inspect import getfullargspec + +import numpy as np +import torch +import torch.nn as nn + +from .dist_utils import allreduce_grads as _allreduce_grads + + +def cast_tensor_type(inputs, src_type, dst_type): + """Recursively convert Tensor in inputs from src_type to dst_type. + + Args: + inputs: Inputs that to be casted. + src_type (torch.dtype): Source type.. + dst_type (torch.dtype): Destination type. + + Returns: + The same type with inputs, but all contained Tensors have been cast. + """ + if isinstance(inputs, torch.Tensor): + return inputs.to(dst_type) + elif isinstance(inputs, str): + return inputs + elif isinstance(inputs, np.ndarray): + return inputs + elif isinstance(inputs, abc.Mapping): + return type(inputs)({ + k: cast_tensor_type(v, src_type, dst_type) + for k, v in inputs.items() + }) + elif isinstance(inputs, abc.Iterable): + return type(inputs)( + cast_tensor_type(item, src_type, dst_type) for item in inputs) + else: + return inputs + + +def auto_fp16(apply_to=None, out_fp32=False): + """Decorator to enable fp16 training automatically. + + This decorator is useful when you write custom modules and want to support + mixed precision training. If inputs arguments are fp32 tensors, they will + be converted to fp16 automatically. Arguments other than fp32 tensors are + ignored. + + Args: + apply_to (Iterable, optional): The argument names to be converted. + `None` indicates all arguments. + out_fp32 (bool): Whether to convert the output back to fp32. + + Example: + + >>> import torch.nn as nn + >>> class MyModule1(nn.Module): + >>> + >>> # Convert x and y to fp16 + >>> @auto_fp16() + >>> def forward(self, x, y): + >>> pass + + >>> import torch.nn as nn + >>> class MyModule2(nn.Module): + >>> + >>> # convert pred to fp16 + >>> @auto_fp16(apply_to=('pred', )) + >>> def do_something(self, pred, others): + >>> pass + """ + + def auto_fp16_wrapper(old_func): + + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # check if the module has set the attribute `fp16_enabled`, if not, + # just fallback to the original method. + if not isinstance(args[0], torch.nn.Module): + raise TypeError('@auto_fp16 can only be used to decorate the ' + 'method of nn.Module') + if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): + return old_func(*args, **kwargs) + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get the argument names to be casted + args_to_cast = args_info.args if apply_to is None else apply_to + # convert the args that need to be processed + new_args = [] + # NOTE: default args are not taken into consideration + if args: + arg_names = args_info.args[:len(args)] + for i, arg_name in enumerate(arg_names): + if arg_name in args_to_cast: + new_args.append( + cast_tensor_type(args[i], torch.float, torch.half)) + else: + new_args.append(args[i]) + # convert the kwargs that need to be processed + new_kwargs = {} + if kwargs: + for arg_name, arg_value in kwargs.items(): + if arg_name in args_to_cast: + new_kwargs[arg_name] = cast_tensor_type( + arg_value, torch.float, torch.half) + else: + new_kwargs[arg_name] = arg_value + # apply converted arguments to the decorated method + output = old_func(*new_args, **new_kwargs) + # cast the results back to fp32 if necessary + if out_fp32: + output = cast_tensor_type(output, torch.half, torch.float) + return output + + return new_func + + return auto_fp16_wrapper + + +def force_fp32(apply_to=None, out_fp16=False): + """Decorator to convert input arguments to fp32 in force. + + This decorator is useful when you write custom modules and want to support + mixed precision training. If there are some inputs that must be processed + in fp32 mode, then this decorator can handle it. If inputs arguments are + fp16 tensors, they will be converted to fp32 automatically. Arguments other + than fp16 tensors are ignored. + + Args: + apply_to (Iterable, optional): The argument names to be converted. + `None` indicates all arguments. + out_fp16 (bool): Whether to convert the output back to fp16. + + Example: + + >>> import torch.nn as nn + >>> class MyModule1(nn.Module): + >>> + >>> # Convert x and y to fp32 + >>> @force_fp32() + >>> def loss(self, x, y): + >>> pass + + >>> import torch.nn as nn + >>> class MyModule2(nn.Module): + >>> + >>> # convert pred to fp32 + >>> @force_fp32(apply_to=('pred', )) + >>> def post_process(self, pred, others): + >>> pass + """ + + def force_fp32_wrapper(old_func): + + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # check if the module has set the attribute `fp16_enabled`, if not, + # just fallback to the original method. + if not isinstance(args[0], torch.nn.Module): + raise TypeError('@force_fp32 can only be used to decorate the ' + 'method of nn.Module') + if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): + return old_func(*args, **kwargs) + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get the argument names to be casted + args_to_cast = args_info.args if apply_to is None else apply_to + # convert the args that need to be processed + new_args = [] + if args: + arg_names = args_info.args[:len(args)] + for i, arg_name in enumerate(arg_names): + if arg_name in args_to_cast: + new_args.append( + cast_tensor_type(args[i], torch.half, torch.float)) + else: + new_args.append(args[i]) + # convert the kwargs that need to be processed + new_kwargs = dict() + if kwargs: + for arg_name, arg_value in kwargs.items(): + if arg_name in args_to_cast: + new_kwargs[arg_name] = cast_tensor_type( + arg_value, torch.half, torch.float) + else: + new_kwargs[arg_name] = arg_value + # apply converted arguments to the decorated method + output = old_func(*new_args, **new_kwargs) + # cast the results back to fp32 if necessary + if out_fp16: + output = cast_tensor_type(output, torch.float, torch.half) + return output + + return new_func + + return force_fp32_wrapper + + +def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): + warnings.warning( + '"mmcv.runner.fp16_utils.allreduce_grads" is deprecated, and will be ' + 'removed in v2.8. Please switch to "mmcv.runner.allreduce_grads') + _allreduce_grads(params, coalesce=coalesce, bucket_size_mb=bucket_size_mb) + + +def wrap_fp16_model(model): + """Wrap the FP32 model to FP16. + + 1. Convert FP32 model to FP16. + 2. Remain some necessary layers to be FP32, e.g., normalization layers. + + Args: + model (nn.Module): Model in FP32. + """ + # convert model to fp16 + model.half() + # patch the normalization layers to make it work in fp32 mode + patch_norm_fp32(model) + # set `fp16_enabled` flag + for m in model.modules(): + if hasattr(m, 'fp16_enabled'): + m.fp16_enabled = True + + +def patch_norm_fp32(module): + """Recursively convert normalization layers from FP16 to FP32. + + Args: + module (nn.Module): The modules to be converted in FP16. + + Returns: + nn.Module: The converted module, the normalization layers have been + converted to FP32. + """ + if isinstance(module, (nn.modules.batchnorm._BatchNorm, nn.GroupNorm)): + module.float() + if isinstance(module, nn.GroupNorm) or torch.__version__ < '1.3': + module.forward = patch_forward_method(module.forward, torch.half, + torch.float) + for child in module.children(): + patch_norm_fp32(child) + return module + + +def patch_forward_method(func, src_type, dst_type, convert_output=True): + """Patch the forward method of a module. + + Args: + func (callable): The original forward method. + src_type (torch.dtype): Type of input arguments to be converted from. + dst_type (torch.dtype): Type of input arguments to be converted to. + convert_output (bool): Whether to convert the output back to src_type. + + Returns: + callable: The patched forward method. + """ + + def new_forward(*args, **kwargs): + output = func(*cast_tensor_type(args, src_type, dst_type), + **cast_tensor_type(kwargs, src_type, dst_type)) + if convert_output: + output = cast_tensor_type(output, dst_type, src_type) + return output + + return new_forward + + +class LossScaler: + """Class that manages loss scaling in mixed precision training which + supports both dynamic or static mode. + + The implementation refers to + https://github.com/NVIDIA/apex/blob/master/apex/fp16_utils/loss_scaler.py. + Indirectly, by supplying ``mode='dynamic'`` for dynamic loss scaling. + It's important to understand how :class:`LossScaler` operates. + Loss scaling is designed to combat the problem of underflowing + gradients encountered at long times when training fp16 networks. + Dynamic loss scaling begins by attempting a very high loss + scale. Ironically, this may result in OVERflowing gradients. + If overflowing gradients are encountered, :class:`FP16_Optimizer` then + skips the update step for this particular iteration/minibatch, + and :class:`LossScaler` adjusts the loss scale to a lower value. + If a certain number of iterations occur without overflowing gradients + detected,:class:`LossScaler` increases the loss scale once more. + In this way :class:`LossScaler` attempts to "ride the edge" of always + using the highest loss scale possible without incurring overflow. + + Args: + init_scale (float): Initial loss scale value, default: 2**32. + scale_factor (float): Factor used when adjusting the loss scale. + Default: 2. + mode (str): Loss scaling mode. 'dynamic' or 'static' + scale_window (int): Number of consecutive iterations without an + overflow to wait before increasing the loss scale. Default: 1000. + """ + + def __init__(self, + init_scale=2**32, + mode='dynamic', + scale_factor=2., + scale_window=1000): + self.cur_scale = init_scale + self.cur_iter = 0 + assert mode in ('dynamic', + 'static'), 'mode can only be dynamic or static' + self.mode = mode + self.last_overflow_iter = -1 + self.scale_factor = scale_factor + self.scale_window = scale_window + + def has_overflow(self, params): + """Check if params contain overflow.""" + if self.mode != 'dynamic': + return False + for p in params: + if p.grad is not None and LossScaler._has_inf_or_nan(p.grad.data): + return True + return False + + def _has_inf_or_nan(x): + """Check if params contain NaN.""" + try: + cpu_sum = float(x.float().sum()) + except RuntimeError as instance: + if 'value cannot be converted' not in instance.args[0]: + raise + return True + else: + if cpu_sum == float('inf') or cpu_sum == -float('inf') \ + or cpu_sum != cpu_sum: + return True + return False + + def update_scale(self, overflow): + """update the current loss scale value when overflow happens.""" + if self.mode != 'dynamic': + return + if overflow: + self.cur_scale = max(self.cur_scale / self.scale_factor, 1) + self.last_overflow_iter = self.cur_iter + else: + if (self.cur_iter - self.last_overflow_iter) % \ + self.scale_window == 0: + self.cur_scale *= self.scale_factor + self.cur_iter += 1 + + @property + def loss_scale(self): + return self.cur_scale diff --git a/mmcv/mmcv/runner/hooks/__init__.py b/mmcv/mmcv/runner/hooks/__init__.py new file mode 100644 index 0000000..c2d5a95 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from .checkpoint import CheckpointHook +from .closure import ClosureHook +from .ema import EMAHook +from .hook import HOOKS, Hook +from .iter_timer import IterTimerHook +from .logger import (LoggerHook, MlflowLoggerHook, PaviLoggerHook, + TensorboardLoggerHook, TextLoggerHook, WandbLoggerHook) +from .lr_updater import LrUpdaterHook +from .memory import EmptyCacheHook +from .momentum_updater import MomentumUpdaterHook +from .optimizer import Fp16OptimizerHook, OptimizerHook +from .sampler_seed import DistSamplerSeedHook +from .sync_buffer import SyncBuffersHook + +__all__ = [ + 'HOOKS', 'Hook', 'CheckpointHook', 'ClosureHook', 'LrUpdaterHook', + 'OptimizerHook', 'Fp16OptimizerHook', 'IterTimerHook', + 'DistSamplerSeedHook', 'EmptyCacheHook', 'LoggerHook', 'MlflowLoggerHook', + 'PaviLoggerHook', 'TextLoggerHook', 'TensorboardLoggerHook', + 'WandbLoggerHook', 'MomentumUpdaterHook', 'SyncBuffersHook', 'EMAHook' +] diff --git a/mmcv/mmcv/runner/hooks/checkpoint.py b/mmcv/mmcv/runner/hooks/checkpoint.py new file mode 100644 index 0000000..1912cf6 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/checkpoint.py @@ -0,0 +1,101 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import os + +from ..dist_utils import allreduce_params, master_only +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class CheckpointHook(Hook): + """Save checkpoints periodically. + + Args: + interval (int): The saving period. If ``by_epoch=True``, interval + indicates epochs, otherwise it indicates iterations. + Default: -1, which means "never". + by_epoch (bool): Saving checkpoints by epoch or by iteration. + Default: True. + save_optimizer (bool): Whether to save optimizer state_dict in the + checkpoint. It is usually used for resuming experiments. + Default: True. + out_dir (str, optional): The directory to save checkpoints. If not + specified, ``runner.work_dir`` will be used by default. + max_keep_ckpts (int, optional): The maximum checkpoints to keep. + In some cases we want only the latest few checkpoints and would + like to delete old ones to save the disk space. + Default: -1, which means unlimited. + sync_buffer (bool): Whether to synchronize buffers in different + gpus. Default: False. + """ + + def __init__(self, + interval=-1, + by_epoch=True, + save_optimizer=True, + out_dir=None, + max_keep_ckpts=-1, + sync_buffer=False, + **kwargs): + self.interval = interval + self.by_epoch = by_epoch + self.save_optimizer = save_optimizer + self.out_dir = out_dir + self.max_keep_ckpts = max_keep_ckpts + self.args = kwargs + self.sync_buffer = sync_buffer + + def after_train_epoch(self, runner): + if not self.by_epoch or not self.every_n_epochs(runner, self.interval): + return + + runner.logger.info(f'Saving checkpoint at {runner.epoch + 1} epochs') + if self.sync_buffer: + allreduce_params(runner.model.buffers()) + self._save_checkpoint(runner) + + @master_only + def _save_checkpoint(self, runner): + """Save the current checkpoint and delete unwanted checkpoint.""" + if not self.out_dir: + self.out_dir = runner.work_dir + runner.save_checkpoint( + self.out_dir, save_optimizer=self.save_optimizer, **self.args) + if runner.meta is not None: + if self.by_epoch: + cur_ckpt_filename = self.args.get( + 'filename_tmpl', 'epoch_{}.pth').format(runner.epoch + 1) + else: + cur_ckpt_filename = self.args.get( + 'filename_tmpl', 'iter_{}.pth').format(runner.iter + 1) + runner.meta.setdefault('hook_msgs', dict()) + runner.meta['hook_msgs']['last_ckpt'] = os.path.join( + self.out_dir, cur_ckpt_filename) + # remove other checkpoints + if self.max_keep_ckpts > 0: + if self.by_epoch: + name = 'epoch_{}.pth' + current_ckpt = runner.epoch + 1 + else: + name = 'iter_{}.pth' + current_ckpt = runner.iter + 1 + redundant_ckpts = range( + current_ckpt - self.max_keep_ckpts * self.interval, 0, + -self.interval) + filename_tmpl = self.args.get('filename_tmpl', name) + for _step in redundant_ckpts: + ckpt_path = os.path.join(self.out_dir, + filename_tmpl.format(_step)) + if os.path.exists(ckpt_path): + os.remove(ckpt_path) + else: + break + + def after_train_iter(self, runner): + if self.by_epoch or not self.every_n_iters(runner, self.interval): + return + + runner.logger.info( + f'Saving checkpoint at {runner.iter + 1} iterations') + if self.sync_buffer: + allreduce_params(runner.model.buffers()) + self._save_checkpoint(runner) diff --git a/mmcv/mmcv/runner/hooks/closure.py b/mmcv/mmcv/runner/hooks/closure.py new file mode 100644 index 0000000..ea531f6 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/closure.py @@ -0,0 +1,11 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class ClosureHook(Hook): + + def __init__(self, fn_name, fn): + assert hasattr(self, fn_name) + assert callable(fn) + setattr(self, fn_name, fn) diff --git a/mmcv/mmcv/runner/hooks/ema.py b/mmcv/mmcv/runner/hooks/ema.py new file mode 100644 index 0000000..d5fe738 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/ema.py @@ -0,0 +1,88 @@ +from ...parallel import is_module_wrapper +from ..hooks.hook import HOOKS, Hook + + +@HOOKS.register_module() +class EMAHook(Hook): + r"""Exponential Moving Average Hook. + + Use Exponential Moving Average on all parameters of model in training + process. All parameters have a ema backup, which update by the formula + as below. EMAHook takes priority over EvalHook and CheckpointSaverHook. + + .. math:: + + \text{Xema_{t+1}} = (1 - \text{momentum}) \times + \text{Xema_{t}} + \text{momentum} \times X_t + + Args: + momentum (float): The momentum used for updating ema parameter. + Defaults to 0.0002. + interval (int): Update ema parameter every interval iteration. + Defaults to 1. + warm_up (int): During first warm_up steps, we may use smaller momentum + to update ema parameters more slowly. Defaults to 100. + resume_from (str): The checkpoint path. Defaults to None. + """ + + def __init__(self, + momentum=0.0002, + interval=1, + warm_up=100, + resume_from=None): + assert isinstance(interval, int) and interval > 0 + self.warm_up = warm_up + self.interval = interval + assert momentum > 0 and momentum < 1 + self.momentum = momentum**interval + self.checkpoint = resume_from + + def before_run(self, runner): + """To resume model with it's ema parameters more friendly. + + Register ema parameter as ``named_buffer`` to model + """ + model = runner.model + if is_module_wrapper(model): + model = model.module + self.param_ema_buffer = {} + self.model_parameters = dict(model.named_parameters(recurse=True)) + for name, value in self.model_parameters.items(): + # "." is not allowed in module's buffer name + buffer_name = f"ema_{name.replace('.', '_')}" + self.param_ema_buffer[name] = buffer_name + model.register_buffer(buffer_name, value.data.clone()) + self.model_buffers = dict(model.named_buffers(recurse=True)) + if self.checkpoint is not None: + runner.resume(self.checkpoint) + + def after_train_iter(self, runner): + """Update ema parameter every self.interval iterations.""" + curr_step = runner.iter + # We warm up the momentum considering the instability at beginning + momentum = min(self.momentum, + (1 + curr_step) / (self.warm_up + curr_step)) + if curr_step % self.interval != 0: + return + for name, parameter in self.model_parameters.items(): + buffer_name = self.param_ema_buffer[name] + buffer_parameter = self.model_buffers[buffer_name] + buffer_parameter.mul_(1 - momentum).add_(momentum, parameter.data) + + def after_train_epoch(self, runner): + """We load parameter values from ema backup to model before the + EvalHook.""" + self._swap_ema_parameters() + + def before_train_epoch(self, runner): + """We recover model's parameter from ema backup after last epoch's + EvalHook.""" + self._swap_ema_parameters() + + def _swap_ema_parameters(self): + """Swap the parameter of model with parameter in ema_buffer.""" + for name, value in self.model_parameters.items(): + temp = value.data.clone() + ema_buffer = self.model_buffers[self.param_ema_buffer[name]] + value.data.copy_(ema_buffer.data) + ema_buffer.data.copy_(temp) diff --git a/mmcv/mmcv/runner/hooks/hook.py b/mmcv/mmcv/runner/hooks/hook.py new file mode 100644 index 0000000..296ffde --- /dev/null +++ b/mmcv/mmcv/runner/hooks/hook.py @@ -0,0 +1,61 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from mmcv.utils import Registry + +HOOKS = Registry('hook') + + +class Hook: + + def before_run(self, runner): + pass + + def after_run(self, runner): + pass + + def before_epoch(self, runner): + pass + + def after_epoch(self, runner): + pass + + def before_iter(self, runner): + pass + + def after_iter(self, runner): + pass + + def before_train_epoch(self, runner): + self.before_epoch(runner) + + def before_val_epoch(self, runner): + self.before_epoch(runner) + + def after_train_epoch(self, runner): + self.after_epoch(runner) + + def after_val_epoch(self, runner): + self.after_epoch(runner) + + def before_train_iter(self, runner): + self.before_iter(runner) + + def before_val_iter(self, runner): + self.before_iter(runner) + + def after_train_iter(self, runner): + self.after_iter(runner) + + def after_val_iter(self, runner): + self.after_iter(runner) + + def every_n_epochs(self, runner, n): + return (runner.epoch + 1) % n == 0 if n > 0 else False + + def every_n_inner_iters(self, runner, n): + return (runner.inner_iter + 1) % n == 0 if n > 0 else False + + def every_n_iters(self, runner, n): + return (runner.iter + 1) % n == 0 if n > 0 else False + + def end_of_epoch(self, runner): + return runner.inner_iter + 1 == len(runner.data_loader) diff --git a/mmcv/mmcv/runner/hooks/iter_timer.py b/mmcv/mmcv/runner/hooks/iter_timer.py new file mode 100644 index 0000000..fd0ed91 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/iter_timer.py @@ -0,0 +1,18 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import time + +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class IterTimerHook(Hook): + + def before_epoch(self, runner): + self.t = time.time() + + def before_iter(self, runner): + runner.log_buffer.update({'data_time': time.time() - self.t}) + + def after_iter(self, runner): + runner.log_buffer.update({'time': time.time() - self.t}) + self.t = time.time() diff --git a/mmcv/mmcv/runner/hooks/logger/__init__.py b/mmcv/mmcv/runner/hooks/logger/__init__.py new file mode 100644 index 0000000..8fe4d81 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/logger/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from .base import LoggerHook +from .mlflow import MlflowLoggerHook +from .pavi import PaviLoggerHook +from .tensorboard import TensorboardLoggerHook +from .text import TextLoggerHook +from .wandb import WandbLoggerHook + +__all__ = [ + 'LoggerHook', 'MlflowLoggerHook', 'PaviLoggerHook', + 'TensorboardLoggerHook', 'TextLoggerHook', 'WandbLoggerHook' +] diff --git a/mmcv/mmcv/runner/hooks/logger/base.py b/mmcv/mmcv/runner/hooks/logger/base.py new file mode 100644 index 0000000..b22bae7 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/logger/base.py @@ -0,0 +1,173 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import numbers +from abc import ABCMeta, abstractmethod + +import numpy as np +import torch + +from ..hook import Hook + + +class LoggerHook(Hook): + """Base class for logger hooks. + + Args: + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging. + by_epoch (bool): Whether EpochBasedRunner is used. + """ + + __metaclass__ = ABCMeta + + def __init__(self, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True): + self.interval = interval + self.ignore_last = ignore_last + self.reset_flag = reset_flag + self.by_epoch = by_epoch + + @abstractmethod + def log(self, runner): + pass + + @staticmethod + def is_scalar(val, include_np=True, include_torch=True): + """Tell the input variable is a scalar or not. + + Args: + val: Input variable. + include_np (bool): Whether include 0-d np.ndarray as a scalar. + include_torch (bool): Whether include 0-d torch.Tensor as a scalar. + + Returns: + bool: True or False. + """ + if isinstance(val, numbers.Number): + return True + elif include_np and isinstance(val, np.ndarray) and val.ndim == 0: + return True + elif include_torch and isinstance(val, torch.Tensor) and len(val) == 1: + return True + else: + return False + + def get_mode(self, runner): + if runner.mode == 'train': + if 'time' in runner.log_buffer.output: + mode = 'train' + else: + mode = 'val' + elif runner.mode == 'val': + mode = 'val' + else: + raise ValueError(f"runner mode should be 'train' or 'val', " + f'but got {runner.mode}') + return mode + + def get_epoch(self, runner): + if runner.mode == 'train': + epoch = runner.epoch + 1 + elif runner.mode == 'val': + # normal val mode + # runner.epoch += 1 has been done before val workflow + epoch = runner.epoch + else: + raise ValueError(f"runner mode should be 'train' or 'val', " + f'but got {runner.mode}') + return epoch + + def get_iter(self, runner, inner_iter=False): + """Get the current training iteration step.""" + if self.by_epoch and inner_iter: + current_iter = runner.inner_iter + 1 + else: + current_iter = runner.iter + 1 + return current_iter + + def get_step(self, runner): + """Get the total training step/epoch.""" + if self.get_mode(runner) == 'val' and self.by_epoch: + return self.get_epoch(runner) + else: + return self.get_iter(runner) + + def get_lr_tags(self, runner): + tags = {} + lrs = runner.current_lr() + if isinstance(lrs, dict): + for name, value in lrs.items(): + tags[f'learning_rate/{name}'] = value[0] + else: + tags['learning_rate'] = lrs[0] + return tags + + def get_momentum_tags(self, runner): + tags = {} + momentums = runner.current_momentum() + if isinstance(momentums, dict): + for name, value in momentums.items(): + tags[f'momentum/{name}'] = value[0] + else: + tags['momentum'] = momentums[0] + return tags + + def get_loggable_tags(self, + runner, + allow_scalar=True, + allow_text=False, + add_mode=True, + tags_to_skip=('time', 'data_time')): + tags = {} + for var, val in runner.log_buffer.output.items(): + if var in tags_to_skip: + continue + if self.is_scalar(val) and not allow_scalar: + continue + if isinstance(val, str) and not allow_text: + continue + if add_mode: + var = f'{self.get_mode(runner)}/{var}' + tags[var] = val + tags.update(self.get_lr_tags(runner)) + tags.update(self.get_momentum_tags(runner)) + return tags + + def before_run(self, runner): + for hook in runner.hooks[::-1]: + if isinstance(hook, LoggerHook): + hook.reset_flag = True + break + + def before_epoch(self, runner): + runner.log_buffer.clear() # clear logs of last epoch + + def after_train_iter(self, runner): + if self.by_epoch and self.every_n_inner_iters(runner, self.interval): + runner.log_buffer.average(self.interval) + elif not self.by_epoch and self.every_n_iters(runner, self.interval): + runner.log_buffer.average(self.interval) + elif self.end_of_epoch(runner) and not self.ignore_last: + # not precise but more stable + runner.log_buffer.average(self.interval) + + if runner.log_buffer.ready: + self.log(runner) + if self.reset_flag: + runner.log_buffer.clear_output() + + def after_train_epoch(self, runner): + if runner.log_buffer.ready: + self.log(runner) + if self.reset_flag: + runner.log_buffer.clear_output() + + def after_val_epoch(self, runner): + runner.log_buffer.average() + self.log(runner) + if self.reset_flag: + runner.log_buffer.clear_output() diff --git a/mmcv/mmcv/runner/hooks/logger/mlflow.py b/mmcv/mmcv/runner/hooks/logger/mlflow.py new file mode 100644 index 0000000..0e3fd48 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/logger/mlflow.py @@ -0,0 +1,77 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class MlflowLoggerHook(LoggerHook): + + def __init__(self, + exp_name=None, + tags=None, + log_model=True, + interval=10, + ignore_last=True, + reset_flag=True, + by_epoch=True): + """Class to log metrics and (optionally) a trained model to MLflow. + + It requires `MLflow`_ to be installed. + + Args: + exp_name (str, optional): Name of the experiment to be used. + Default None. + If not None, set the active experiment. + If experiment does not exist, an experiment with provided name + will be created. + tags (dict of str: str, optional): Tags for the current run. + Default None. + If not None, set tags for the current run. + log_model (bool, optional): Wheter to log an MLflow artifact. + Default True. + If True, log runner.model as an MLflow artifact + for the current run. + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging + by_epoch (bool): Whether EpochBasedRunner is used. + + .. _MLflow: + https://www.mlflow.org/docs/latest/index.html + """ + super(MlflowLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.import_mlflow() + self.exp_name = exp_name + self.tags = tags + self.log_model = log_model + + def import_mlflow(self): + try: + import mlflow + import mlflow.pytorch as mlflow_pytorch + except ImportError: + raise ImportError( + 'Please run "pip install mlflow" to install mlflow') + self.mlflow = mlflow + self.mlflow_pytorch = mlflow_pytorch + + @master_only + def before_run(self, runner): + if self.exp_name is not None: + self.mlflow.set_experiment(self.exp_name) + if self.tags is not None: + self.mlflow.set_tags(self.tags) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + self.mlflow.log_metrics(tags, step=self.get_step(runner)) + + @master_only + def after_run(self, runner): + if self.log_model: + self.mlflow_pytorch.log_model(runner.model, 'models') diff --git a/mmcv/mmcv/runner/hooks/logger/pavi.py b/mmcv/mmcv/runner/hooks/logger/pavi.py new file mode 100644 index 0000000..fb8a5b1 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/logger/pavi.py @@ -0,0 +1,90 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import json +import os +import os.path as osp + +import yaml + +import mmcv +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class PaviLoggerHook(LoggerHook): + + def __init__(self, + init_kwargs=None, + add_graph=False, + add_last_ckpt=False, + interval=10, + ignore_last=True, + reset_flag=True, + by_epoch=True): + super(PaviLoggerHook, self).__init__(interval, ignore_last, reset_flag, + by_epoch) + self.init_kwargs = init_kwargs + self.add_graph = add_graph + self.add_last_ckpt = add_last_ckpt + + @master_only + def before_run(self, runner): + try: + from pavi import SummaryWriter + except ImportError: + raise ImportError('Please run "pip install pavi" to install pavi.') + + self.run_name = runner.work_dir.split('/')[-1] + + if not self.init_kwargs: + self.init_kwargs = dict() + self.init_kwargs['task'] = self.run_name + self.init_kwargs['model'] = runner._model_name + if runner.meta is not None: + if 'config_dict' in runner.meta: + config_dict = runner.meta['config_dict'] + assert isinstance( + config_dict, + dict), ('meta["config_dict"] has to be of a dict, ' + f'but got {type(config_dict)}') + elif 'config_file' in runner.meta: + config_file = runner.meta['config_file'] + config_dict = dict(mmcv.Config.fromfile(config_file)) + else: + config_dict = None + if config_dict is not None: + # 'max_.*iter' is parsed in pavi sdk as the maximum iterations + # to properly set up the progress bar. + config_dict = config_dict.copy() + config_dict.setdefault('max_iter', runner.max_iters) + # non-serializable values are first converted in + # mmcv.dump to json + config_dict = json.loads( + mmcv.dump(config_dict, file_format='json')) + session_text = yaml.dump(config_dict) + self.init_kwargs['session_text'] = session_text + self.writer = SummaryWriter(**self.init_kwargs) + + if self.add_graph: + self.writer.add_graph(runner.model) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner, add_mode=False) + if tags: + self.writer.add_scalars( + self.get_mode(runner), tags, self.get_step(runner)) + + @master_only + def after_run(self, runner): + if self.add_last_ckpt: + ckpt_path = osp.join(runner.work_dir, 'latest.pth') + if osp.isfile(ckpt_path): + ckpt_path = osp.join(runner.work_dir, os.readlink(ckpt_path)) + # runner.epoch += 1 has been done before `after_run`. + iteration = runner.epoch if self.by_epoch else runner.iter + return self.writer.add_snapshot_file( + tag=self.run_name, + snapshot_file_path=ckpt_path, + iteration=iteration) diff --git a/mmcv/mmcv/runner/hooks/logger/tensorboard.py b/mmcv/mmcv/runner/hooks/logger/tensorboard.py new file mode 100644 index 0000000..975874b --- /dev/null +++ b/mmcv/mmcv/runner/hooks/logger/tensorboard.py @@ -0,0 +1,55 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import os.path as osp + +from mmcv.utils import TORCH_VERSION +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class TensorboardLoggerHook(LoggerHook): + + def __init__(self, + log_dir=None, + interval=10, + ignore_last=True, + reset_flag=True, + by_epoch=True): + super(TensorboardLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.log_dir = log_dir + + @master_only + def before_run(self, runner): + if TORCH_VERSION < '1.1' or TORCH_VERSION == 'parrots': + try: + from tensorboardX import SummaryWriter + except ImportError: + raise ImportError('Please install tensorboardX to use ' + 'TensorboardLoggerHook.') + else: + try: + from torch.utils.tensorboard import SummaryWriter + except ImportError: + raise ImportError( + 'Please run "pip install future tensorboard" to install ' + 'the dependencies to use torch.utils.tensorboard ' + '(applicable to PyTorch 1.1 or higher)') + + if self.log_dir is None: + self.log_dir = osp.join(runner.work_dir, 'tf_logs') + self.writer = SummaryWriter(self.log_dir) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner, allow_text=True) + for tag, val in tags.items(): + if isinstance(val, str): + self.writer.add_text(tag, val, self.get_step(runner)) + else: + self.writer.add_scalar(tag, val, self.get_step(runner)) + + @master_only + def after_run(self, runner): + self.writer.close() diff --git a/mmcv/mmcv/runner/hooks/logger/text.py b/mmcv/mmcv/runner/hooks/logger/text.py new file mode 100644 index 0000000..8adfdc8 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/logger/text.py @@ -0,0 +1,168 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import datetime +import os.path as osp +from collections import OrderedDict + +import torch +import torch.distributed as dist + +import mmcv +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class TextLoggerHook(LoggerHook): + """Logger hook in text. + + In this logger hook, the information will be printed on terminal and + saved in json file. + + Args: + by_epoch (bool): Whether EpochBasedRunner is used. + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging. + interval_exp_name (int): Logging interval for experiment name. This + feature is to help users conveniently get the experiment + information from screen or log file. Default: 1000. + """ + + def __init__(self, + by_epoch=True, + interval=10, + ignore_last=True, + reset_flag=False, + interval_exp_name=1000): + super(TextLoggerHook, self).__init__(interval, ignore_last, reset_flag, + by_epoch) + self.by_epoch = by_epoch + self.time_sec_tot = 0 + self.interval_exp_name = interval_exp_name + + def before_run(self, runner): + super(TextLoggerHook, self).before_run(runner) + self.start_iter = runner.iter + self.json_log_path = osp.join(runner.work_dir, + f'{runner.timestamp}.log.json') + if runner.meta is not None: + self._dump_log(runner.meta, runner) + + def _get_max_memory(self, runner): + device = getattr(runner.model, 'output_device', None) + mem = torch.cuda.max_memory_allocated(device=device) + mem_mb = torch.tensor([mem / (1024 * 1024)], + dtype=torch.int, + device=device) + if runner.world_size > 1: + dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX) + return mem_mb.item() + + def _log_info(self, log_dict, runner): + # print exp name for users to distinguish experiments + # at every ``interval_exp_name`` iterations and the end of each epoch + if runner.meta is not None and 'exp_name' in runner.meta: + if (self.every_n_iters(runner, self.interval_exp_name)) or ( + self.by_epoch and self.end_of_epoch(runner)): + exp_info = f'Exp name: {runner.meta["exp_name"]}' + runner.logger.info(exp_info) + + if log_dict['mode'] == 'train': + if isinstance(log_dict['lr'], dict): + lr_str = [] + for k, val in log_dict['lr'].items(): + lr_str.append(f'lr_{k}: {val:.3e}') + lr_str = ' '.join(lr_str) + else: + lr_str = f'lr: {log_dict["lr"]:.3e}' + + # by epoch: Epoch [4][100/1000] + # by iter: Iter [100/100000] + if self.by_epoch: + log_str = f'Epoch [{log_dict["epoch"]}]' \ + f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t' + else: + log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t' + log_str += f'{lr_str}, ' + + if 'time' in log_dict.keys(): + self.time_sec_tot += (log_dict['time'] * self.interval) + time_sec_avg = self.time_sec_tot / ( + runner.iter - self.start_iter + 1) + eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1) + eta_str = str(datetime.timedelta(seconds=int(eta_sec))) + log_str += f'eta: {eta_str}, ' + log_str += f'time: {log_dict["time"]:.3f}, ' \ + f'data_time: {log_dict["data_time"]:.3f}, ' + # statistic memory + if torch.cuda.is_available(): + log_str += f'memory: {log_dict["memory"]}, ' + else: + if self.by_epoch: + log_str = f'Epoch({log_dict["mode"]}) ' \ + f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t' + else: + log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t' + + log_items = [] + for name, val in log_dict.items(): + # TODO: resolve this hack + # these items have been in log_str + if name in [ + 'mode', 'Epoch', 'iter', 'lr', 'time', 'data_time', + 'memory', 'epoch' + ]: + continue + if isinstance(val, float): + val = f'{val:.4f}' + log_items.append(f'{name}: {val}') + log_str += ', '.join(log_items) + + runner.logger.info(log_str) + + def _dump_log(self, log_dict, runner): + # dump log in json format + json_log = OrderedDict() + for k, v in log_dict.items(): + json_log[k] = self._round_float(v) + # only append log at last line + if runner.rank == 0: + with open(self.json_log_path, 'a+') as f: + mmcv.dump(json_log, f, file_format='json') + f.write('\n') + + def _round_float(self, items): + if isinstance(items, list): + return [self._round_float(item) for item in items] + elif isinstance(items, float): + return round(items, 5) + else: + return items + + def log(self, runner): + log_dict = OrderedDict( + mode=self.get_mode(runner), + epoch=self.get_epoch(runner), + iter=self.get_iter(runner, inner_iter=True)) + + # only record lr of the first param group + cur_lr = runner.current_lr() + if isinstance(cur_lr, list): + log_dict['lr'] = cur_lr[0] + else: + assert isinstance(cur_lr, dict) + log_dict['lr'] = {} + for k, lr_ in cur_lr.items(): + assert isinstance(lr_, list) + log_dict['lr'].update({k: lr_[0]}) + + if 'time' in runner.log_buffer.output: + # statistic memory + if torch.cuda.is_available(): + log_dict['memory'] = self._get_max_memory(runner) + + log_dict = dict(log_dict, **runner.log_buffer.output) + + self._log_info(log_dict, runner) + self._dump_log(log_dict, runner) diff --git a/mmcv/mmcv/runner/hooks/logger/wandb.py b/mmcv/mmcv/runner/hooks/logger/wandb.py new file mode 100644 index 0000000..4d65ebf --- /dev/null +++ b/mmcv/mmcv/runner/hooks/logger/wandb.py @@ -0,0 +1,46 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class WandbLoggerHook(LoggerHook): + + def __init__(self, + init_kwargs=None, + interval=10, + ignore_last=True, + reset_flag=True, + by_epoch=True): + super(WandbLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.import_wandb() + self.init_kwargs = init_kwargs + + def import_wandb(self): + try: + import wandb + except ImportError: + raise ImportError( + 'Please run "pip install wandb" to install wandb') + self.wandb = wandb + + @master_only + def before_run(self, runner): + if self.wandb is None: + self.import_wandb() + if self.init_kwargs: + self.wandb.init(**self.init_kwargs) + else: + self.wandb.init() + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + self.wandb.log(tags, step=self.get_step(runner)) + + @master_only + def after_run(self, runner): + self.wandb.join() diff --git a/mmcv/mmcv/runner/hooks/lr_updater.py b/mmcv/mmcv/runner/hooks/lr_updater.py new file mode 100644 index 0000000..0120b58 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/lr_updater.py @@ -0,0 +1,416 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from math import cos, pi + +from .hook import HOOKS, Hook + + +class LrUpdaterHook(Hook): + """LR Scheduler in MMCV. + + Args: + by_epoch (bool): LR changes epoch by epoch + warmup (string): Type of warmup used. It can be None(use no warmup), + 'constant', 'linear' or 'exp' + warmup_iters (int): The number of iterations or epochs that warmup + lasts + warmup_ratio (float): LR used at the beginning of warmup equals to + warmup_ratio * initial_lr + warmup_by_epoch (bool): When warmup_by_epoch == True, warmup_iters + means the number of epochs that warmup lasts, otherwise means the + number of iteration that warmup lasts + """ + + def __init__(self, + by_epoch=True, + warmup=None, + warmup_iters=0, + warmup_ratio=0.1, + warmup_by_epoch=False): + # validate the "warmup" argument + if warmup is not None: + if warmup not in ['constant', 'linear', 'exp']: + raise ValueError( + f'"{warmup}" is not a supported type for warming up, valid' + ' types are "constant" and "linear"') + if warmup is not None: + assert warmup_iters > 0, \ + '"warmup_iters" must be a positive integer' + assert 0 < warmup_ratio <= 1.0, \ + '"warmup_ratio" must be in range (0,1]' + + self.by_epoch = by_epoch + self.warmup = warmup + self.warmup_iters = warmup_iters + self.warmup_ratio = warmup_ratio + self.warmup_by_epoch = warmup_by_epoch + + if self.warmup_by_epoch: + self.warmup_epochs = self.warmup_iters + self.warmup_iters = None + else: + self.warmup_epochs = None + + self.base_lr = [] # initial lr for all param groups + self.regular_lr = [] # expected lr if no warming up is performed + + def _set_lr(self, runner, lr_groups): + if isinstance(runner.optimizer, dict): + for k, optim in runner.optimizer.items(): + for param_group, lr in zip(optim.param_groups, lr_groups[k]): + param_group['lr'] = lr + else: + for param_group, lr in zip(runner.optimizer.param_groups, + lr_groups): + param_group['lr'] = lr + + def get_lr(self, runner, base_lr): + raise NotImplementedError + + def get_regular_lr(self, runner): + if isinstance(runner.optimizer, dict): + lr_groups = {} + for k in runner.optimizer.keys(): + _lr_group = [ + self.get_lr(runner, _base_lr) + for _base_lr in self.base_lr[k] + ] + lr_groups.update({k: _lr_group}) + + return lr_groups + else: + return [self.get_lr(runner, _base_lr) for _base_lr in self.base_lr] + + def get_warmup_lr(self, cur_iters): + if self.warmup == 'constant': + warmup_lr = [_lr * self.warmup_ratio for _lr in self.regular_lr] + elif self.warmup == 'linear': + k = (1 - cur_iters / self.warmup_iters) * (1 - self.warmup_ratio) + warmup_lr = [_lr * (1 - k) for _lr in self.regular_lr] + elif self.warmup == 'exp': + k = self.warmup_ratio**(1 - cur_iters / self.warmup_iters) + warmup_lr = [_lr * k for _lr in self.regular_lr] + return warmup_lr + + def before_run(self, runner): + # NOTE: when resuming from a checkpoint, if 'initial_lr' is not saved, + # it will be set according to the optimizer params + if isinstance(runner.optimizer, dict): + self.base_lr = {} + for k, optim in runner.optimizer.items(): + for group in optim.param_groups: + group.setdefault('initial_lr', group['lr']) + _base_lr = [ + group['initial_lr'] for group in optim.param_groups + ] + self.base_lr.update({k: _base_lr}) + else: + for group in runner.optimizer.param_groups: + group.setdefault('initial_lr', group['lr']) + self.base_lr = [ + group['initial_lr'] for group in runner.optimizer.param_groups + ] + + def before_train_epoch(self, runner): + if self.warmup_iters is None: + epoch_len = len(runner.data_loader) + self.warmup_iters = self.warmup_epochs * epoch_len + + if not self.by_epoch: + return + + self.regular_lr = self.get_regular_lr(runner) + self._set_lr(runner, self.regular_lr) + + def before_train_iter(self, runner): + cur_iter = runner.iter + if not self.by_epoch: + self.regular_lr = self.get_regular_lr(runner) + if self.warmup is None or cur_iter >= self.warmup_iters: + self._set_lr(runner, self.regular_lr) + else: + warmup_lr = self.get_warmup_lr(cur_iter) + self._set_lr(runner, warmup_lr) + elif self.by_epoch: + if self.warmup is None or cur_iter > self.warmup_iters: + return + elif cur_iter == self.warmup_iters: + self._set_lr(runner, self.regular_lr) + else: + warmup_lr = self.get_warmup_lr(cur_iter) + self._set_lr(runner, warmup_lr) + + +@HOOKS.register_module() +class FixedLrUpdaterHook(LrUpdaterHook): + + def __init__(self, **kwargs): + super(FixedLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + return base_lr + + +@HOOKS.register_module() +class StepLrUpdaterHook(LrUpdaterHook): + + def __init__(self, step, gamma=0.1, **kwargs): + assert isinstance(step, (list, int)) + if isinstance(step, list): + for s in step: + assert isinstance(s, int) and s > 0 + elif isinstance(step, int): + assert step > 0 + else: + raise TypeError('"step" must be a list or integer') + self.step = step + self.gamma = gamma + super(StepLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + progress = runner.epoch if self.by_epoch else runner.iter + + if isinstance(self.step, int): + return base_lr * (self.gamma**(progress // self.step)) + + exp = len(self.step) + for i, s in enumerate(self.step): + if progress < s: + exp = i + break + return base_lr * self.gamma**exp + + +@HOOKS.register_module() +class ExpLrUpdaterHook(LrUpdaterHook): + + def __init__(self, gamma, **kwargs): + self.gamma = gamma + super(ExpLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + progress = runner.epoch if self.by_epoch else runner.iter + return base_lr * self.gamma**progress + + +@HOOKS.register_module() +class PolyLrUpdaterHook(LrUpdaterHook): + + def __init__(self, power=1., min_lr=0., **kwargs): + self.power = power + self.min_lr = min_lr + super(PolyLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + if self.by_epoch: + progress = runner.epoch + max_progress = runner.max_epochs + else: + progress = runner.iter + max_progress = runner.max_iters + coeff = (1 - progress / max_progress)**self.power + return (base_lr - self.min_lr) * coeff + self.min_lr + + +@HOOKS.register_module() +class InvLrUpdaterHook(LrUpdaterHook): + + def __init__(self, gamma, power=1., **kwargs): + self.gamma = gamma + self.power = power + super(InvLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + progress = runner.epoch if self.by_epoch else runner.iter + return base_lr * (1 + self.gamma * progress)**(-self.power) + + +@HOOKS.register_module() +class CosineAnnealingLrUpdaterHook(LrUpdaterHook): + + def __init__(self, min_lr=None, min_lr_ratio=None, **kwargs): + assert (min_lr is None) ^ (min_lr_ratio is None) + self.min_lr = min_lr + self.min_lr_ratio = min_lr_ratio + super(CosineAnnealingLrUpdaterHook, self).__init__(**kwargs) + + def get_lr(self, runner, base_lr): + if self.by_epoch: + progress = runner.epoch + max_progress = runner.max_epochs + else: + progress = runner.iter + max_progress = runner.max_iters + + if self.min_lr_ratio is not None: + target_lr = base_lr * self.min_lr_ratio + else: + target_lr = self.min_lr + return annealing_cos(base_lr, target_lr, progress / max_progress) + + +@HOOKS.register_module() +class CosineRestartLrUpdaterHook(LrUpdaterHook): + """Cosine annealing with restarts learning rate scheme. + + Args: + periods (list[int]): Periods for each cosine anneling cycle. + restart_weights (list[float], optional): Restart weights at each + restart iteration. Default: [1]. + min_lr (float, optional): The minimum lr. Default: None. + min_lr_ratio (float, optional): The ratio of minimum lr to the base lr. + Either `min_lr` or `min_lr_ratio` should be specified. + Default: None. + """ + + def __init__(self, + periods, + restart_weights=[1], + min_lr=None, + min_lr_ratio=None, + **kwargs): + assert (min_lr is None) ^ (min_lr_ratio is None) + self.periods = periods + self.min_lr = min_lr + self.min_lr_ratio = min_lr_ratio + self.restart_weights = restart_weights + assert (len(self.periods) == len(self.restart_weights) + ), 'periods and restart_weights should have the same length.' + super(CosineRestartLrUpdaterHook, self).__init__(**kwargs) + + self.cumulative_periods = [ + sum(self.periods[0:i + 1]) for i in range(0, len(self.periods)) + ] + + def get_lr(self, runner, base_lr): + if self.by_epoch: + progress = runner.epoch + else: + progress = runner.iter + + if self.min_lr_ratio is not None: + target_lr = base_lr * self.min_lr_ratio + else: + target_lr = self.min_lr + + idx = get_position_from_periods(progress, self.cumulative_periods) + current_weight = self.restart_weights[idx] + nearest_restart = 0 if idx == 0 else self.cumulative_periods[idx - 1] + current_periods = self.periods[idx] + + alpha = min((progress - nearest_restart) / current_periods, 1) + return annealing_cos(base_lr, target_lr, alpha, current_weight) + + +def get_position_from_periods(iteration, cumulative_periods): + """Get the position from a period list. + + It will return the index of the right-closest number in the period list. + For example, the cumulative_periods = [100, 200, 300, 400], + if iteration == 50, return 0; + if iteration == 210, return 2; + if iteration == 300, return 3. + + Args: + iteration (int): Current iteration. + cumulative_periods (list[int]): Cumulative period list. + + Returns: + int: The position of the right-closest number in the period list. + """ + for i, period in enumerate(cumulative_periods): + if iteration < period: + return i + raise ValueError(f'Current iteration {iteration} exceeds ' + f'cumulative_periods {cumulative_periods}') + + +@HOOKS.register_module() +class CyclicLrUpdaterHook(LrUpdaterHook): + """Cyclic LR Scheduler. + + Implement the cyclical learning rate policy (CLR) described in + https://arxiv.org/pdf/1506.01186.pdf + + Different from the original paper, we use cosine anealing rather than + triangular policy inside a cycle. This improves the performance in the + 3D detection area. + + Attributes: + target_ratio (tuple[float]): Relative ratio of the highest LR and the + lowest LR to the initial LR. + cyclic_times (int): Number of cycles during training + step_ratio_up (float): The ratio of the increasing process of LR in + the total cycle. + by_epoch (bool): Whether to update LR by epoch. + """ + + def __init__(self, + by_epoch=False, + target_ratio=(10, 1e-4), + cyclic_times=1, + step_ratio_up=0.4, + **kwargs): + if isinstance(target_ratio, float): + target_ratio = (target_ratio, target_ratio / 1e5) + elif isinstance(target_ratio, tuple): + target_ratio = (target_ratio[0], target_ratio[0] / 1e5) \ + if len(target_ratio) == 1 else target_ratio + else: + raise ValueError('target_ratio should be either float ' + f'or tuple, got {type(target_ratio)}') + + assert len(target_ratio) == 2, \ + '"target_ratio" must be list or tuple of two floats' + assert 0 <= step_ratio_up < 1.0, \ + '"step_ratio_up" must be in range [0,1)' + + self.target_ratio = target_ratio + self.cyclic_times = cyclic_times + self.step_ratio_up = step_ratio_up + self.lr_phases = [] # init lr_phases + + assert not by_epoch, \ + 'currently only support "by_epoch" = False' + super(CyclicLrUpdaterHook, self).__init__(by_epoch, **kwargs) + + def before_run(self, runner): + super(CyclicLrUpdaterHook, self).before_run(runner) + # initiate lr_phases + # total lr_phases are separated as up and down + max_iter_per_phase = runner.max_iters // self.cyclic_times + iter_up_phase = int(self.step_ratio_up * max_iter_per_phase) + self.lr_phases.append( + [0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]]) + self.lr_phases.append([ + iter_up_phase, max_iter_per_phase, max_iter_per_phase, + self.target_ratio[0], self.target_ratio[1] + ]) + + def get_lr(self, runner, base_lr): + curr_iter = runner.iter + for (start_iter, end_iter, max_iter_per_phase, start_ratio, + end_ratio) in self.lr_phases: + curr_iter %= max_iter_per_phase + if start_iter <= curr_iter < end_iter: + progress = curr_iter - start_iter + return annealing_cos(base_lr * start_ratio, + base_lr * end_ratio, + progress / (end_iter - start_iter)) + + +def annealing_cos(start, end, factor, weight=1): + """Calculate annealing cos learning rate. + + Cosine anneal from `weight * start + (1 - weight) * end` to `end` as + percentage goes from 0.0 to 1.0. + + Args: + start (float): The starting learning rate of the cosine annealing. + end (float): The ending learing rate of the cosine annealing. + factor (float): The coefficient of `pi` when calculating the current + percentage. Range from 0.0 to 1.0. + weight (float, optional): The combination factor of `start` and `end` + when calculating the actual starting learning rate. Default to 1. + """ + cos_out = cos(pi * factor) + 1 + return end + 0.5 * weight * (start - end) * cos_out diff --git a/mmcv/mmcv/runner/hooks/memory.py b/mmcv/mmcv/runner/hooks/memory.py new file mode 100644 index 0000000..966ee07 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/memory.py @@ -0,0 +1,25 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import torch + +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class EmptyCacheHook(Hook): + + def __init__(self, before_epoch=False, after_epoch=True, after_iter=False): + self._before_epoch = before_epoch + self._after_epoch = after_epoch + self._after_iter = after_iter + + def after_iter(self, runner): + if self._after_iter: + torch.cuda.empty_cache() + + def before_epoch(self, runner): + if self._before_epoch: + torch.cuda.empty_cache() + + def after_epoch(self, runner): + if self._after_epoch: + torch.cuda.empty_cache() diff --git a/mmcv/mmcv/runner/hooks/momentum_updater.py b/mmcv/mmcv/runner/hooks/momentum_updater.py new file mode 100644 index 0000000..b349071 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/momentum_updater.py @@ -0,0 +1,199 @@ +from .hook import HOOKS, Hook +from .lr_updater import annealing_cos + + +class MomentumUpdaterHook(Hook): + + def __init__(self, + by_epoch=True, + warmup=None, + warmup_iters=0, + warmup_ratio=0.9): + # validate the "warmup" argument + if warmup is not None: + if warmup not in ['constant', 'linear', 'exp']: + raise ValueError( + f'"{warmup}" is not a supported type for warming up, valid' + ' types are "constant" and "linear"') + if warmup is not None: + assert warmup_iters > 0, \ + '"warmup_iters" must be a positive integer' + assert 0 < warmup_ratio <= 1.0, \ + '"warmup_momentum" must be in range (0,1]' + + self.by_epoch = by_epoch + self.warmup = warmup + self.warmup_iters = warmup_iters + self.warmup_ratio = warmup_ratio + + self.base_momentum = [] # initial momentum for all param groups + self.regular_momentum = [ + ] # expected momentum if no warming up is performed + + def _set_momentum(self, runner, momentum_groups): + for param_group, mom in zip(runner.optimizer.param_groups, + momentum_groups): + if 'momentum' in param_group.keys(): + param_group['momentum'] = mom + elif 'betas' in param_group.keys(): + param_group['betas'] = (mom, param_group['betas'][1]) + + def get_momentum(self, runner, base_momentum): + raise NotImplementedError + + def get_regular_momentum(self, runner): + return [ + self.get_momentum(runner, _base_momentum) + for _base_momentum in self.base_momentum + ] + + def get_warmup_momentum(self, cur_iters): + if self.warmup == 'constant': + warmup_momentum = [ + _momentum / self.warmup_ratio + for _momentum in self.regular_momentum + ] + elif self.warmup == 'linear': + k = (1 - cur_iters / self.warmup_iters) * (1 - self.warmup_ratio) + warmup_momentum = [ + _momentum / (1 - k) for _momentum in self.regular_mom + ] + elif self.warmup == 'exp': + k = self.warmup_ratio**(1 - cur_iters / self.warmup_iters) + warmup_momentum = [_momentum / k for _momentum in self.regular_mom] + return warmup_momentum + + def before_run(self, runner): + # NOTE: when resuming from a checkpoint, + # if 'initial_momentum' is not saved, + # it will be set according to the optimizer params + for group in runner.optimizer.param_groups: + if 'momentum' in group.keys(): + group.setdefault('initial_momentum', group['momentum']) + else: + group.setdefault('initial_momentum', group['betas'][0]) + self.base_momentum = [ + group['initial_momentum'] + for group in runner.optimizer.param_groups + ] + + def before_train_epoch(self, runner): + if not self.by_epoch: + return + self.regular_mom = self.get_regular_momentum(runner) + self._set_momentum(runner, self.regular_mom) + + def before_train_iter(self, runner): + cur_iter = runner.iter + if not self.by_epoch: + self.regular_mom = self.get_regular_momentum(runner) + if self.warmup is None or cur_iter >= self.warmup_iters: + self._set_momentum(runner, self.regular_mom) + else: + warmup_momentum = self.get_warmup_momentum(cur_iter) + self._set_momentum(runner, warmup_momentum) + elif self.by_epoch: + if self.warmup is None or cur_iter > self.warmup_iters: + return + elif cur_iter == self.warmup_iters: + self._set_momentum(runner, self.regular_mom) + else: + warmup_momentum = self.get_warmup_momentum(cur_iter) + self._set_momentum(runner, warmup_momentum) + + +@HOOKS.register_module() +class CosineAnnealingMomentumUpdaterHook(MomentumUpdaterHook): + + def __init__(self, min_momentum=None, min_momentum_ratio=None, **kwargs): + assert (min_momentum is None) ^ (min_momentum_ratio is None) + self.min_momentum = min_momentum + self.min_momentum_ratio = min_momentum_ratio + super(CosineAnnealingMomentumUpdaterHook, self).__init__(**kwargs) + + def get_momentum(self, runner, base_momentum): + if self.by_epoch: + progress = runner.epoch + max_progress = runner.max_epochs + else: + progress = runner.iter + max_progress = runner.max_iters + if self.min_momentum_ratio is not None: + target_momentum = base_momentum * self.min_momentum_ratio + else: + target_momentum = self.min_momentum + return annealing_cos(base_momentum, target_momentum, + progress / max_progress) + + +@HOOKS.register_module() +class CyclicMomentumUpdaterHook(MomentumUpdaterHook): + """Cyclic momentum Scheduler. + + Implemet the cyclical momentum scheduler policy described in + https://arxiv.org/pdf/1708.07120.pdf + + This momentum scheduler usually used together with the CyclicLRUpdater + to improve the performance in the 3D detection area. + + Attributes: + target_ratio (tuple[float]): Relative ratio of the lowest momentum and + the highest momentum to the initial momentum. + cyclic_times (int): Number of cycles during training + step_ratio_up (float): The ratio of the increasing process of momentum + in the total cycle. + by_epoch (bool): Whether to update momentum by epoch. + """ + + def __init__(self, + by_epoch=False, + target_ratio=(0.85 / 0.95, 1), + cyclic_times=1, + step_ratio_up=0.4, + **kwargs): + if isinstance(target_ratio, float): + target_ratio = (target_ratio, target_ratio / 1e5) + elif isinstance(target_ratio, tuple): + target_ratio = (target_ratio[0], target_ratio[0] / 1e5) \ + if len(target_ratio) == 1 else target_ratio + else: + raise ValueError('target_ratio should be either float ' + f'or tuple, got {type(target_ratio)}') + + assert len(target_ratio) == 2, \ + '"target_ratio" must be list or tuple of two floats' + assert 0 <= step_ratio_up < 1.0, \ + '"step_ratio_up" must be in range [0,1)' + + self.target_ratio = target_ratio + self.cyclic_times = cyclic_times + self.step_ratio_up = step_ratio_up + self.momentum_phases = [] # init momentum_phases + # currently only support by_epoch=False + assert not by_epoch, \ + 'currently only support "by_epoch" = False' + super(CyclicMomentumUpdaterHook, self).__init__(by_epoch, **kwargs) + + def before_run(self, runner): + super(CyclicMomentumUpdaterHook, self).before_run(runner) + # initiate momentum_phases + # total momentum_phases are separated as up and down + max_iter_per_phase = runner.max_iters // self.cyclic_times + iter_up_phase = int(self.step_ratio_up * max_iter_per_phase) + self.momentum_phases.append( + [0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]]) + self.momentum_phases.append([ + iter_up_phase, max_iter_per_phase, max_iter_per_phase, + self.target_ratio[0], self.target_ratio[1] + ]) + + def get_momentum(self, runner, base_momentum): + curr_iter = runner.iter + for (start_iter, end_iter, max_iter_per_phase, start_ratio, + end_ratio) in self.momentum_phases: + curr_iter %= max_iter_per_phase + if start_iter <= curr_iter < end_iter: + progress = curr_iter - start_iter + return annealing_cos(base_momentum * start_ratio, + base_momentum * end_ratio, + progress / (end_iter - start_iter)) diff --git a/mmcv/mmcv/runner/hooks/optimizer.py b/mmcv/mmcv/runner/hooks/optimizer.py new file mode 100644 index 0000000..049d505 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/optimizer.py @@ -0,0 +1,150 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import copy +from collections import defaultdict +from itertools import chain + +from torch.nn.utils import clip_grad + +from ..dist_utils import allreduce_grads +from ..fp16_utils import LossScaler, wrap_fp16_model +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class OptimizerHook(Hook): + + def __init__(self, grad_clip=None): + self.grad_clip = grad_clip + + def clip_grads(self, params): + params = list( + filter(lambda p: p.requires_grad and p.grad is not None, params)) + if len(params) > 0: + return clip_grad.clip_grad_norm_(params, **self.grad_clip) + + def after_train_iter(self, runner): + runner.optimizer.zero_grad() + runner.outputs['loss'].backward() + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + runner.optimizer.step() + + +@HOOKS.register_module() +class Fp16OptimizerHook(OptimizerHook): + """FP16 optimizer hook. + + The steps of fp16 optimizer is as follows. + 1. Scale the loss value. + 2. BP in the fp16 model. + 2. Copy gradients from fp16 model to fp32 weights. + 3. Update fp32 weights. + 4. Copy updated parameters from fp32 weights to fp16 model. + + Refer to https://arxiv.org/abs/1710.03740 for more details. + + Args: + loss_scale (float | str): Scale factor multiplied with loss. If + 'dynamic' is specified, then dynamic loss scaling will be used. + """ + + def __init__(self, + grad_clip=None, + coalesce=True, + bucket_size_mb=-1, + loss_scale=512., + distributed=True): + self.grad_clip = grad_clip + self.coalesce = coalesce + self.bucket_size_mb = bucket_size_mb + self.distributed = distributed + if loss_scale == 'dynamic': + self.loss_scaler = LossScaler(mode='dynamic') + elif isinstance(loss_scale, float): + self.loss_scaler = LossScaler(init_scale=loss_scale, mode='static') + else: + raise ValueError('loss_scale must be of type float or str') + + def before_run(self, runner): + """Preparing steps before Mixed Precision Training. + + 1. Make a master copy of fp32 weights for optimization. + 2. Convert the main model from fp32 to fp16. + """ + # keep a copy of fp32 weights + old_groups = runner.optimizer.param_groups + runner.optimizer.param_groups = copy.deepcopy( + runner.optimizer.param_groups) + state = defaultdict(dict) + p_map = { + old_p: p + for old_p, p in zip( + chain(*(g['params'] for g in old_groups)), + chain(*(g['params'] for g in runner.optimizer.param_groups))) + } + for k, v in runner.optimizer.state.items(): + state[p_map[k]] = v + runner.optimizer.state = state + # convert model to fp16 + wrap_fp16_model(runner.model) + + def copy_grads_to_fp32(self, fp16_net, fp32_weights): + """Copy gradients from fp16 model to fp32 weight copy.""" + for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()): + if fp16_param.grad is not None: + if fp32_param.grad is None: + fp32_param.grad = fp32_param.data.new(fp32_param.size()) + fp32_param.grad.copy_(fp16_param.grad) + + def copy_params_to_fp16(self, fp16_net, fp32_weights): + """Copy updated params from fp32 weight copy to fp16 model.""" + for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights): + fp16_param.data.copy_(fp32_param.data) + + def after_train_iter(self, runner): + """Backward optimization steps for Mixed Precision Training. For + dynamic loss scaling, please refer `loss_scalar.py` + + 1. Scale the loss by a scale factor. + 2. Backward the loss to obtain the gradients (fp16). + 3. Copy gradients from the model to the fp32 weight copy. + 4. Scale the gradients back and update the fp32 weight copy. + 5. Copy back the params from fp32 weight copy to the fp16 model. + """ + # clear grads of last iteration + runner.model.zero_grad() + runner.optimizer.zero_grad() + # scale the loss value + scaled_loss = runner.outputs['loss'] * self.loss_scaler.loss_scale + scaled_loss.backward() + # copy fp16 grads in the model to fp32 params in the optimizer + + fp32_weights = [] + for param_group in runner.optimizer.param_groups: + fp32_weights += param_group['params'] + self.copy_grads_to_fp32(runner.model, fp32_weights) + # allreduce grads + if self.distributed: + allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb) + + has_overflow = self.loss_scaler.has_overflow(fp32_weights) + # if has overflow, skip this iteration + if not has_overflow: + # scale the gradients back + for param in fp32_weights: + if param.grad is not None: + param.grad.div_(self.loss_scaler.loss_scale) + if self.grad_clip is not None: + self.clip_grads(fp32_weights) + # update fp32 params + runner.optimizer.step() + # copy fp32 params to the fp16 model + self.copy_params_to_fp16(runner.model, fp32_weights) + self.loss_scaler.update_scale(has_overflow) + if has_overflow: + runner.logger.warning('Check overflow, downscale loss scale ' + f'to {self.loss_scaler.cur_scale}') diff --git a/mmcv/mmcv/runner/hooks/sampler_seed.py b/mmcv/mmcv/runner/hooks/sampler_seed.py new file mode 100644 index 0000000..535d801 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/sampler_seed.py @@ -0,0 +1,14 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class DistSamplerSeedHook(Hook): + + def before_epoch(self, runner): + if hasattr(runner.data_loader.sampler, 'set_epoch'): + # in case the data loader uses `SequentialSampler` in Pytorch + runner.data_loader.sampler.set_epoch(runner.epoch) + elif hasattr(runner.data_loader.batch_sampler.sampler, 'set_epoch'): + # batch sampler in pytorch warps the sampler as its attributes. + runner.data_loader.batch_sampler.sampler.set_epoch(runner.epoch) diff --git a/mmcv/mmcv/runner/hooks/sync_buffer.py b/mmcv/mmcv/runner/hooks/sync_buffer.py new file mode 100644 index 0000000..f582715 --- /dev/null +++ b/mmcv/mmcv/runner/hooks/sync_buffer.py @@ -0,0 +1,22 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from ..dist_utils import allreduce_params +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class SyncBuffersHook(Hook): + """Synchronize model buffers such as running_mean and running_var in BN at + the end of each epoch. + + Args: + distributed (bool): Whether distributed training is used. It is + effective only for distributed training. Defaults to True. + """ + + def __init__(self, distributed=True): + self.distributed = distributed + + def after_epoch(self, runner): + """All-reduce model buffers at the end of each epoch.""" + if self.distributed: + allreduce_params(runner.model.buffers()) diff --git a/mmcv/mmcv/runner/iter_based_runner.py b/mmcv/mmcv/runner/iter_based_runner.py new file mode 100644 index 0000000..fa5ae11 --- /dev/null +++ b/mmcv/mmcv/runner/iter_based_runner.py @@ -0,0 +1,246 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import os.path as osp +import platform +import shutil +import time +import warnings + +import torch +from torch.optim import Optimizer + +import mmcv +from .base_runner import BaseRunner +from .builder import RUNNERS +from .checkpoint import save_checkpoint +from .hooks import IterTimerHook +from .utils import get_host_info + + +class IterLoader: + + def __init__(self, dataloader): + self._dataloader = dataloader + self.iter_loader = iter(self._dataloader) + self._epoch = 0 + + @property + def epoch(self): + return self._epoch + + def __next__(self): + try: + data = next(self.iter_loader) + except StopIteration: + self._epoch += 1 + if hasattr(self._dataloader.sampler, 'set_epoch'): + self._dataloader.sampler.set_epoch(self._epoch) + self.iter_loader = iter(self._dataloader) + data = next(self.iter_loader) + + return data + + def __len__(self): + return len(self._dataloader) + + +@RUNNERS.register_module() +class IterBasedRunner(BaseRunner): + """Iteration-based Runner. + + This runner train models iteration by iteration. + """ + + def train(self, data_loader, **kwargs): + self.model.train() + self.mode = 'train' + self.data_loader = data_loader + self._epoch = data_loader.epoch + data_batch = next(data_loader) + self.call_hook('before_train_iter') + outputs = self.model.train_step(data_batch, self.optimizer, **kwargs) + if not isinstance(outputs, dict): + raise TypeError('model.train_step() must return a dict') + if 'log_vars' in outputs: + self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) + self.outputs = outputs + self.call_hook('after_train_iter') + self._inner_iter += 1 + self._iter += 1 + + def val(self, data_loader, **kwargs): + self.model.eval() + self.mode = 'val' + self.data_loader = data_loader + data_batch = next(data_loader) + self.call_hook('before_val_iter') + outputs = self.model.val_step(data_batch, **kwargs) + if not isinstance(outputs, dict): + raise TypeError('model.val_step() must return a dict') + if 'log_vars' in outputs: + self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) + self.outputs = outputs + self.call_hook('after_val_iter') + self._inner_iter += 1 + + def run(self, data_loaders, workflow, max_iters=None, **kwargs): + """Start running. + + Args: + data_loaders (list[:obj:`DataLoader`]): Dataloaders for training + and validation. + workflow (list[tuple]): A list of (phase, iters) to specify the + running order and iterations. E.g, [('train', 10000), + ('val', 1000)] means running 10000 iterations for training and + 1000 iterations for validation, iteratively. + """ + assert isinstance(data_loaders, list) + assert mmcv.is_list_of(workflow, tuple) + assert len(data_loaders) == len(workflow) + if max_iters is not None: + warnings.warn( + 'setting max_iters in run is deprecated, ' + 'please set max_iters in runner_config', DeprecationWarning) + self._max_iters = max_iters + assert self._max_iters is not None, ( + 'max_iters must be specified during instantiation') + + work_dir = self.work_dir if self.work_dir is not None else 'NONE' + self.logger.info('Start running, host: %s, work_dir: %s', + get_host_info(), work_dir) + self.logger.info('workflow: %s, max: %d iters', workflow, + self._max_iters) + self.call_hook('before_run') + + iter_loaders = [IterLoader(x) for x in data_loaders] + + self.call_hook('before_epoch') + + while self.iter < self._max_iters: + for i, flow in enumerate(workflow): + self._inner_iter = 0 + mode, iters = flow + if not isinstance(mode, str) or not hasattr(self, mode): + raise ValueError( + 'runner has no method named "{}" to run a workflow'. + format(mode)) + iter_runner = getattr(self, mode) + for _ in range(iters): + if mode == 'train' and self.iter >= self._max_iters: + break + iter_runner(iter_loaders[i], **kwargs) + + time.sleep(1) # wait for some hooks like loggers to finish + self.call_hook('after_epoch') + self.call_hook('after_run') + + def resume(self, + checkpoint, + resume_optimizer=True, + map_location='default'): + """Resume model from checkpoint. + + Args: + checkpoint (str): Checkpoint to resume from. + resume_optimizer (bool, optional): Whether resume the optimizer(s) + if the checkpoint file includes optimizer(s). Default to True. + map_location (str, optional): Same as :func:`torch.load`. + Default to 'default'. + """ + if map_location == 'default': + device_id = torch.cuda.current_device() + checkpoint = self.load_checkpoint( + checkpoint, + map_location=lambda storage, loc: storage.cuda(device_id)) + else: + checkpoint = self.load_checkpoint( + checkpoint, map_location=map_location) + + self._epoch = checkpoint['meta']['epoch'] + self._iter = checkpoint['meta']['iter'] + self._inner_iter = checkpoint['meta']['iter'] + if 'optimizer' in checkpoint and resume_optimizer: + if isinstance(self.optimizer, Optimizer): + self.optimizer.load_state_dict(checkpoint['optimizer']) + elif isinstance(self.optimizer, dict): + for k in self.optimizer.keys(): + self.optimizer[k].load_state_dict( + checkpoint['optimizer'][k]) + else: + raise TypeError( + 'Optimizer should be dict or torch.optim.Optimizer ' + f'but got {type(self.optimizer)}') + + self.logger.info(f'resumed from epoch: {self.epoch}, iter {self.iter}') + + def save_checkpoint(self, + out_dir, + filename_tmpl='iter_{}.pth', + meta=None, + save_optimizer=True, + create_symlink=True): + """Save checkpoint to file. + + Args: + out_dir (str): Directory to save checkpoint files. + filename_tmpl (str, optional): Checkpoint file template. + Defaults to 'iter_{}.pth'. + meta (dict, optional): Metadata to be saved in checkpoint. + Defaults to None. + save_optimizer (bool, optional): Whether save optimizer. + Defaults to True. + create_symlink (bool, optional): Whether create symlink to the + latest checkpoint file. Defaults to True. + """ + if meta is None: + meta = dict(iter=self.iter + 1, epoch=self.epoch + 1) + elif isinstance(meta, dict): + meta.update(iter=self.iter + 1, epoch=self.epoch + 1) + else: + raise TypeError( + f'meta should be a dict or None, but got {type(meta)}') + if self.meta is not None: + meta.update(self.meta) + + filename = filename_tmpl.format(self.iter + 1) + filepath = osp.join(out_dir, filename) + optimizer = self.optimizer if save_optimizer else None + save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) + # in some environments, `os.symlink` is not supported, you may need to + # set `create_symlink` to False + if create_symlink: + dst_file = osp.join(out_dir, 'latest.pth') + if platform.system() != 'Windows': + mmcv.symlink(filename, dst_file) + else: + shutil.copy(filename, dst_file) + + def register_training_hooks(self, + lr_config, + optimizer_config=None, + checkpoint_config=None, + log_config=None, + momentum_config=None): + """Register default hooks for iter-based training. + + Default hooks include: + + - LrUpdaterHook + - MomentumUpdaterHook + - OptimizerStepperHook + - CheckpointSaverHook + - IterTimerHook + - LoggerHook(s) + """ + if checkpoint_config is not None: + checkpoint_config.setdefault('by_epoch', False) + if lr_config is not None: + lr_config.setdefault('by_epoch', False) + self.register_lr_hook(lr_config) + self.register_momentum_hook(momentum_config) + self.register_optimizer_hook(optimizer_config) + self.register_checkpoint_hook(checkpoint_config) + self.register_hook(IterTimerHook()) + if log_config is not None: + for info in log_config['hooks']: + info.setdefault('by_epoch', False) + self.register_logger_hooks(log_config) diff --git a/mmcv/mmcv/runner/log_buffer.py b/mmcv/mmcv/runner/log_buffer.py new file mode 100644 index 0000000..ed4652f --- /dev/null +++ b/mmcv/mmcv/runner/log_buffer.py @@ -0,0 +1,41 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from collections import OrderedDict + +import numpy as np + + +class LogBuffer: + + def __init__(self): + self.val_history = OrderedDict() + self.n_history = OrderedDict() + self.output = OrderedDict() + self.ready = False + + def clear(self): + self.val_history.clear() + self.n_history.clear() + self.clear_output() + + def clear_output(self): + self.output.clear() + self.ready = False + + def update(self, vars, count=1): + assert isinstance(vars, dict) + for key, var in vars.items(): + if key not in self.val_history: + self.val_history[key] = [] + self.n_history[key] = [] + self.val_history[key].append(var) + self.n_history[key].append(count) + + def average(self, n=0): + """Average latest n values or all values.""" + assert n >= 0 + for key in self.val_history: + values = np.array(self.val_history[key][-n:]) + nums = np.array(self.n_history[key][-n:]) + avg = np.sum(values * nums) / np.sum(nums) + self.output[key] = avg + self.ready = True diff --git a/mmcv/mmcv/runner/optimizer/__init__.py b/mmcv/mmcv/runner/optimizer/__init__.py new file mode 100644 index 0000000..faa0585 --- /dev/null +++ b/mmcv/mmcv/runner/optimizer/__init__.py @@ -0,0 +1,8 @@ +from .builder import (OPTIMIZER_BUILDERS, OPTIMIZERS, build_optimizer, + build_optimizer_constructor) +from .default_constructor import DefaultOptimizerConstructor + +__all__ = [ + 'OPTIMIZER_BUILDERS', 'OPTIMIZERS', 'DefaultOptimizerConstructor', + 'build_optimizer', 'build_optimizer_constructor' +] diff --git a/mmcv/mmcv/runner/optimizer/builder.py b/mmcv/mmcv/runner/optimizer/builder.py new file mode 100644 index 0000000..e97082a --- /dev/null +++ b/mmcv/mmcv/runner/optimizer/builder.py @@ -0,0 +1,43 @@ +import copy +import inspect + +import torch + +from ...utils import Registry, build_from_cfg + +OPTIMIZERS = Registry('optimizer') +OPTIMIZER_BUILDERS = Registry('optimizer builder') + + +def register_torch_optimizers(): + torch_optimizers = [] + for module_name in dir(torch.optim): + if module_name.startswith('__'): + continue + _optim = getattr(torch.optim, module_name) + if inspect.isclass(_optim) and issubclass(_optim, + torch.optim.Optimizer): + OPTIMIZERS.register_module()(_optim) + torch_optimizers.append(module_name) + return torch_optimizers + + +TORCH_OPTIMIZERS = register_torch_optimizers() + + +def build_optimizer_constructor(cfg): + return build_from_cfg(cfg, OPTIMIZER_BUILDERS) + + +def build_optimizer(model, cfg): + optimizer_cfg = copy.deepcopy(cfg) + constructor_type = optimizer_cfg.pop('constructor', + 'DefaultOptimizerConstructor') + paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None) + optim_constructor = build_optimizer_constructor( + dict( + type=constructor_type, + optimizer_cfg=optimizer_cfg, + paramwise_cfg=paramwise_cfg)) + optimizer = optim_constructor(model) + return optimizer diff --git a/mmcv/mmcv/runner/optimizer/default_constructor.py b/mmcv/mmcv/runner/optimizer/default_constructor.py new file mode 100644 index 0000000..477bf07 --- /dev/null +++ b/mmcv/mmcv/runner/optimizer/default_constructor.py @@ -0,0 +1,248 @@ +import warnings + +import torch +from torch.nn import GroupNorm, LayerNorm + +from mmcv.utils import _BatchNorm, _InstanceNorm, build_from_cfg, is_list_of +from mmcv.utils.ext_loader import check_ops_exist +from .builder import OPTIMIZER_BUILDERS, OPTIMIZERS + + +@OPTIMIZER_BUILDERS.register_module() +class DefaultOptimizerConstructor: + """Default constructor for optimizers. + + By default each parameter share the same optimizer settings, and we + provide an argument ``paramwise_cfg`` to specify parameter-wise settings. + It is a dict and may contain the following fields: + + - ``custom_keys`` (dict): Specified parameters-wise settings by keys. If + one of the keys in ``custom_keys`` is a substring of the name of one + parameter, then the setting of the parameter will be specified by + ``custom_keys[key]`` and other setting like ``bias_lr_mult`` etc. will + be ignored. It should be noted that the aforementioned ``key`` is the + longest key that is a substring of the name of the parameter. If there + are multiple matched keys with the same length, then the key with lower + alphabet order will be chosen. + ``custom_keys[key]`` should be a dict and may contain fields ``lr_mult`` + and ``decay_mult``. See Example 2 below. + - ``bias_lr_mult`` (float): It will be multiplied to the learning + rate for all bias parameters (except for those in normalization + layers and offset layers of DCN). + - ``bias_decay_mult`` (float): It will be multiplied to the weight + decay for all bias parameters (except for those in + normalization layers, depthwise conv layers, offset layers of DCN). + - ``norm_decay_mult`` (float): It will be multiplied to the weight + decay for all weight and bias parameters of normalization + layers. + - ``dwconv_decay_mult`` (float): It will be multiplied to the weight + decay for all weight and bias parameters of depthwise conv + layers. + - ``dcn_offset_lr_mult`` (float): It will be multiplied to the learning + rate for parameters of offset layer in the deformable convs + of a model. + - ``bypass_duplicate`` (bool): If true, the duplicate parameters + would not be added into optimizer. Default: False. + + Note: + 1. If the option ``dcn_offset_lr_mult`` is used, the constructor will + override the effect of ``bias_lr_mult`` in the bias of offset + layer. So be careful when using both ``bias_lr_mult`` and + ``dcn_offset_lr_mult``. If you wish to apply both of them to the + offset layer in deformable convs, set ``dcn_offset_lr_mult`` + to the original ``dcn_offset_lr_mult`` * ``bias_lr_mult``. + 2. If the option ``dcn_offset_lr_mult`` is used, the construtor will + apply it to all the DCN layers in the model. So be carefull when + the model contains multiple DCN layers in places other than + backbone. + + Args: + model (:obj:`nn.Module`): The model with parameters to be optimized. + optimizer_cfg (dict): The config dict of the optimizer. + Positional fields are + + - `type`: class name of the optimizer. + + Optional fields are + + - any arguments of the corresponding optimizer type, e.g., + lr, weight_decay, momentum, etc. + paramwise_cfg (dict, optional): Parameter-wise options. + + Example 1: + >>> model = torch.nn.modules.Conv1d(1, 1, 1) + >>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9, + >>> weight_decay=0.0001) + >>> paramwise_cfg = dict(norm_decay_mult=0.) + >>> optim_builder = DefaultOptimizerConstructor( + >>> optimizer_cfg, paramwise_cfg) + >>> optimizer = optim_builder(model) + + Example 2: + >>> # assume model have attribute model.backbone and model.cls_head + >>> optimizer_cfg = dict(type='SGD', lr=0.01, weight_decay=0.95) + >>> paramwise_cfg = dict(custom_keys={ + '.backbone': dict(lr_mult=0.1, decay_mult=0.9)}) + >>> optim_builder = DefaultOptimizerConstructor( + >>> optimizer_cfg, paramwise_cfg) + >>> optimizer = optim_builder(model) + >>> # Then the `lr` and `weight_decay` for model.backbone is + >>> # (0.01 * 0.1, 0.95 * 0.9). `lr` and `weight_decay` for + >>> # model.cls_head is (0.01, 0.95). + """ + + def __init__(self, optimizer_cfg, paramwise_cfg=None): + if not isinstance(optimizer_cfg, dict): + raise TypeError('optimizer_cfg should be a dict', + f'but got {type(optimizer_cfg)}') + self.optimizer_cfg = optimizer_cfg + self.paramwise_cfg = {} if paramwise_cfg is None else paramwise_cfg + self.base_lr = optimizer_cfg.get('lr', None) + self.base_wd = optimizer_cfg.get('weight_decay', None) + self._validate_cfg() + + def _validate_cfg(self): + if not isinstance(self.paramwise_cfg, dict): + raise TypeError('paramwise_cfg should be None or a dict, ' + f'but got {type(self.paramwise_cfg)}') + + if 'custom_keys' in self.paramwise_cfg: + if not isinstance(self.paramwise_cfg['custom_keys'], dict): + raise TypeError( + 'If specified, custom_keys must be a dict, ' + f'but got {type(self.paramwise_cfg["custom_keys"])}') + if self.base_wd is None: + for key in self.paramwise_cfg['custom_keys']: + if 'decay_mult' in self.paramwise_cfg['custom_keys'][key]: + raise ValueError('base_wd should not be None') + + # get base lr and weight decay + # weight_decay must be explicitly specified if mult is specified + if ('bias_decay_mult' in self.paramwise_cfg + or 'norm_decay_mult' in self.paramwise_cfg + or 'dwconv_decay_mult' in self.paramwise_cfg): + if self.base_wd is None: + raise ValueError('base_wd should not be None') + + def _is_in(self, param_group, param_group_list): + assert is_list_of(param_group_list, dict) + param = set(param_group['params']) + param_set = set() + for group in param_group_list: + param_set.update(set(group['params'])) + + return not param.isdisjoint(param_set) + + def add_params(self, params, module, prefix='', is_dcn_module=None): + """Add all parameters of module to the params list. + + The parameters of the given module will be added to the list of param + groups, with specific rules defined by paramwise_cfg. + + Args: + params (list[dict]): A list of param groups, it will be modified + in place. + module (nn.Module): The module to be added. + prefix (str): The prefix of the module + is_dcn_module (int|float|None): If the current module is a + submodule of DCN, `is_dcn_module` will be passed to + control conv_offset layer's learning rate. Defaults to None. + """ + # get param-wise options + custom_keys = self.paramwise_cfg.get('custom_keys', {}) + # first sort with alphabet order and then sort with reversed len of str + sorted_keys = sorted(sorted(custom_keys.keys()), key=len, reverse=True) + + bias_lr_mult = self.paramwise_cfg.get('bias_lr_mult', 1.) + bias_decay_mult = self.paramwise_cfg.get('bias_decay_mult', 1.) + norm_decay_mult = self.paramwise_cfg.get('norm_decay_mult', 1.) + dwconv_decay_mult = self.paramwise_cfg.get('dwconv_decay_mult', 1.) + bypass_duplicate = self.paramwise_cfg.get('bypass_duplicate', False) + dcn_offset_lr_mult = self.paramwise_cfg.get('dcn_offset_lr_mult', 1.) + + # special rules for norm layers and depth-wise conv layers + is_norm = isinstance(module, + (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm)) + is_dwconv = ( + isinstance(module, torch.nn.Conv2d) + and module.in_channels == module.groups) + + for name, param in module.named_parameters(recurse=False): + param_group = {'params': [param]} + if not param.requires_grad: + params.append(param_group) + continue + if bypass_duplicate and self._is_in(param_group, params): + warnings.warn(f'{prefix} is duplicate. It is skipped since ' + f'bypass_duplicate={bypass_duplicate}') + continue + # if the parameter match one of the custom keys, ignore other rules + is_custom = False + for key in sorted_keys: + if key in f'{prefix}.{name}': + is_custom = True + lr_mult = custom_keys[key].get('lr_mult', 1.) + param_group['lr'] = self.base_lr * lr_mult + if self.base_wd is not None: + decay_mult = custom_keys[key].get('decay_mult', 1.) + param_group['weight_decay'] = self.base_wd * decay_mult + break + + if not is_custom: + # bias_lr_mult affects all bias parameters + # except for norm.bias dcn.conv_offset.bias + if name == 'bias' and not (is_norm or is_dcn_module): + param_group['lr'] = self.base_lr * bias_lr_mult + + if (prefix.find('conv_offset') != -1 and is_dcn_module + and isinstance(module, torch.nn.Conv2d)): + # deal with both dcn_offset's bias & weight + param_group['lr'] = self.base_lr * dcn_offset_lr_mult + + # apply weight decay policies + if self.base_wd is not None: + # norm decay + if is_norm: + param_group[ + 'weight_decay'] = self.base_wd * norm_decay_mult + # depth-wise conv + elif is_dwconv: + param_group[ + 'weight_decay'] = self.base_wd * dwconv_decay_mult + # bias lr and decay + elif name == 'bias' and not is_dcn_module: + # TODO: current bias_decay_mult will have affect on DCN + param_group[ + 'weight_decay'] = self.base_wd * bias_decay_mult + params.append(param_group) + + if check_ops_exist(): + from mmcv.ops import DeformConv2d, ModulatedDeformConv2d + is_dcn_module = isinstance(module, + (DeformConv2d, ModulatedDeformConv2d)) + else: + is_dcn_module = False + for child_name, child_mod in module.named_children(): + child_prefix = f'{prefix}.{child_name}' if prefix else child_name + self.add_params( + params, + child_mod, + prefix=child_prefix, + is_dcn_module=is_dcn_module) + + def __call__(self, model): + if hasattr(model, 'module'): + model = model.module + + optimizer_cfg = self.optimizer_cfg.copy() + # if no paramwise option is specified, just use the global setting + if not self.paramwise_cfg: + optimizer_cfg['params'] = model.parameters() + return build_from_cfg(optimizer_cfg, OPTIMIZERS) + + # set param-wise lr and weight decay recursively + params = [] + self.add_params(params, model) + optimizer_cfg['params'] = params + + return build_from_cfg(optimizer_cfg, OPTIMIZERS) diff --git a/mmcv/mmcv/runner/priority.py b/mmcv/mmcv/runner/priority.py new file mode 100644 index 0000000..b58c67e --- /dev/null +++ b/mmcv/mmcv/runner/priority.py @@ -0,0 +1,54 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from enum import Enum + + +class Priority(Enum): + """Hook priority levels. + + +------------+------------+ + | Level | Value | + +============+============+ + | HIGHEST | 0 | + +------------+------------+ + | VERY_HIGH | 10 | + +------------+------------+ + | HIGH | 30 | + +------------+------------+ + | NORMAL | 50 | + +------------+------------+ + | LOW | 70 | + +------------+------------+ + | VERY_LOW | 90 | + +------------+------------+ + | LOWEST | 100 | + +------------+------------+ + """ + + HIGHEST = 0 + VERY_HIGH = 10 + HIGH = 30 + NORMAL = 50 + LOW = 70 + VERY_LOW = 90 + LOWEST = 100 + + +def get_priority(priority): + """Get priority value. + + Args: + priority (int or str or :obj:`Priority`): Priority. + + Returns: + int: The priority value. + """ + if isinstance(priority, int): + if priority < 0 or priority > 100: + raise ValueError('priority must be between 0 and 100') + return priority + elif isinstance(priority, Priority): + return priority.value + elif isinstance(priority, str): + return Priority[priority.upper()].value + else: + raise TypeError('priority must be an integer or Priority enum value') diff --git a/mmcv/mmcv/runner/utils.py b/mmcv/mmcv/runner/utils.py new file mode 100644 index 0000000..168305f --- /dev/null +++ b/mmcv/mmcv/runner/utils.py @@ -0,0 +1,81 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import os +import random +import sys +import time +from getpass import getuser +from socket import gethostname + +import numpy as np +import torch + +import mmcv + + +def get_host_info(): + return f'{getuser()}@{gethostname()}' + + +def get_time_str(): + return time.strftime('%Y%m%d_%H%M%S', time.localtime()) + + +def obj_from_dict(info, parent=None, default_args=None): + """Initialize an object from dict. + + The dict must contain the key "type", which indicates the object type, it + can be either a string or type, such as "list" or ``list``. Remaining + fields are treated as the arguments for constructing the object. + + Args: + info (dict): Object types and arguments. + parent (:class:`module`): Module which may containing expected object + classes. + default_args (dict, optional): Default arguments for initializing the + object. + + Returns: + any type: Object built from the dict. + """ + assert isinstance(info, dict) and 'type' in info + assert isinstance(default_args, dict) or default_args is None + args = info.copy() + obj_type = args.pop('type') + if mmcv.is_str(obj_type): + if parent is not None: + obj_type = getattr(parent, obj_type) + else: + obj_type = sys.modules[obj_type] + elif not isinstance(obj_type, type): + raise TypeError('type must be a str or valid type, but ' + f'got {type(obj_type)}') + if default_args is not None: + for name, value in default_args.items(): + args.setdefault(name, value) + return obj_type(**args) + + +def set_random_seed(seed, deterministic=False, use_rank_shift=False): + """Set random seed. + + Args: + seed (int): Seed to be used. + deterministic (bool): Whether to set the deterministic option for + CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` + to True and `torch.backends.cudnn.benchmark` to False. + Default: False. + rank_shift (bool): Whether to add rank number to the random seed to + have different random seed in different threads. Default: False. + """ + if use_rank_shift: + rank, _ = mmcv.runner.get_dist_info() + seed += rank + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + os.environ['PYTHONHASHSEED'] = str(seed) + if deterministic: + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False diff --git a/mmcv/mmcv/utils/__init__.py b/mmcv/mmcv/utils/__init__.py new file mode 100644 index 0000000..bf8792f --- /dev/null +++ b/mmcv/mmcv/utils/__init__.py @@ -0,0 +1,52 @@ +# flake8: noqa +# Copyright (c) Open-MMLab. All rights reserved. +from .config import Config, ConfigDict, DictAction +from .misc import (check_prerequisites, concat_list, deprecated_api_warning, + import_modules_from_strings, is_list_of, is_seq_of, is_str, + is_tuple_of, iter_cast, list_cast, requires_executable, + requires_package, slice_list, tuple_cast) +from .path import (check_file_exist, fopen, is_filepath, mkdir_or_exist, + scandir, symlink) +from .progressbar import (ProgressBar, track_iter_progress, + track_parallel_progress, track_progress) +from .timer import Timer, TimerError, check_time +from .version_utils import digit_version, get_git_hash + +try: + import torch +except ImportError: + __all__ = [ + 'Config', 'ConfigDict', 'DictAction', 'is_str', 'iter_cast', + 'list_cast', 'tuple_cast', 'is_seq_of', 'is_list_of', 'is_tuple_of', + 'slice_list', 'concat_list', 'check_prerequisites', 'requires_package', + 'requires_executable', 'is_filepath', 'fopen', 'check_file_exist', + 'mkdir_or_exist', 'symlink', 'scandir', 'ProgressBar', + 'track_progress', 'track_iter_progress', 'track_parallel_progress', + 'Timer', 'TimerError', 'check_time', 'deprecated_api_warning', + 'digit_version', 'get_git_hash', 'import_modules_from_strings' + ] +else: + from .env import collect_env + from .logging import get_logger, print_log + from .parrots_wrapper import ( + CUDA_HOME, TORCH_VERSION, BuildExtension, CppExtension, CUDAExtension, + DataLoader, PoolDataLoader, SyncBatchNorm, _AdaptiveAvgPoolNd, + _AdaptiveMaxPoolNd, _AvgPoolNd, _BatchNorm, _ConvNd, + _ConvTransposeMixin, _InstanceNorm, _MaxPoolNd, get_build_config) + from .registry import Registry, build_from_cfg + __all__ = [ + 'Config', 'ConfigDict', 'DictAction', 'collect_env', 'get_logger', + 'print_log', 'is_str', 'iter_cast', 'list_cast', 'tuple_cast', + 'is_seq_of', 'is_list_of', 'is_tuple_of', 'slice_list', 'concat_list', + 'check_prerequisites', 'requires_package', 'requires_executable', + 'is_filepath', 'fopen', 'check_file_exist', 'mkdir_or_exist', + 'symlink', 'scandir', 'ProgressBar', 'track_progress', + 'track_iter_progress', 'track_parallel_progress', 'Registry', + 'build_from_cfg', 'Timer', 'TimerError', 'check_time', 'CUDA_HOME', + 'SyncBatchNorm', '_AdaptiveAvgPoolNd', '_AdaptiveMaxPoolNd', + '_AvgPoolNd', '_BatchNorm', '_ConvNd', '_ConvTransposeMixin', + '_InstanceNorm', '_MaxPoolNd', 'get_build_config', 'BuildExtension', + 'CppExtension', 'CUDAExtension', 'DataLoader', 'PoolDataLoader', + 'TORCH_VERSION', 'deprecated_api_warning', 'digit_version', + 'get_git_hash', 'import_modules_from_strings' + ] diff --git a/mmcv/mmcv/utils/config.py b/mmcv/mmcv/utils/config.py new file mode 100644 index 0000000..76739c3 --- /dev/null +++ b/mmcv/mmcv/utils/config.py @@ -0,0 +1,467 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import ast +import os.path as osp +import platform +import shutil +import sys +import tempfile +from argparse import Action, ArgumentParser +from collections import abc +from importlib import import_module + +from addict import Dict +from yapf.yapflib.yapf_api import FormatCode + +from .misc import import_modules_from_strings +from .path import check_file_exist + +if platform.system() == 'Windows': + import regex as re +else: + import re + +BASE_KEY = '_base_' +DELETE_KEY = '_delete_' +RESERVED_KEYS = ['filename', 'text', 'pretty_text'] + + +class ConfigDict(Dict): + + def __missing__(self, name): + raise KeyError(name) + + def __getattr__(self, name): + try: + value = super(ConfigDict, self).__getattr__(name) + except KeyError: + ex = AttributeError(f"'{self.__class__.__name__}' object has no " + f"attribute '{name}'") + except Exception as e: + ex = e + else: + return value + raise ex + + +def add_args(parser, cfg, prefix=''): + for k, v in cfg.items(): + if isinstance(v, str): + parser.add_argument('--' + prefix + k) + elif isinstance(v, int): + parser.add_argument('--' + prefix + k, type=int) + elif isinstance(v, float): + parser.add_argument('--' + prefix + k, type=float) + elif isinstance(v, bool): + parser.add_argument('--' + prefix + k, action='store_true') + elif isinstance(v, dict): + add_args(parser, v, prefix + k + '.') + elif isinstance(v, abc.Iterable): + parser.add_argument('--' + prefix + k, type=type(v[0]), nargs='+') + else: + print(f'cannot parse key {prefix + k} of type {type(v)}') + return parser + + +class Config: + """A facility for config and config files. + + It supports common file formats as configs: python/json/yaml. The interface + is the same as a dict object and also allows access config values as + attributes. + + Example: + >>> cfg = Config(dict(a=1, b=dict(b1=[0, 1]))) + >>> cfg.a + 1 + >>> cfg.b + {'b1': [0, 1]} + >>> cfg.b.b1 + [0, 1] + >>> cfg = Config.fromfile('tests/data/config/a.py') + >>> cfg.filename + "/home/kchen/projects/mmcv/tests/data/config/a.py" + >>> cfg.item4 + 'test' + >>> cfg + "Config [path: /home/kchen/projects/mmcv/tests/data/config/a.py]: " + "{'item1': [1, 2], 'item2': {'a': 0}, 'item3': True, 'item4': 'test'}" + """ + + @staticmethod + def _validate_py_syntax(filename): + with open(filename, 'r') as f: + content = f.read() + try: + ast.parse(content) + except SyntaxError as e: + raise SyntaxError('There are syntax errors in config ' + f'file {filename}: {e}') + + @staticmethod + def _substitute_predefined_vars(filename, temp_config_name): + file_dirname = osp.dirname(filename) + file_basename = osp.basename(filename) + file_basename_no_extension = osp.splitext(file_basename)[0] + file_extname = osp.splitext(filename)[1] + support_templates = dict( + fileDirname=file_dirname, + fileBasename=file_basename, + fileBasenameNoExtension=file_basename_no_extension, + fileExtname=file_extname) + with open(filename, 'r') as f: + config_file = f.read() + for key, value in support_templates.items(): + regexp = r'\{\{\s*' + str(key) + r'\s*\}\}' + value = value.replace('\\', '/') + config_file = re.sub(regexp, value, config_file) + with open(temp_config_name, 'w') as tmp_config_file: + tmp_config_file.write(config_file) + + @staticmethod + def _file2dict(filename, use_predefined_variables=True): + filename = osp.abspath(osp.expanduser(filename)) + check_file_exist(filename) + fileExtname = osp.splitext(filename)[1] + if fileExtname not in ['.py', '.json', '.yaml', '.yml']: + raise IOError('Only py/yml/yaml/json type are supported now!') + + with tempfile.TemporaryDirectory() as temp_config_dir: + temp_config_file = tempfile.NamedTemporaryFile( + dir=temp_config_dir, suffix=fileExtname) + if platform.system() == 'Windows': + temp_config_file.close() + temp_config_name = osp.basename(temp_config_file.name) + # Substitute predefined variables + if use_predefined_variables: + Config._substitute_predefined_vars(filename, + temp_config_file.name) + else: + shutil.copyfile(filename, temp_config_file.name) + + if filename.endswith('.py'): + temp_module_name = osp.splitext(temp_config_name)[0] + sys.path.insert(0, temp_config_dir) + Config._validate_py_syntax(filename) + mod = import_module(temp_module_name) + sys.path.pop(0) + cfg_dict = { + name: value + for name, value in mod.__dict__.items() + if not name.startswith('__') + } + # delete imported module + del sys.modules[temp_module_name] + elif filename.endswith(('.yml', '.yaml', '.json')): + import mmcv + cfg_dict = mmcv.load(temp_config_file.name) + # close temp file + temp_config_file.close() + + cfg_text = filename + '\n' + with open(filename, 'r') as f: + cfg_text += f.read() + + if BASE_KEY in cfg_dict: + cfg_dir = osp.dirname(filename) + base_filename = cfg_dict.pop(BASE_KEY) + base_filename = base_filename if isinstance( + base_filename, list) else [base_filename] + + cfg_dict_list = list() + cfg_text_list = list() + for f in base_filename: + _cfg_dict, _cfg_text = Config._file2dict(osp.join(cfg_dir, f)) + cfg_dict_list.append(_cfg_dict) + cfg_text_list.append(_cfg_text) + + base_cfg_dict = dict() + for c in cfg_dict_list: + if len(base_cfg_dict.keys() & c.keys()) > 0: + raise KeyError('Duplicate key is not allowed among bases') + base_cfg_dict.update(c) + + base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict) + cfg_dict = base_cfg_dict + + # merge cfg_text + cfg_text_list.append(cfg_text) + cfg_text = '\n'.join(cfg_text_list) + + return cfg_dict, cfg_text + + @staticmethod + def _merge_a_into_b(a, b): + # merge dict `a` into dict `b` (non-inplace). values in `a` will + # overwrite `b`. + # copy first to avoid inplace modification + b = b.copy() + for k, v in a.items(): + if isinstance(v, dict) and k in b and not v.pop(DELETE_KEY, False): + if not isinstance(b[k], dict): + raise TypeError( + f'{k}={v} in child config cannot inherit from base ' + f'because {k} is a dict in the child config but is of ' + f'type {type(b[k])} in base config. You may set ' + f'`{DELETE_KEY}=True` to ignore the base config') + b[k] = Config._merge_a_into_b(v, b[k]) + else: + b[k] = v + return b + + @staticmethod + def fromfile(filename, + use_predefined_variables=True, + import_custom_modules=True): + cfg_dict, cfg_text = Config._file2dict(filename, + use_predefined_variables) + if import_custom_modules and cfg_dict.get('custom_imports', None): + import_modules_from_strings(**cfg_dict['custom_imports']) + return Config(cfg_dict, cfg_text=cfg_text, filename=filename) + + @staticmethod + def auto_argparser(description=None): + """Generate argparser from config file automatically (experimental)""" + partial_parser = ArgumentParser(description=description) + partial_parser.add_argument('config', help='config file path') + cfg_file = partial_parser.parse_known_args()[0].config + cfg = Config.fromfile(cfg_file) + parser = ArgumentParser(description=description) + parser.add_argument('config', help='config file path') + add_args(parser, cfg) + return parser, cfg + + def __init__(self, cfg_dict=None, cfg_text=None, filename=None): + if cfg_dict is None: + cfg_dict = dict() + elif not isinstance(cfg_dict, dict): + raise TypeError('cfg_dict must be a dict, but ' + f'got {type(cfg_dict)}') + for key in cfg_dict: + if key in RESERVED_KEYS: + raise KeyError(f'{key} is reserved for config file') + + super(Config, self).__setattr__('_cfg_dict', ConfigDict(cfg_dict)) + super(Config, self).__setattr__('_filename', filename) + if cfg_text: + text = cfg_text + elif filename: + with open(filename, 'r') as f: + text = f.read() + else: + text = '' + super(Config, self).__setattr__('_text', text) + + @property + def filename(self): + return self._filename + + @property + def text(self): + return self._text + + @property + def pretty_text(self): + + indent = 4 + + def _indent(s_, num_spaces): + s = s_.split('\n') + if len(s) == 1: + return s_ + first = s.pop(0) + s = [(num_spaces * ' ') + line for line in s] + s = '\n'.join(s) + s = first + '\n' + s + return s + + def _format_basic_types(k, v, use_mapping=False): + if isinstance(v, str): + v_str = f"'{v}'" + else: + v_str = str(v) + + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: {v_str}' + else: + attr_str = f'{str(k)}={v_str}' + attr_str = _indent(attr_str, indent) + + return attr_str + + def _format_list(k, v, use_mapping=False): + # check if all items in the list are dict + if all(isinstance(_, dict) for _ in v): + v_str = '[\n' + v_str += '\n'.join( + f'dict({_indent(_format_dict(v_), indent)}),' + for v_ in v).rstrip(',') + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: {v_str}' + else: + attr_str = f'{str(k)}={v_str}' + attr_str = _indent(attr_str, indent) + ']' + else: + attr_str = _format_basic_types(k, v, use_mapping) + return attr_str + + def _contain_invalid_identifier(dict_str): + contain_invalid_identifier = False + for key_name in dict_str: + contain_invalid_identifier |= \ + (not str(key_name).isidentifier()) + return contain_invalid_identifier + + def _format_dict(input_dict, outest_level=False): + r = '' + s = [] + + use_mapping = _contain_invalid_identifier(input_dict) + if use_mapping: + r += '{' + for idx, (k, v) in enumerate(input_dict.items()): + is_last = idx >= len(input_dict) - 1 + end = '' if outest_level or is_last else ',' + if isinstance(v, dict): + v_str = '\n' + _format_dict(v) + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: dict({v_str}' + else: + attr_str = f'{str(k)}=dict({v_str}' + attr_str = _indent(attr_str, indent) + ')' + end + elif isinstance(v, list): + attr_str = _format_list(k, v, use_mapping) + end + else: + attr_str = _format_basic_types(k, v, use_mapping) + end + + s.append(attr_str) + r += '\n'.join(s) + if use_mapping: + r += '}' + return r + + cfg_dict = self._cfg_dict.to_dict() + text = _format_dict(cfg_dict, outest_level=True) + # copied from setup.cfg + yapf_style = dict( + based_on_style='pep8', + blank_line_before_nested_class_or_def=True, + split_before_expression_after_opening_paren=True) + text, _ = FormatCode(text, style_config=yapf_style, verify=True) + + return text + + def __repr__(self): + return f'Config (path: {self.filename}): {self._cfg_dict.__repr__()}' + + def __len__(self): + return len(self._cfg_dict) + + def __getattr__(self, name): + return getattr(self._cfg_dict, name) + + def __getitem__(self, name): + return self._cfg_dict.__getitem__(name) + + def __setattr__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setattr__(name, value) + + def __setitem__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setitem__(name, value) + + def __iter__(self): + return iter(self._cfg_dict) + + def __getstate__(self): + return (self._cfg_dict, self._filename, self._text) + + def __setstate__(self, state): + _cfg_dict, _filename, _text = state + super(Config, self).__setattr__('_cfg_dict', _cfg_dict) + super(Config, self).__setattr__('_filename', _filename) + super(Config, self).__setattr__('_text', _text) + + def dump(self, file=None): + cfg_dict = super(Config, self).__getattribute__('_cfg_dict').to_dict() + if self.filename.endswith('.py'): + if file is None: + return self.pretty_text + else: + with open(file, 'w') as f: + f.write(self.pretty_text) + else: + import mmcv + if file is None: + file_format = self.filename.split('.')[-1] + return mmcv.dump(cfg_dict, file_format=file_format) + else: + mmcv.dump(cfg_dict, file) + + def merge_from_dict(self, options): + """Merge list into cfg_dict. + + Merge the dict parsed by MultipleKVAction into this cfg. + + Examples: + >>> options = {'model.backbone.depth': 50, + ... 'model.backbone.with_cp':True} + >>> cfg = Config(dict(model=dict(backbone=dict(type='ResNet')))) + >>> cfg.merge_from_dict(options) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict( + ... model=dict(backbone=dict(depth=50, with_cp=True))) + + Args: + options (dict): dict of configs to merge from. + """ + option_cfg_dict = {} + for full_key, v in options.items(): + d = option_cfg_dict + key_list = full_key.split('.') + for subkey in key_list[:-1]: + d.setdefault(subkey, ConfigDict()) + d = d[subkey] + subkey = key_list[-1] + d[subkey] = v + + cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + super(Config, self).__setattr__( + '_cfg_dict', Config._merge_a_into_b(option_cfg_dict, cfg_dict)) + + +class DictAction(Action): + """ + argparse action to split an argument into KEY=VALUE form + on the first = and append to a dictionary. List options should + be passed as comma separated values, i.e KEY=V1,V2,V3 + """ + + @staticmethod + def _parse_int_float_bool(val): + try: + return int(val) + except ValueError: + pass + try: + return float(val) + except ValueError: + pass + if val.lower() in ['true', 'false']: + return True if val.lower() == 'true' else False + return val + + def __call__(self, parser, namespace, values, option_string=None): + options = {} + for kv in values: + key, val = kv.split('=', maxsplit=1) + val = [self._parse_int_float_bool(v) for v in val.split(',')] + if len(val) == 1: + val = val[0] + options[key] = val + setattr(namespace, self.dest, options) diff --git a/mmcv/mmcv/utils/env.py b/mmcv/mmcv/utils/env.py new file mode 100644 index 0000000..2cea9c7 --- /dev/null +++ b/mmcv/mmcv/utils/env.py @@ -0,0 +1,93 @@ +"""This file holding some environment constant for sharing by other files.""" + +import os.path as osp +import subprocess +import sys +from collections import defaultdict + +import cv2 +import torch + +import mmcv +from .parrots_wrapper import get_build_config + + +def collect_env(): + """Collect the information of the running environments. + + Returns: + dict: The environment information. The following fields are contained. + + - sys.platform: The variable of ``sys.platform``. + - Python: Python version. + - CUDA available: Bool, indicating if CUDA is available. + - GPU devices: Device type of each GPU. + - CUDA_HOME (optional): The env var ``CUDA_HOME``. + - NVCC (optional): NVCC version. + - GCC: GCC version, "n/a" if GCC is not installed. + - PyTorch: PyTorch version. + - PyTorch compiling details: The output of \ + ``torch.__config__.show()``. + - TorchVision (optional): TorchVision version. + - OpenCV: OpenCV version. + - MMCV: MMCV version. + - MMCV Compiler: The GCC version for compiling MMCV ops. + - MMCV CUDA Compiler: The CUDA version for compiling MMCV ops. + """ + env_info = {} + env_info['sys.platform'] = sys.platform + env_info['Python'] = sys.version.replace('\n', '') + + cuda_available = torch.cuda.is_available() + env_info['CUDA available'] = cuda_available + + if cuda_available: + devices = defaultdict(list) + for k in range(torch.cuda.device_count()): + devices[torch.cuda.get_device_name(k)].append(str(k)) + for name, device_ids in devices.items(): + env_info['GPU ' + ','.join(device_ids)] = name + + from torch.utils.cpp_extension import CUDA_HOME + env_info['CUDA_HOME'] = CUDA_HOME + + if CUDA_HOME is not None and osp.isdir(CUDA_HOME): + try: + nvcc = osp.join(CUDA_HOME, 'bin/nvcc') + nvcc = subprocess.check_output( + f'"{nvcc}" -V | tail -n1', shell=True) + nvcc = nvcc.decode('utf-8').strip() + except subprocess.SubprocessError: + nvcc = 'Not Available' + env_info['NVCC'] = nvcc + + try: + gcc = subprocess.check_output('gcc --version | head -n1', shell=True) + gcc = gcc.decode('utf-8').strip() + env_info['GCC'] = gcc + except subprocess.CalledProcessError: # gcc is unavailable + env_info['GCC'] = 'n/a' + + env_info['PyTorch'] = torch.__version__ + env_info['PyTorch compiling details'] = get_build_config() + + try: + import torchvision + env_info['TorchVision'] = torchvision.__version__ + except ModuleNotFoundError: + pass + + env_info['OpenCV'] = cv2.__version__ + + env_info['MMCV'] = mmcv.__version__ + + try: + from mmcv.ops import get_compiler_version, get_compiling_cuda_version + except ModuleNotFoundError: + env_info['MMCV Compiler'] = 'n/a' + env_info['MMCV CUDA Compiler'] = 'n/a' + else: + env_info['MMCV Compiler'] = get_compiler_version() + env_info['MMCV CUDA Compiler'] = get_compiling_cuda_version() + + return env_info diff --git a/mmcv/mmcv/utils/ext_loader.py b/mmcv/mmcv/utils/ext_loader.py new file mode 100644 index 0000000..e56651c --- /dev/null +++ b/mmcv/mmcv/utils/ext_loader.py @@ -0,0 +1,33 @@ +import importlib +import os +import pkgutil +from collections import namedtuple + +import torch + +if torch.__version__ != 'parrots': + + def load_ext(name, funcs): + ext = importlib.import_module('mmcv.' + name) + for fun in funcs: + assert hasattr(ext, fun), f'{fun} miss in module {name}' + return ext +else: + from parrots import extension + + def load_ext(name, funcs): + ExtModule = namedtuple('ExtModule', funcs) + ext_list = [] + lib_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + for fun in funcs: + if fun in ['nms', 'softnms']: + ext_list.append(extension.load(fun, name, lib_dir=lib_root).op) + else: + ext_list.append( + extension.load(fun, name, lib_dir=lib_root).op_) + return ExtModule(*ext_list) + + +def check_ops_exist(): + ext_loader = pkgutil.find_loader('mmcv._ext') + return ext_loader is not None diff --git a/mmcv/mmcv/utils/logging.py b/mmcv/mmcv/utils/logging.py new file mode 100644 index 0000000..b7de773 --- /dev/null +++ b/mmcv/mmcv/utils/logging.py @@ -0,0 +1,93 @@ +import logging + +import torch.distributed as dist + +logger_initialized = {} + + +def get_logger(name, log_file=None, log_level=logging.INFO): + """Initialize and get a logger by name. + + If the logger has not been initialized, this method will initialize the + logger by adding one or two handlers, otherwise the initialized logger will + be directly returned. During initialization, a StreamHandler will always be + added. If `log_file` is specified and the process rank is 0, a FileHandler + will also be added. + + Args: + name (str): Logger name. + log_file (str | None): The log filename. If specified, a FileHandler + will be added to the logger. + log_level (int): The logger level. Note that only the process of + rank 0 is affected, and other processes will set the level to + "Error" thus be silent most of the time. + + Returns: + logging.Logger: The expected logger. + """ + logger = logging.getLogger(name) + if name in logger_initialized: + return logger + # handle hierarchical names + # e.g., logger "a" is initialized, then logger "a.b" will skip the + # initialization since it is a child of "a". + for logger_name in logger_initialized: + if name.startswith(logger_name): + return logger + + stream_handler = logging.StreamHandler() + handlers = [stream_handler] + + if dist.is_available() and dist.is_initialized(): + rank = dist.get_rank() + else: + rank = 0 + + # only rank 0 will add a FileHandler + if rank == 0 and log_file is not None: + file_handler = logging.FileHandler(log_file, 'w') + handlers.append(file_handler) + + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + for handler in handlers: + handler.setFormatter(formatter) + handler.setLevel(log_level) + logger.addHandler(handler) + + if rank == 0: + logger.setLevel(log_level) + else: + logger.setLevel(logging.ERROR) + + logger_initialized[name] = True + + return logger + + +def print_log(msg, logger=None, level=logging.INFO): + """Print a log message. + + Args: + msg (str): The message to be logged. + logger (logging.Logger | str | None): The logger to be used. + Some special loggers are: + - "silent": no message will be printed. + - other str: the logger obtained with `get_root_logger(logger)`. + - None: The `print()` method will be used to print log messages. + level (int): Logging level. Only available when `logger` is a Logger + object or "root". + """ + if logger is None: + print(msg) + elif isinstance(logger, logging.Logger): + logger.log(level, msg) + elif logger == 'silent': + pass + elif isinstance(logger, str): + _logger = get_logger(logger) + _logger.log(level, msg) + else: + raise TypeError( + 'logger should be either a logging.Logger object, str, ' + f'"silent" or None, but got {type(logger)}') diff --git a/mmcv/mmcv/utils/misc.py b/mmcv/mmcv/utils/misc.py new file mode 100644 index 0000000..da70738 --- /dev/null +++ b/mmcv/mmcv/utils/misc.py @@ -0,0 +1,315 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import functools +import itertools +import subprocess +import warnings +from collections import abc +from importlib import import_module +from inspect import getfullargspec + + +def is_str(x): + """Whether the input is an string instance. + + Note: This method is deprecated since python 2 is no longer supported. + """ + return isinstance(x, str) + + +def import_modules_from_strings(imports, allow_failed_imports=False): + """Import modules from the given list of strings. + + Args: + imports (list | str | None): The given module names to be imported. + allow_failed_imports (bool): If True, the failed imports will return + None. Otherwise, an ImportError is raise. Default: False. + + Returns: + list[module] | module | None: The imported modules. + + Examples: + >>> osp, sys = import_modules_from_strings( + ... ['os.path', 'sys']) + >>> import os.path as osp_ + >>> import sys as sys_ + >>> assert osp == osp_ + >>> assert sys == sys_ + """ + if not imports: + return + single_import = False + if isinstance(imports, str): + single_import = True + imports = [imports] + if not isinstance(imports, list): + raise TypeError( + f'custom_imports must be a list but got type {type(imports)}') + imported = [] + for imp in imports: + if not isinstance(imp, str): + raise TypeError( + f'{imp} is of type {type(imp)} and cannot be imported.') + try: + imported_tmp = import_module(imp) + except ImportError: + if allow_failed_imports: + warnings.warn(f'{imp} failed to import and is ignored.', + UserWarning) + imported_tmp = None + else: + raise ImportError + imported.append(imported_tmp) + if single_import: + imported = imported[0] + return imported + + +def iter_cast(inputs, dst_type, return_type=None): + """Cast elements of an iterable object into some type. + + Args: + inputs (Iterable): The input object. + dst_type (type): Destination type. + return_type (type, optional): If specified, the output object will be + converted to this type, otherwise an iterator. + + Returns: + iterator or specified type: The converted object. + """ + if not isinstance(inputs, abc.Iterable): + raise TypeError('inputs must be an iterable object') + if not isinstance(dst_type, type): + raise TypeError('"dst_type" must be a valid type') + + out_iterable = map(dst_type, inputs) + + if return_type is None: + return out_iterable + else: + return return_type(out_iterable) + + +def list_cast(inputs, dst_type): + """Cast elements of an iterable object into a list of some type. + + A partial method of :func:`iter_cast`. + """ + return iter_cast(inputs, dst_type, return_type=list) + + +def tuple_cast(inputs, dst_type): + """Cast elements of an iterable object into a tuple of some type. + + A partial method of :func:`iter_cast`. + """ + return iter_cast(inputs, dst_type, return_type=tuple) + + +def is_seq_of(seq, expected_type, seq_type=None): + """Check whether it is a sequence of some type. + + Args: + seq (Sequence): The sequence to be checked. + expected_type (type): Expected type of sequence items. + seq_type (type, optional): Expected sequence type. + + Returns: + bool: Whether the sequence is valid. + """ + if seq_type is None: + exp_seq_type = abc.Sequence + else: + assert isinstance(seq_type, type) + exp_seq_type = seq_type + if not isinstance(seq, exp_seq_type): + return False + for item in seq: + if not isinstance(item, expected_type): + return False + return True + + +def is_list_of(seq, expected_type): + """Check whether it is a list of some type. + + A partial method of :func:`is_seq_of`. + """ + return is_seq_of(seq, expected_type, seq_type=list) + + +def is_tuple_of(seq, expected_type): + """Check whether it is a tuple of some type. + + A partial method of :func:`is_seq_of`. + """ + return is_seq_of(seq, expected_type, seq_type=tuple) + + +def slice_list(in_list, lens): + """Slice a list into several sub lists by a list of given length. + + Args: + in_list (list): The list to be sliced. + lens(int or list): The expected length of each out list. + + Returns: + list: A list of sliced list. + """ + if isinstance(lens, int): + assert len(in_list) % lens == 0 + lens = [lens] * int(len(in_list) / lens) + if not isinstance(lens, list): + raise TypeError('"indices" must be an integer or a list of integers') + elif sum(lens) != len(in_list): + raise ValueError('sum of lens and list length does not ' + f'match: {sum(lens)} != {len(in_list)}') + out_list = [] + idx = 0 + for i in range(len(lens)): + out_list.append(in_list[idx:idx + lens[i]]) + idx += lens[i] + return out_list + + +def concat_list(in_list): + """Concatenate a list of list into a single list. + + Args: + in_list (list): The list of list to be merged. + + Returns: + list: The concatenated flat list. + """ + return list(itertools.chain(*in_list)) + + +def check_prerequisites( + prerequisites, + checker, + msg_tmpl='Prerequisites "{}" are required in method "{}" but not ' + 'found, please install them first.'): # yapf: disable + """A decorator factory to check if prerequisites are satisfied. + + Args: + prerequisites (str of list[str]): Prerequisites to be checked. + checker (callable): The checker method that returns True if a + prerequisite is meet, False otherwise. + msg_tmpl (str): The message template with two variables. + + Returns: + decorator: A specific decorator. + """ + + def wrap(func): + + @functools.wraps(func) + def wrapped_func(*args, **kwargs): + requirements = [prerequisites] if isinstance( + prerequisites, str) else prerequisites + missing = [] + for item in requirements: + if not checker(item): + missing.append(item) + if missing: + print(msg_tmpl.format(', '.join(missing), func.__name__)) + raise RuntimeError('Prerequisites not meet.') + else: + return func(*args, **kwargs) + + return wrapped_func + + return wrap + + +def _check_py_package(package): + try: + import_module(package) + except ImportError: + return False + else: + return True + + +def _check_executable(cmd): + if subprocess.call(f'which {cmd}', shell=True) != 0: + return False + else: + return True + + +def requires_package(prerequisites): + """A decorator to check if some python packages are installed. + + Example: + >>> @requires_package('numpy') + >>> func(arg1, args): + >>> return numpy.zeros(1) + array([0.]) + >>> @requires_package(['numpy', 'non_package']) + >>> func(arg1, args): + >>> return numpy.zeros(1) + ImportError + """ + return check_prerequisites(prerequisites, checker=_check_py_package) + + +def requires_executable(prerequisites): + """A decorator to check if some executable files are installed. + + Example: + >>> @requires_executable('ffmpeg') + >>> func(arg1, args): + >>> print(1) + 1 + """ + return check_prerequisites(prerequisites, checker=_check_executable) + + +def deprecated_api_warning(name_dict, cls_name=None): + """A decorator to check if some argments are deprecate and try to replace + deprecate src_arg_name to dst_arg_name. + + Args: + name_dict(dict): + key (str): Deprecate argument names. + val (str): Expected argument names. + + Returns: + func: New function. + """ + + def api_warning_wrapper(old_func): + + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get name of the function + func_name = old_func.__name__ + if cls_name is not None: + func_name = f'{cls_name}.{func_name}' + if args: + arg_names = args_info.args[:len(args)] + for src_arg_name, dst_arg_name in name_dict.items(): + if src_arg_name in arg_names: + warnings.warn( + f'"{src_arg_name}" is deprecated in ' + f'`{func_name}`, please use "{dst_arg_name}" ' + 'instead') + arg_names[arg_names.index(src_arg_name)] = dst_arg_name + if kwargs: + for src_arg_name, dst_arg_name in name_dict.items(): + if src_arg_name in kwargs: + warnings.warn( + f'"{src_arg_name}" is deprecated in ' + f'`{func_name}`, please use "{dst_arg_name}" ' + 'instead') + kwargs[dst_arg_name] = kwargs.pop(src_arg_name) + + # apply converted arguments to the decorated method + output = old_func(*args, **kwargs) + return output + + return new_func + + return api_warning_wrapper diff --git a/mmcv/mmcv/utils/parrots_wrapper.py b/mmcv/mmcv/utils/parrots_wrapper.py new file mode 100644 index 0000000..25761be --- /dev/null +++ b/mmcv/mmcv/utils/parrots_wrapper.py @@ -0,0 +1,95 @@ +from functools import partial + +import torch + +TORCH_VERSION = torch.__version__ + + +def _get_cuda_home(): + if TORCH_VERSION == 'parrots': + from parrots.utils.build_extension import CUDA_HOME + else: + from torch.utils.cpp_extension import CUDA_HOME + return CUDA_HOME + + +def get_build_config(): + if TORCH_VERSION == 'parrots': + from parrots.config import get_build_info + return get_build_info() + else: + return torch.__config__.show() + + +def _get_conv(): + if TORCH_VERSION == 'parrots': + from parrots.nn.modules.conv import _ConvNd, _ConvTransposeMixin + else: + from torch.nn.modules.conv import _ConvNd, _ConvTransposeMixin + return _ConvNd, _ConvTransposeMixin + + +def _get_dataloader(): + if TORCH_VERSION == 'parrots': + from torch.utils.data import DataLoader, PoolDataLoader + else: + from torch.utils.data import DataLoader + PoolDataLoader = DataLoader + return DataLoader, PoolDataLoader + + +def _get_extension(): + if TORCH_VERSION == 'parrots': + from parrots.utils.build_extension import BuildExtension, Extension + CppExtension = partial(Extension, cuda=False) + CUDAExtension = partial(Extension, cuda=True) + else: + from torch.utils.cpp_extension import (BuildExtension, CppExtension, + CUDAExtension) + return BuildExtension, CppExtension, CUDAExtension + + +def _get_pool(): + if TORCH_VERSION == 'parrots': + from parrots.nn.modules.pool import (_AdaptiveAvgPoolNd, + _AdaptiveMaxPoolNd, _AvgPoolNd, + _MaxPoolNd) + else: + from torch.nn.modules.pooling import (_AdaptiveAvgPoolNd, + _AdaptiveMaxPoolNd, _AvgPoolNd, + _MaxPoolNd) + return _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd + + +def _get_norm(): + if TORCH_VERSION == 'parrots': + from parrots.nn.modules.batchnorm import _BatchNorm, _InstanceNorm + SyncBatchNorm_ = torch.nn.SyncBatchNorm2d + else: + from torch.nn.modules.instancenorm import _InstanceNorm + from torch.nn.modules.batchnorm import _BatchNorm + SyncBatchNorm_ = torch.nn.SyncBatchNorm + return _BatchNorm, _InstanceNorm, SyncBatchNorm_ + + +CUDA_HOME = _get_cuda_home() +_ConvNd, _ConvTransposeMixin = _get_conv() +DataLoader, PoolDataLoader = _get_dataloader() +BuildExtension, CppExtension, CUDAExtension = _get_extension() +_BatchNorm, _InstanceNorm, SyncBatchNorm_ = _get_norm() +_AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd = _get_pool() + + +class SyncBatchNorm(SyncBatchNorm_): + + def _specify_ddp_gpu_num(self, gpu_size): + if TORCH_VERSION != 'parrots': + super()._specify_ddp_gpu_num(gpu_size) + + def _check_input_dim(self, input): + if TORCH_VERSION == 'parrots': + if input.dim() < 2: + raise ValueError( + f'expected at least 2D input (got {input.dim()}D input)') + else: + super()._check_input_dim(input) diff --git a/mmcv/mmcv/utils/path.py b/mmcv/mmcv/utils/path.py new file mode 100644 index 0000000..aed078f --- /dev/null +++ b/mmcv/mmcv/utils/path.py @@ -0,0 +1,98 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import os +import os.path as osp +from pathlib import Path + +from .misc import is_str + + +def is_filepath(x): + return is_str(x) or isinstance(x, Path) + + +def fopen(filepath, *args, **kwargs): + if is_str(filepath): + return open(filepath, *args, **kwargs) + elif isinstance(filepath, Path): + return filepath.open(*args, **kwargs) + raise ValueError('`filepath` should be a string or a Path') + + +def check_file_exist(filename, msg_tmpl='file "{}" does not exist'): + if not osp.isfile(filename): + raise FileNotFoundError(msg_tmpl.format(filename)) + + +def mkdir_or_exist(dir_name, mode=0o777): + if dir_name == '': + return + dir_name = osp.expanduser(dir_name) + os.makedirs(dir_name, mode=mode, exist_ok=True) + + +def symlink(src, dst, overwrite=True, **kwargs): + if os.path.lexists(dst) and overwrite: + os.remove(dst) + os.symlink(src, dst, **kwargs) + + +def scandir(dir_path, suffix=None, recursive=False): + """Scan a directory to find the interested files. + + Args: + dir_path (str | obj:`Path`): Path of the directory. + suffix (str | tuple(str), optional): File suffix that we are + interested in. Default: None. + recursive (bool, optional): If set to True, recursively scan the + directory. Default: False. + + Returns: + A generator for all the interested files with relative pathes. + """ + if isinstance(dir_path, (str, Path)): + dir_path = str(dir_path) + else: + raise TypeError('"dir_path" must be a string or Path object') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('"suffix" must be a string or tuple of strings') + + root = dir_path + + def _scandir(dir_path, suffix, recursive): + for entry in os.scandir(dir_path): + if not entry.name.startswith('.') and entry.is_file(): + rel_path = osp.relpath(entry.path, root) + if suffix is None: + yield rel_path + elif rel_path.endswith(suffix): + yield rel_path + else: + if recursive: + yield from _scandir( + entry.path, suffix=suffix, recursive=recursive) + else: + continue + + return _scandir(dir_path, suffix=suffix, recursive=recursive) + + +def find_vcs_root(path, markers=('.git', )): + """Finds the root directory (including itself) of specified markers. + + Args: + path (str): Path of directory or file. + markers (list[str], optional): List of file or directory names. + + Returns: + The directory contained one of the markers or None if not found. + """ + if osp.isfile(path): + path = osp.dirname(path) + + prev, cur = None, osp.abspath(osp.expanduser(path)) + while cur != prev: + if any(osp.exists(osp.join(cur, marker)) for marker in markers): + return cur + prev, cur = cur, osp.split(cur)[0] + return None diff --git a/mmcv/mmcv/utils/progressbar.py b/mmcv/mmcv/utils/progressbar.py new file mode 100644 index 0000000..f204409 --- /dev/null +++ b/mmcv/mmcv/utils/progressbar.py @@ -0,0 +1,208 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import sys +from collections.abc import Iterable +from multiprocessing import Pool +from shutil import get_terminal_size + +from .timer import Timer + + +class ProgressBar: + """A progress bar which can print the progress.""" + + def __init__(self, task_num=0, bar_width=50, start=True, file=sys.stdout): + self.task_num = task_num + self.bar_width = bar_width + self.completed = 0 + self.file = file + if start: + self.start() + + @property + def terminal_width(self): + width, _ = get_terminal_size() + return width + + def start(self): + if self.task_num > 0: + self.file.write(f'[{" " * self.bar_width}] 0/{self.task_num}, ' + 'elapsed: 0s, ETA:') + else: + self.file.write('completed: 0, elapsed: 0s') + self.file.flush() + self.timer = Timer() + + def update(self, num_tasks=1): + assert num_tasks > 0 + self.completed += num_tasks + elapsed = self.timer.since_start() + if elapsed > 0: + fps = self.completed / elapsed + else: + fps = float('inf') + if self.task_num > 0: + percentage = self.completed / float(self.task_num) + eta = int(elapsed * (1 - percentage) / percentage + 0.5) + msg = f'\r[{{}}] {self.completed}/{self.task_num}, ' \ + f'{fps:.1f} task/s, elapsed: {int(elapsed + 0.5)}s, ' \ + f'ETA: {eta:5}s' + + bar_width = min(self.bar_width, + int(self.terminal_width - len(msg)) + 2, + int(self.terminal_width * 0.6)) + bar_width = max(2, bar_width) + mark_width = int(bar_width * percentage) + bar_chars = '>' * mark_width + ' ' * (bar_width - mark_width) + self.file.write(msg.format(bar_chars)) + else: + self.file.write( + f'completed: {self.completed}, elapsed: {int(elapsed + 0.5)}s,' + f' {fps:.1f} tasks/s') + self.file.flush() + + +def track_progress(func, tasks, bar_width=50, file=sys.stdout, **kwargs): + """Track the progress of tasks execution with a progress bar. + + Tasks are done with a simple for-loop. + + Args: + func (callable): The function to be applied to each task. + tasks (list or tuple[Iterable, int]): A list of tasks or + (tasks, total num). + bar_width (int): Width of progress bar. + + Returns: + list: The task results. + """ + if isinstance(tasks, tuple): + assert len(tasks) == 2 + assert isinstance(tasks[0], Iterable) + assert isinstance(tasks[1], int) + task_num = tasks[1] + tasks = tasks[0] + elif isinstance(tasks, Iterable): + task_num = len(tasks) + else: + raise TypeError( + '"tasks" must be an iterable object or a (iterator, int) tuple') + prog_bar = ProgressBar(task_num, bar_width, file=file) + results = [] + for task in tasks: + results.append(func(task, **kwargs)) + prog_bar.update() + prog_bar.file.write('\n') + return results + + +def init_pool(process_num, initializer=None, initargs=None): + if initializer is None: + return Pool(process_num) + elif initargs is None: + return Pool(process_num, initializer) + else: + if not isinstance(initargs, tuple): + raise TypeError('"initargs" must be a tuple') + return Pool(process_num, initializer, initargs) + + +def track_parallel_progress(func, + tasks, + nproc, + initializer=None, + initargs=None, + bar_width=50, + chunksize=1, + skip_first=False, + keep_order=True, + file=sys.stdout): + """Track the progress of parallel task execution with a progress bar. + + The built-in :mod:`multiprocessing` module is used for process pools and + tasks are done with :func:`Pool.map` or :func:`Pool.imap_unordered`. + + Args: + func (callable): The function to be applied to each task. + tasks (list or tuple[Iterable, int]): A list of tasks or + (tasks, total num). + nproc (int): Process (worker) number. + initializer (None or callable): Refer to :class:`multiprocessing.Pool` + for details. + initargs (None or tuple): Refer to :class:`multiprocessing.Pool` for + details. + chunksize (int): Refer to :class:`multiprocessing.Pool` for details. + bar_width (int): Width of progress bar. + skip_first (bool): Whether to skip the first sample for each worker + when estimating fps, since the initialization step may takes + longer. + keep_order (bool): If True, :func:`Pool.imap` is used, otherwise + :func:`Pool.imap_unordered` is used. + + Returns: + list: The task results. + """ + if isinstance(tasks, tuple): + assert len(tasks) == 2 + assert isinstance(tasks[0], Iterable) + assert isinstance(tasks[1], int) + task_num = tasks[1] + tasks = tasks[0] + elif isinstance(tasks, Iterable): + task_num = len(tasks) + else: + raise TypeError( + '"tasks" must be an iterable object or a (iterator, int) tuple') + pool = init_pool(nproc, initializer, initargs) + start = not skip_first + task_num -= nproc * chunksize * int(skip_first) + prog_bar = ProgressBar(task_num, bar_width, start, file=file) + results = [] + if keep_order: + gen = pool.imap(func, tasks, chunksize) + else: + gen = pool.imap_unordered(func, tasks, chunksize) + for result in gen: + results.append(result) + if skip_first: + if len(results) < nproc * chunksize: + continue + elif len(results) == nproc * chunksize: + prog_bar.start() + continue + prog_bar.update() + prog_bar.file.write('\n') + pool.close() + pool.join() + return results + + +def track_iter_progress(tasks, bar_width=50, file=sys.stdout): + """Track the progress of tasks iteration or enumeration with a progress + bar. + + Tasks are yielded with a simple for-loop. + + Args: + tasks (list or tuple[Iterable, int]): A list of tasks or + (tasks, total num). + bar_width (int): Width of progress bar. + + Yields: + list: The task results. + """ + if isinstance(tasks, tuple): + assert len(tasks) == 2 + assert isinstance(tasks[0], Iterable) + assert isinstance(tasks[1], int) + task_num = tasks[1] + tasks = tasks[0] + elif isinstance(tasks, Iterable): + task_num = len(tasks) + else: + raise TypeError( + '"tasks" must be an iterable object or a (iterator, int) tuple') + prog_bar = ProgressBar(task_num, bar_width, file=file) + for task in tasks: + yield task + prog_bar.update() + prog_bar.file.write('\n') diff --git a/mmcv/mmcv/utils/registry.py b/mmcv/mmcv/utils/registry.py new file mode 100644 index 0000000..64b83f1 --- /dev/null +++ b/mmcv/mmcv/utils/registry.py @@ -0,0 +1,171 @@ +import inspect +import warnings +from functools import partial + +from .misc import is_str + + +class Registry: + """A registry to map strings to classes. + + Args: + name (str): Registry name. + """ + + def __init__(self, name): + self._name = name + self._module_dict = dict() + + def __len__(self): + return len(self._module_dict) + + def __contains__(self, key): + return self.get(key) is not None + + def __repr__(self): + format_str = self.__class__.__name__ + \ + f'(name={self._name}, ' \ + f'items={self._module_dict})' + return format_str + + @property + def name(self): + return self._name + + @property + def module_dict(self): + return self._module_dict + + def get(self, key): + """Get the registry record. + + Args: + key (str): The class name in string format. + + Returns: + class: The corresponding class. + """ + return self._module_dict.get(key, None) + + def _register_module(self, module_class, module_name=None, force=False): + if not inspect.isclass(module_class): + raise TypeError('module must be a class, ' + f'but got {type(module_class)}') + + if module_name is None: + module_name = module_class.__name__ + if not force and module_name in self._module_dict: + raise KeyError(f'{module_name} is already registered ' + f'in {self.name}') + self._module_dict[module_name] = module_class + + def deprecated_register_module(self, cls=None, force=False): + warnings.warn( + 'The old API of register_module(module, force=False) ' + 'is deprecated and will be removed, please use the new API ' + 'register_module(name=None, force=False, module=None) instead.') + if cls is None: + return partial(self.deprecated_register_module, force=force) + self._register_module(cls, force=force) + return cls + + def register_module(self, name=None, force=False, module=None): + """Register a module. + + A record will be added to `self._module_dict`, whose key is the class + name or the specified name, and value is the class itself. + It can be used as a decorator or a normal function. + + Example: + >>> backbones = Registry('backbone') + >>> @backbones.register_module() + >>> class ResNet: + >>> pass + + >>> backbones = Registry('backbone') + >>> @backbones.register_module(name='mnet') + >>> class MobileNet: + >>> pass + + >>> backbones = Registry('backbone') + >>> class ResNet: + >>> pass + >>> backbones.register_module(ResNet) + + Args: + name (str | None): The module name to be registered. If not + specified, the class name will be used. + force (bool, optional): Whether to override an existing class with + the same name. Default: False. + module (type): Module class to be registered. + """ + if not isinstance(force, bool): + raise TypeError(f'force must be a boolean, but got {type(force)}') + # NOTE: This is a walkaround to be compatible with the old api, + # while it may introduce unexpected bugs. + if isinstance(name, type): + return self.deprecated_register_module(name, force=force) + + # use it as a normal method: x.register_module(module=SomeClass) + if module is not None: + self._register_module( + module_class=module, module_name=name, force=force) + return module + + # raise the error ahead of time + if not (name is None or isinstance(name, str)): + raise TypeError(f'name must be a str, but got {type(name)}') + + # use it as a decorator: @x.register_module() + def _register(cls): + self._register_module( + module_class=cls, module_name=name, force=force) + return cls + + return _register + + +def build_from_cfg(cfg, registry, default_args=None): + """Build a module from config dict. + + Args: + cfg (dict): Config dict. It should at least contain the key "type". + registry (:obj:`Registry`): The registry to search the type from. + default_args (dict, optional): Default initialization arguments. + + Returns: + object: The constructed object. + """ + if not isinstance(cfg, dict): + raise TypeError(f'cfg must be a dict, but got {type(cfg)}') + if 'type' not in cfg: + if default_args is None or 'type' not in default_args: + raise KeyError( + '`cfg` or `default_args` must contain the key "type", ' + f'but got {cfg}\n{default_args}') + if not isinstance(registry, Registry): + raise TypeError('registry must be an mmcv.Registry object, ' + f'but got {type(registry)}') + if not (isinstance(default_args, dict) or default_args is None): + raise TypeError('default_args must be a dict or None, ' + f'but got {type(default_args)}') + + args = cfg.copy() + + if default_args is not None: + for name, value in default_args.items(): + args.setdefault(name, value) + + obj_type = args.pop('type') + if is_str(obj_type): + obj_cls = registry.get(obj_type) + if obj_cls is None: + raise KeyError( + f'{obj_type} is not in the {registry.name} registry') + elif inspect.isclass(obj_type): + obj_cls = obj_type + else: + raise TypeError( + f'type must be a str or valid type, but got {type(obj_type)}') + + return obj_cls(**args) diff --git a/mmcv/mmcv/utils/timer.py b/mmcv/mmcv/utils/timer.py new file mode 100644 index 0000000..7c7f50c --- /dev/null +++ b/mmcv/mmcv/utils/timer.py @@ -0,0 +1,118 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from time import time + + +class TimerError(Exception): + + def __init__(self, message): + self.message = message + super(TimerError, self).__init__(message) + + +class Timer: + """A flexible Timer class. + + :Example: + + >>> import time + >>> import mmcv + >>> with mmcv.Timer(): + >>> # simulate a code block that will run for 1s + >>> time.sleep(1) + 1.000 + >>> with mmcv.Timer(print_tmpl='it takes {:.1f} seconds'): + >>> # simulate a code block that will run for 1s + >>> time.sleep(1) + it takes 1.0 seconds + >>> timer = mmcv.Timer() + >>> time.sleep(0.5) + >>> print(timer.since_start()) + 0.500 + >>> time.sleep(0.5) + >>> print(timer.since_last_check()) + 0.500 + >>> print(timer.since_start()) + 1.000 + """ + + def __init__(self, start=True, print_tmpl=None): + self._is_running = False + self.print_tmpl = print_tmpl if print_tmpl else '{:.3f}' + if start: + self.start() + + @property + def is_running(self): + """bool: indicate whether the timer is running""" + return self._is_running + + def __enter__(self): + self.start() + return self + + def __exit__(self, type, value, traceback): + print(self.print_tmpl.format(self.since_last_check())) + self._is_running = False + + def start(self): + """Start the timer.""" + if not self._is_running: + self._t_start = time() + self._is_running = True + self._t_last = time() + + def since_start(self): + """Total time since the timer is started. + + Returns (float): Time in seconds. + """ + if not self._is_running: + raise TimerError('timer is not running') + self._t_last = time() + return self._t_last - self._t_start + + def since_last_check(self): + """Time since the last checking. + + Either :func:`since_start` or :func:`since_last_check` is a checking + operation. + + Returns (float): Time in seconds. + """ + if not self._is_running: + raise TimerError('timer is not running') + dur = time() - self._t_last + self._t_last = time() + return dur + + +_g_timers = {} # global timers + + +def check_time(timer_id): + """Add check points in a single line. + + This method is suitable for running a task on a list of items. A timer will + be registered when the method is called for the first time. + + :Example: + + >>> import time + >>> import mmcv + >>> for i in range(1, 6): + >>> # simulate a code block + >>> time.sleep(i) + >>> mmcv.check_time('task1') + 2.000 + 3.000 + 4.000 + 5.000 + + Args: + timer_id (str): Timer identifier. + """ + if timer_id not in _g_timers: + _g_timers[timer_id] = Timer() + return 0 + else: + return _g_timers[timer_id].since_last_check() diff --git a/mmcv/mmcv/utils/version_utils.py b/mmcv/mmcv/utils/version_utils.py new file mode 100644 index 0000000..585c9b7 --- /dev/null +++ b/mmcv/mmcv/utils/version_utils.py @@ -0,0 +1,67 @@ +import os +import subprocess + + +def digit_version(version_str): + """Convert a version string into a tuple of integers. + + This method is usually used for comparing two versions. + + Args: + version_str (str): The version string. + + Returns: + tuple[int]: The version info in digits (integers). + """ + digit_version = [] + for x in version_str.split('.'): + if x.isdigit(): + digit_version.append(int(x)) + elif x.find('rc') != -1: + patch_version = x.split('rc') + digit_version.append(int(patch_version[0]) - 1) + digit_version.append(int(patch_version[1])) + return tuple(digit_version) + + +def _minimal_ext_cmd(cmd): + # construct minimal environment + env = {} + for k in ['SYSTEMROOT', 'PATH', 'HOME']: + v = os.environ.get(k) + if v is not None: + env[k] = v + # LANGUAGE is used on win32 + env['LANGUAGE'] = 'C' + env['LANG'] = 'C' + env['LC_ALL'] = 'C' + out = subprocess.Popen( + cmd, stdout=subprocess.PIPE, env=env).communicate()[0] + return out + + +def get_git_hash(fallback='unknown', digits=None): + """Get the git hash of the current repo. + + Args: + fallback (str, optional): The fallback string when git hash is + unavailable. Defaults to 'unknown'. + digits (int, optional): kept digits of the hash. Defaults to None, + meaning all digits are kept. + + Returns: + str: Git commit hash. + """ + + if digits is not None and not isinstance(digits, int): + raise TypeError('digits must be None or an integer') + + try: + out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) + sha = out.strip().decode('ascii') + if digits is not None: + sha = sha[:digits] + except OSError: + sha = fallback + + return sha diff --git a/mmcv/mmcv/version.py b/mmcv/mmcv/version.py new file mode 100644 index 0000000..27c6386 --- /dev/null +++ b/mmcv/mmcv/version.py @@ -0,0 +1,29 @@ +# Copyright (c) Open-MMLab. All rights reserved. + +__version__ = '1.2.0' + + +def parse_version_info(version_str): + """Parse a version string into a tuple. + + Args: + version_str (str): The version string. + + Returns: + tuple[int | str]: The version info, e.g., "1.3.0" is parsed into + (1, 3, 0), and "2.0.0rc1" is parsed into (2, 0, 0, 'rc1'). + """ + version_info = [] + for x in version_str.split('.'): + if x.isdigit(): + version_info.append(int(x)) + elif x.find('rc') != -1: + patch_version = x.split('rc') + version_info.append(int(patch_version[0])) + version_info.append(f'rc{patch_version[1]}') + return tuple(version_info) + + +version_info = parse_version_info(__version__) + +__all__ = ['__version__', 'version_info', 'parse_version_info'] diff --git a/mmcv/mmcv/video/__init__.py b/mmcv/mmcv/video/__init__.py new file mode 100644 index 0000000..28f2d7e --- /dev/null +++ b/mmcv/mmcv/video/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from .io import Cache, VideoReader, frames2video +from .optflow import (dequantize_flow, flow_warp, flowread, flowwrite, + quantize_flow) +from .processing import concat_video, convert_video, cut_video, resize_video + +__all__ = [ + 'Cache', 'VideoReader', 'frames2video', 'convert_video', 'resize_video', + 'cut_video', 'concat_video', 'flowread', 'flowwrite', 'quantize_flow', + 'dequantize_flow', 'flow_warp' +] diff --git a/mmcv/mmcv/video/io.py b/mmcv/mmcv/video/io.py new file mode 100644 index 0000000..08b78cd --- /dev/null +++ b/mmcv/mmcv/video/io.py @@ -0,0 +1,323 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import os.path as osp +from collections import OrderedDict + +import cv2 +from cv2 import (CAP_PROP_FOURCC, CAP_PROP_FPS, CAP_PROP_FRAME_COUNT, + CAP_PROP_FRAME_HEIGHT, CAP_PROP_FRAME_WIDTH, + CAP_PROP_POS_FRAMES, VideoWriter_fourcc) + +from mmcv.utils import (check_file_exist, mkdir_or_exist, scandir, + track_progress) + + +class Cache: + + def __init__(self, capacity): + self._cache = OrderedDict() + self._capacity = int(capacity) + if capacity <= 0: + raise ValueError('capacity must be a positive integer') + + @property + def capacity(self): + return self._capacity + + @property + def size(self): + return len(self._cache) + + def put(self, key, val): + if key in self._cache: + return + if len(self._cache) >= self.capacity: + self._cache.popitem(last=False) + self._cache[key] = val + + def get(self, key, default=None): + val = self._cache[key] if key in self._cache else default + return val + + +class VideoReader: + """Video class with similar usage to a list object. + + This video warpper class provides convenient apis to access frames. + There exists an issue of OpenCV's VideoCapture class that jumping to a + certain frame may be inaccurate. It is fixed in this class by checking + the position after jumping each time. + Cache is used when decoding videos. So if the same frame is visited for + the second time, there is no need to decode again if it is stored in the + cache. + + :Example: + + >>> import mmcv + >>> v = mmcv.VideoReader('sample.mp4') + >>> len(v) # get the total frame number with `len()` + 120 + >>> for img in v: # v is iterable + >>> mmcv.imshow(img) + >>> v[5] # get the 6th frame + """ + + def __init__(self, filename, cache_capacity=10): + # Check whether the video path is a url + if not filename.startswith(('https://', 'http://')): + check_file_exist(filename, 'Video file not found: ' + filename) + self._vcap = cv2.VideoCapture(filename) + assert cache_capacity > 0 + self._cache = Cache(cache_capacity) + self._position = 0 + # get basic info + self._width = int(self._vcap.get(CAP_PROP_FRAME_WIDTH)) + self._height = int(self._vcap.get(CAP_PROP_FRAME_HEIGHT)) + self._fps = self._vcap.get(CAP_PROP_FPS) + self._frame_cnt = int(self._vcap.get(CAP_PROP_FRAME_COUNT)) + self._fourcc = self._vcap.get(CAP_PROP_FOURCC) + + @property + def vcap(self): + """:obj:`cv2.VideoCapture`: The raw VideoCapture object.""" + return self._vcap + + @property + def opened(self): + """bool: Indicate whether the video is opened.""" + return self._vcap.isOpened() + + @property + def width(self): + """int: Width of video frames.""" + return self._width + + @property + def height(self): + """int: Height of video frames.""" + return self._height + + @property + def resolution(self): + """tuple: Video resolution (width, height).""" + return (self._width, self._height) + + @property + def fps(self): + """float: FPS of the video.""" + return self._fps + + @property + def frame_cnt(self): + """int: Total frames of the video.""" + return self._frame_cnt + + @property + def fourcc(self): + """str: "Four character code" of the video.""" + return self._fourcc + + @property + def position(self): + """int: Current cursor position, indicating frame decoded.""" + return self._position + + def _get_real_position(self): + return int(round(self._vcap.get(CAP_PROP_POS_FRAMES))) + + def _set_real_position(self, frame_id): + self._vcap.set(CAP_PROP_POS_FRAMES, frame_id) + pos = self._get_real_position() + for _ in range(frame_id - pos): + self._vcap.read() + self._position = frame_id + + def read(self): + """Read the next frame. + + If the next frame have been decoded before and in the cache, then + return it directly, otherwise decode, cache and return it. + + Returns: + ndarray or None: Return the frame if successful, otherwise None. + """ + # pos = self._position + if self._cache: + img = self._cache.get(self._position) + if img is not None: + ret = True + else: + if self._position != self._get_real_position(): + self._set_real_position(self._position) + ret, img = self._vcap.read() + if ret: + self._cache.put(self._position, img) + else: + ret, img = self._vcap.read() + if ret: + self._position += 1 + return img + + def get_frame(self, frame_id): + """Get frame by index. + + Args: + frame_id (int): Index of the expected frame, 0-based. + + Returns: + ndarray or None: Return the frame if successful, otherwise None. + """ + if frame_id < 0 or frame_id >= self._frame_cnt: + raise IndexError( + f'"frame_id" must be between 0 and {self._frame_cnt - 1}') + if frame_id == self._position: + return self.read() + if self._cache: + img = self._cache.get(frame_id) + if img is not None: + self._position = frame_id + 1 + return img + self._set_real_position(frame_id) + ret, img = self._vcap.read() + if ret: + if self._cache: + self._cache.put(self._position, img) + self._position += 1 + return img + + def current_frame(self): + """Get the current frame (frame that is just visited). + + Returns: + ndarray or None: If the video is fresh, return None, otherwise + return the frame. + """ + if self._position == 0: + return None + return self._cache.get(self._position - 1) + + def cvt2frames(self, + frame_dir, + file_start=0, + filename_tmpl='{:06d}.jpg', + start=0, + max_num=0, + show_progress=True): + """Convert a video to frame images. + + Args: + frame_dir (str): Output directory to store all the frame images. + file_start (int): Filenames will start from the specified number. + filename_tmpl (str): Filename template with the index as the + placeholder. + start (int): The starting frame index. + max_num (int): Maximum number of frames to be written. + show_progress (bool): Whether to show a progress bar. + """ + mkdir_or_exist(frame_dir) + if max_num == 0: + task_num = self.frame_cnt - start + else: + task_num = min(self.frame_cnt - start, max_num) + if task_num <= 0: + raise ValueError('start must be less than total frame number') + if start > 0: + self._set_real_position(start) + + def write_frame(file_idx): + img = self.read() + filename = osp.join(frame_dir, filename_tmpl.format(file_idx)) + cv2.imwrite(filename, img) + + if show_progress: + track_progress(write_frame, range(file_start, + file_start + task_num)) + else: + for i in range(task_num): + img = self.read() + if img is None: + break + filename = osp.join(frame_dir, + filename_tmpl.format(i + file_start)) + cv2.imwrite(filename, img) + + def __len__(self): + return self.frame_cnt + + def __getitem__(self, index): + if isinstance(index, slice): + return [ + self.get_frame(i) + for i in range(*index.indices(self.frame_cnt)) + ] + # support negative indexing + if index < 0: + index += self.frame_cnt + if index < 0: + raise IndexError('index out of range') + return self.get_frame(index) + + def __iter__(self): + self._set_real_position(0) + return self + + def __next__(self): + img = self.read() + if img is not None: + return img + else: + raise StopIteration + + next = __next__ + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._vcap.release() + + +def frames2video(frame_dir, + video_file, + fps=30, + fourcc='XVID', + filename_tmpl='{:06d}.jpg', + start=0, + end=0, + show_progress=True): + """Read the frame images from a directory and join them as a video. + + Args: + frame_dir (str): The directory containing video frames. + video_file (str): Output filename. + fps (float): FPS of the output video. + fourcc (str): Fourcc of the output video, this should be compatible + with the output file type. + filename_tmpl (str): Filename template with the index as the variable. + start (int): Starting frame index. + end (int): Ending frame index. + show_progress (bool): Whether to show a progress bar. + """ + if end == 0: + ext = filename_tmpl.split('.')[-1] + end = len([name for name in scandir(frame_dir, ext)]) + first_file = osp.join(frame_dir, filename_tmpl.format(start)) + check_file_exist(first_file, 'The start frame not found: ' + first_file) + img = cv2.imread(first_file) + height, width = img.shape[:2] + resolution = (width, height) + vwriter = cv2.VideoWriter(video_file, VideoWriter_fourcc(*fourcc), fps, + resolution) + + def write_frame(file_idx): + filename = osp.join(frame_dir, filename_tmpl.format(file_idx)) + img = cv2.imread(filename) + vwriter.write(img) + + if show_progress: + track_progress(write_frame, range(start, end)) + else: + for i in range(start, end): + filename = osp.join(frame_dir, filename_tmpl.format(i)) + img = cv2.imread(filename) + vwriter.write(img) + vwriter.release() diff --git a/mmcv/mmcv/video/optflow.py b/mmcv/mmcv/video/optflow.py new file mode 100644 index 0000000..ab0a532 --- /dev/null +++ b/mmcv/mmcv/video/optflow.py @@ -0,0 +1,169 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import numpy as np + +from mmcv._flow_warp_ext import flow_warp_c +from mmcv.arraymisc import dequantize, quantize +from mmcv.image import imread, imwrite +from mmcv.utils import is_str + + +def flowread(flow_or_path, quantize=False, concat_axis=0, *args, **kwargs): + """Read an optical flow map. + + Args: + flow_or_path (ndarray or str): A flow map or filepath. + quantize (bool): whether to read quantized pair, if set to True, + remaining args will be passed to :func:`dequantize_flow`. + concat_axis (int): The axis that dx and dy are concatenated, + can be either 0 or 1. Ignored if quantize is False. + + Returns: + ndarray: Optical flow represented as a (h, w, 2) numpy array + """ + if isinstance(flow_or_path, np.ndarray): + if (flow_or_path.ndim != 3) or (flow_or_path.shape[-1] != 2): + raise ValueError(f'Invalid flow with shape {flow_or_path.shape}') + return flow_or_path + elif not is_str(flow_or_path): + raise TypeError(f'"flow_or_path" must be a filename or numpy array, ' + f'not {type(flow_or_path)}') + + if not quantize: + with open(flow_or_path, 'rb') as f: + try: + header = f.read(4).decode('utf-8') + except Exception: + raise IOError(f'Invalid flow file: {flow_or_path}') + else: + if header != 'PIEH': + raise IOError(f'Invalid flow file: {flow_or_path}, ' + 'header does not contain PIEH') + + w = np.fromfile(f, np.int32, 1).squeeze() + h = np.fromfile(f, np.int32, 1).squeeze() + flow = np.fromfile(f, np.float32, w * h * 2).reshape((h, w, 2)) + else: + assert concat_axis in [0, 1] + cat_flow = imread(flow_or_path, flag='unchanged') + if cat_flow.ndim != 2: + raise IOError( + f'{flow_or_path} is not a valid quantized flow file, ' + f'its dimension is {cat_flow.ndim}.') + assert cat_flow.shape[concat_axis] % 2 == 0 + dx, dy = np.split(cat_flow, 2, axis=concat_axis) + flow = dequantize_flow(dx, dy, *args, **kwargs) + + return flow.astype(np.float32) + + +def flowwrite(flow, filename, quantize=False, concat_axis=0, *args, **kwargs): + """Write optical flow to file. + + If the flow is not quantized, it will be saved as a .flo file losslessly, + otherwise a jpeg image which is lossy but of much smaller size. (dx and dy + will be concatenated horizontally into a single image if quantize is True.) + + Args: + flow (ndarray): (h, w, 2) array of optical flow. + filename (str): Output filepath. + quantize (bool): Whether to quantize the flow and save it to 2 jpeg + images. If set to True, remaining args will be passed to + :func:`quantize_flow`. + concat_axis (int): The axis that dx and dy are concatenated, + can be either 0 or 1. Ignored if quantize is False. + """ + if not quantize: + with open(filename, 'wb') as f: + f.write('PIEH'.encode('utf-8')) + np.array([flow.shape[1], flow.shape[0]], dtype=np.int32).tofile(f) + flow = flow.astype(np.float32) + flow.tofile(f) + f.flush() + else: + assert concat_axis in [0, 1] + dx, dy = quantize_flow(flow, *args, **kwargs) + dxdy = np.concatenate((dx, dy), axis=concat_axis) + imwrite(dxdy, filename) + + +def quantize_flow(flow, max_val=0.02, norm=True): + """Quantize flow to [0, 255]. + + After this step, the size of flow will be much smaller, and can be + dumped as jpeg images. + + Args: + flow (ndarray): (h, w, 2) array of optical flow. + max_val (float): Maximum value of flow, values beyond + [-max_val, max_val] will be truncated. + norm (bool): Whether to divide flow values by image width/height. + + Returns: + tuple[ndarray]: Quantized dx and dy. + """ + h, w, _ = flow.shape + dx = flow[..., 0] + dy = flow[..., 1] + if norm: + dx = dx / w # avoid inplace operations + dy = dy / h + # use 255 levels instead of 256 to make sure 0 is 0 after dequantization. + flow_comps = [ + quantize(d, -max_val, max_val, 255, np.uint8) for d in [dx, dy] + ] + return tuple(flow_comps) + + +def dequantize_flow(dx, dy, max_val=0.02, denorm=True): + """Recover from quantized flow. + + Args: + dx (ndarray): Quantized dx. + dy (ndarray): Quantized dy. + max_val (float): Maximum value used when quantizing. + denorm (bool): Whether to multiply flow values with width/height. + + Returns: + ndarray: Dequantized flow. + """ + assert dx.shape == dy.shape + assert dx.ndim == 2 or (dx.ndim == 3 and dx.shape[-1] == 1) + + dx, dy = [dequantize(d, -max_val, max_val, 255) for d in [dx, dy]] + + if denorm: + dx *= dx.shape[1] + dy *= dx.shape[0] + flow = np.dstack((dx, dy)) + return flow + + +def flow_warp(img, flow, filling_value=0, interpolate_mode='nearest'): + """Use flow to warp img. + + Args: + img (ndarray, float or uint8): Image to be warped. + flow (ndarray, float): Optical Flow. + filling_value (int): The missing pixels will be set with filling_value. + interpolate_mode (str): bilinear -> Bilinear Interpolation; + nearest -> Nearest Neighbor. + + Returns: + ndarray: Warped image with the same shape of img + """ + interpolate_mode_dict = {'bilinear': 0, 'nearest': 1} + assert len(img.shape) == 3 + assert len(flow.shape) == 3 and flow.shape[2] == 2 + assert flow.shape[:2] == img.shape[:2] + assert interpolate_mode in interpolate_mode_dict.keys() + + interpolate_mode = interpolate_mode_dict[interpolate_mode] + img_float = img.astype(np.float64) + + out = flow_warp_c( + img_float, + flow.astype(np.float64), + filling_value=filling_value, + interpolate_mode=interpolate_mode) + + return out diff --git a/mmcv/mmcv/video/optflow_warp/__init__.py b/mmcv/mmcv/video/optflow_warp/__init__.py new file mode 100644 index 0000000..bab2965 --- /dev/null +++ b/mmcv/mmcv/video/optflow_warp/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Open-MMLab. All rights reserved. diff --git a/mmcv/mmcv/video/optflow_warp/flow_warp.cpp b/mmcv/mmcv/video/optflow_warp/flow_warp.cpp new file mode 100644 index 0000000..200ebf8 --- /dev/null +++ b/mmcv/mmcv/video/optflow_warp/flow_warp.cpp @@ -0,0 +1,76 @@ +// Copyright (c) Open-MMLab. All rights reserved. +#include "flow_warp.hpp" + +void FlowWarp(double* img, double* flow, double* out, const int height, + const int width, const int channels, const int filling_value = 0, + const int interpolateMode = 0) { + for (int h = 0; h < height; h++) { + for (int w = 0; w < width; w++) { + int offset_cur = h * width + w; + int offset_img = offset_cur * channels; + int offset_flow = offset_cur * 2; + double x, y; + x = h + flow[offset_flow + 1]; + y = w + flow[offset_flow]; + + if (x < 0 || x >= height - 1 || y < 0 || y >= width - 1) { + for (int k = 0; k < channels; k++) { + out[offset_img + k] = filling_value; + } + continue; + } + + if (interpolateMode == 0) + BilinearInterpolate(img, width, height, channels, x, y, + out + offset_img); + else if (interpolateMode == 1) + NNInterpolate(img, width, height, channels, x, y, out + offset_img); + else + throw "Not Implemented Interpolation Method"; + } + } +} + +void BilinearInterpolate(const double* img, int width, int height, int channels, + double x, double y, double* out) { + int xx, yy, m, n, u, v, offset, offset_img, l; + xx = x; + yy = y; + + double dx, dy, s; + + dx = __max__(__min__(x - xx, double(1)), double(0)); + dy = __max__(__min__(y - yy, double(1)), double(0)); + + for (m = 0; m <= 1; m++) + for (n = 0; n <= 1; n++) { + u = EnforceRange(yy + n, width); + v = EnforceRange(xx + m, height); + offset = v * width + u; + offset_img = offset * channels; + s = fabs(1 - m - dx) * fabs(1 - n - dy); + for (l = 0; l < channels; l++) out[l] += img[offset_img + l] * s; + } +} + +void NNInterpolate(const double* img, int width, int height, int channels, + double x, double y, double* out) { + int xx, yy, m, n, u, v, offset, offset_img, l; + xx = x; + yy = y; + + double dx, dy; + + dx = __max__(__min__(x - xx, double(1)), double(0)); + dy = __max__(__min__(y - yy, double(1)), double(0)); + + m = (dx < 0.5) ? 0 : 1; + n = (dy < 0.5) ? 0 : 1; + + u = EnforceRange(yy + n, width); + v = EnforceRange(xx + m, height); + offset = v * width + u; + offset_img = offset * channels; + + for (l = 0; l < channels; l++) out[l] = img[offset_img + l]; +} diff --git a/mmcv/mmcv/video/optflow_warp/flow_warp.hpp b/mmcv/mmcv/video/optflow_warp/flow_warp.hpp new file mode 100644 index 0000000..bd8e2da --- /dev/null +++ b/mmcv/mmcv/video/optflow_warp/flow_warp.hpp @@ -0,0 +1,30 @@ +// Copyright (c) Open-MMLab. All rights reserved. +#include +#include + +using namespace std; + +void FlowWarp(double* img, double* flow1, double* out, const int height, + const int width, const int channels, const int filling_value, + const int interpolateMode); + +void BilinearInterpolate(const double* img, int width, int height, int channels, + double x, double y, double* out); + +void NNInterpolate(const double* img, int width, int height, int channels, + double x, double y, double* out); + +template +inline T __min__(T a, T b) { + return a > b ? b : a; +} + +template +inline T __max__(T a, T b) { + return (a < b) ? b : a; +} + +template +inline T EnforceRange(const T x, const int MaxValue) { + return __min__(__max__(x, 0), MaxValue); +} diff --git a/mmcv/mmcv/video/optflow_warp/flow_warp_module.cpp b/mmcv/mmcv/video/optflow_warp/flow_warp_module.cpp new file mode 100644 index 0000000..be7ffc3 --- /dev/null +++ b/mmcv/mmcv/video/optflow_warp/flow_warp_module.cpp @@ -0,0 +1,6624 @@ +/* Generated by Cython 0.29.24 */ + +/* BEGIN: Cython Metadata +{ + "distutils": { + "depends": [ + "/data/anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/core/include/numpy/arrayobject.h", + "/data/anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/core/include/numpy/arrayscalars.h", + "/data/anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/core/include/numpy/ndarrayobject.h", + "/data/anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/core/include/numpy/ndarraytypes.h", + "/data/anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/core/include/numpy/ufuncobject.h", + "mmcv/video/optflow_warp/flow_warp.hpp" + ], + "include_dirs": [ + "./mmcv/video/optflow_warp", + "/data/anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/core/include" + ], + "language": "c++", + "name": "mmcv._flow_warp_ext", + "sources": [ + "./mmcv/video/optflow_warp/flow_warp_module.pyx", + "./mmcv/video/optflow_warp/flow_warp.cpp" + ] + }, + "module_name": "mmcv._flow_warp_ext" +} +END: Cython Metadata */ + +#ifndef PY_SSIZE_T_CLEAN +#define PY_SSIZE_T_CLEAN +#endif /* PY_SSIZE_T_CLEAN */ +#include "Python.h" +#ifndef Py_PYTHON_H + #error Python headers needed to compile C extensions, please install development version of Python. +#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) + #error Cython requires Python 2.6+ or Python 3.3+. +#else +#define CYTHON_ABI "0_29_24" +#define CYTHON_HEX_VERSION 0x001D18F0 +#define CYTHON_FUTURE_DIVISION 1 +#include +#ifndef offsetof + #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) +#endif +#if !defined(WIN32) && !defined(MS_WINDOWS) + #ifndef __stdcall + #define __stdcall + #endif + #ifndef __cdecl + #define __cdecl + #endif + #ifndef __fastcall + #define __fastcall + #endif +#endif +#ifndef DL_IMPORT + #define DL_IMPORT(t) t +#endif +#ifndef DL_EXPORT + #define DL_EXPORT(t) t +#endif +#define __PYX_COMMA , +#ifndef HAVE_LONG_LONG + #if PY_VERSION_HEX >= 0x02070000 + #define HAVE_LONG_LONG + #endif +#endif +#ifndef PY_LONG_LONG + #define PY_LONG_LONG LONG_LONG +#endif +#ifndef Py_HUGE_VAL + #define Py_HUGE_VAL HUGE_VAL +#endif +#ifdef PYPY_VERSION + #define CYTHON_COMPILING_IN_PYPY 1 + #define CYTHON_COMPILING_IN_PYSTON 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 +#elif defined(PYSTON_VERSION) + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_PYSTON 1 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 +#else + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_PYSTON 0 + #define CYTHON_COMPILING_IN_CPYTHON 1 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #if PY_VERSION_HEX < 0x02070000 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #elif !defined(CYTHON_USE_PYTYPE_LOOKUP) + #define CYTHON_USE_PYTYPE_LOOKUP 1 + #endif + #if PY_MAJOR_VERSION < 3 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #if PY_VERSION_HEX < 0x02070000 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #elif !defined(CYTHON_USE_PYLONG_INTERNALS) + #define CYTHON_USE_PYLONG_INTERNALS 1 + #endif + #ifndef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 1 + #endif + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #if PY_VERSION_HEX < 0x030300F0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #elif !defined(CYTHON_USE_UNICODE_WRITER) + #define CYTHON_USE_UNICODE_WRITER 1 + #endif + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #ifndef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 1 + #endif + #ifndef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 1 + #endif + #ifndef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT (PY_VERSION_HEX >= 0x03050000) + #endif + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1) + #endif + #ifndef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX >= 0x030600B1) + #endif + #ifndef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK (PY_VERSION_HEX >= 0x030700A3) + #endif +#endif +#if !defined(CYTHON_FAST_PYCCALL) +#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1) +#endif +#if CYTHON_USE_PYLONG_INTERNALS + #include "longintrepr.h" + #undef SHIFT + #undef BASE + #undef MASK + #ifdef SIZEOF_VOID_P + enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) }; + #endif +#endif +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#ifndef __has_cpp_attribute + #define __has_cpp_attribute(x) 0 +#endif +#ifndef CYTHON_RESTRICT + #if defined(__GNUC__) + #define CYTHON_RESTRICT __restrict__ + #elif defined(_MSC_VER) && _MSC_VER >= 1400 + #define CYTHON_RESTRICT __restrict + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_RESTRICT restrict + #else + #define CYTHON_RESTRICT + #endif +#endif +#ifndef CYTHON_UNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_MAYBE_UNUSED_VAR +# if defined(__cplusplus) + template void CYTHON_MAYBE_UNUSED_VAR( const T& ) { } +# else +# define CYTHON_MAYBE_UNUSED_VAR(x) (void)(x) +# endif +#endif +#ifndef CYTHON_NCP_UNUSED +# if CYTHON_COMPILING_IN_CPYTHON +# define CYTHON_NCP_UNUSED +# else +# define CYTHON_NCP_UNUSED CYTHON_UNUSED +# endif +#endif +#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) +#ifdef _MSC_VER + #ifndef _MSC_STDINT_H_ + #if _MSC_VER < 1300 + typedef unsigned char uint8_t; + typedef unsigned int uint32_t; + #else + typedef unsigned __int8 uint8_t; + typedef unsigned __int32 uint32_t; + #endif + #endif +#else + #include +#endif +#ifndef CYTHON_FALLTHROUGH + #if defined(__cplusplus) && __cplusplus >= 201103L + #if __has_cpp_attribute(fallthrough) + #define CYTHON_FALLTHROUGH [[fallthrough]] + #elif __has_cpp_attribute(clang::fallthrough) + #define CYTHON_FALLTHROUGH [[clang::fallthrough]] + #elif __has_cpp_attribute(gnu::fallthrough) + #define CYTHON_FALLTHROUGH [[gnu::fallthrough]] + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_attribute(fallthrough) + #define CYTHON_FALLTHROUGH __attribute__((fallthrough)) + #else + #define CYTHON_FALLTHROUGH + #endif + #endif + #if defined(__clang__ ) && defined(__apple_build_version__) + #if __apple_build_version__ < 7000000 + #undef CYTHON_FALLTHROUGH + #define CYTHON_FALLTHROUGH + #endif + #endif +#endif + +#ifndef __cplusplus + #error "Cython files generated with the C++ option must be compiled with a C++ compiler." +#endif +#ifndef CYTHON_INLINE + #if defined(__clang__) + #define CYTHON_INLINE __inline__ __attribute__ ((__unused__)) + #else + #define CYTHON_INLINE inline + #endif +#endif +template +void __Pyx_call_destructor(T& x) { + x.~T(); +} +template +class __Pyx_FakeReference { + public: + __Pyx_FakeReference() : ptr(NULL) { } + __Pyx_FakeReference(const T& ref) : ptr(const_cast(&ref)) { } + T *operator->() { return ptr; } + T *operator&() { return ptr; } + operator T&() { return *ptr; } + template bool operator ==(U other) { return *ptr == other; } + template bool operator !=(U other) { return *ptr != other; } + private: + T *ptr; +}; + +#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag) + #define Py_OptimizeFlag 0 +#endif +#define __PYX_BUILD_PY_SSIZE_T "n" +#define CYTHON_FORMAT_SSIZE_T "z" +#if PY_MAJOR_VERSION < 3 + #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" + #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) + #define __Pyx_DefaultClassType PyClass_Type +#else + #define __Pyx_BUILTIN_MODULE_NAME "builtins" +#if PY_VERSION_HEX >= 0x030800A4 && PY_VERSION_HEX < 0x030800B2 + #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a, 0, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#else + #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#endif + #define __Pyx_DefaultClassType PyType_Type +#endif +#ifndef Py_TPFLAGS_CHECKTYPES + #define Py_TPFLAGS_CHECKTYPES 0 +#endif +#ifndef Py_TPFLAGS_HAVE_INDEX + #define Py_TPFLAGS_HAVE_INDEX 0 +#endif +#ifndef Py_TPFLAGS_HAVE_NEWBUFFER + #define Py_TPFLAGS_HAVE_NEWBUFFER 0 +#endif +#ifndef Py_TPFLAGS_HAVE_FINALIZE + #define Py_TPFLAGS_HAVE_FINALIZE 0 +#endif +#ifndef METH_STACKLESS + #define METH_STACKLESS 0 +#endif +#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL) + #ifndef METH_FASTCALL + #define METH_FASTCALL 0x80 + #endif + typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs); + typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args, + Py_ssize_t nargs, PyObject *kwnames); +#else + #define __Pyx_PyCFunctionFast _PyCFunctionFast + #define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords +#endif +#if CYTHON_FAST_PYCCALL +#define __Pyx_PyFastCFunction_Check(func)\ + ((PyCFunction_Check(func) && (METH_FASTCALL == (PyCFunction_GET_FLAGS(func) & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_KEYWORDS | METH_STACKLESS))))) +#else +#define __Pyx_PyFastCFunction_Check(func) 0 +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) + #define PyObject_Malloc(s) PyMem_Malloc(s) + #define PyObject_Free(p) PyMem_Free(p) + #define PyObject_Realloc(p) PyMem_Realloc(p) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030400A1 + #define PyMem_RawMalloc(n) PyMem_Malloc(n) + #define PyMem_RawRealloc(p, n) PyMem_Realloc(p, n) + #define PyMem_RawFree(p) PyMem_Free(p) +#endif +#if CYTHON_COMPILING_IN_PYSTON + #define __Pyx_PyCode_HasFreeVars(co) PyCode_HasFreeVars(co) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) PyFrame_SetLineNumber(frame, lineno) +#else + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno) +#endif +#if !CYTHON_FAST_THREAD_STATE || PY_VERSION_HEX < 0x02070000 + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#elif PY_VERSION_HEX >= 0x03060000 + #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet() +#elif PY_VERSION_HEX >= 0x03000000 + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#else + #define __Pyx_PyThreadState_Current _PyThreadState_Current +#endif +#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT) +#include "pythread.h" +#define Py_tss_NEEDS_INIT 0 +typedef int Py_tss_t; +static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) { + *key = PyThread_create_key(); + return 0; +} +static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) { + Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t)); + *key = Py_tss_NEEDS_INIT; + return key; +} +static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) { + PyObject_Free(key); +} +static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) { + return *key != Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) { + PyThread_delete_key(*key); + *key = Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) { + return PyThread_set_key_value(*key, value); +} +static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { + return PyThread_get_key_value(*key); +} +#endif +#if CYTHON_COMPILING_IN_CPYTHON || defined(_PyDict_NewPresized) +#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n)) +#else +#define __Pyx_PyDict_NewPresized(n) PyDict_New() +#endif +#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION + #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) +#else + #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 && CYTHON_USE_UNICODE_INTERNALS +#define __Pyx_PyDict_GetItemStr(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash) +#else +#define __Pyx_PyDict_GetItemStr(dict, name) PyDict_GetItem(dict, name) +#endif +#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) + #define CYTHON_PEP393_ENABLED 1 + #if defined(PyUnicode_IS_READY) + #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ + 0 : _PyUnicode_Ready((PyObject *)(op))) + #else + #define __Pyx_PyUnicode_READY(op) (0) + #endif + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) + #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u) + #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) + #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, ch) + #if defined(PyUnicode_IS_READY) && defined(PyUnicode_GET_SIZE) + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length)) + #else + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) + #endif + #else + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) + #endif +#else + #define CYTHON_PEP393_ENABLED 0 + #define PyUnicode_1BYTE_KIND 1 + #define PyUnicode_2BYTE_KIND 2 + #define PyUnicode_4BYTE_KIND 4 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 65535 : 1114111) + #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE)) + #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = ch) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) +#else + #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ + PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains) + #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyByteArray_Check) + #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Format) + #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt) +#endif +#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) +#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) +#else + #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) +#endif +#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) + #define PyObject_ASCII(o) PyObject_Repr(o) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBaseString_Type PyUnicode_Type + #define PyStringObject PyUnicodeObject + #define PyString_Type PyUnicode_Type + #define PyString_Check PyUnicode_Check + #define PyString_CheckExact PyUnicode_CheckExact +#ifndef PyObject_Unicode + #define PyObject_Unicode PyObject_Str +#endif +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) + #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) +#else + #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) + #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) +#endif +#ifndef PySet_CheckExact + #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type) +#endif +#if PY_VERSION_HEX >= 0x030900A4 + #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size) +#else + #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size) +#endif +#if CYTHON_ASSUME_SAFE_MACROS + #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq) +#else + #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyIntObject PyLongObject + #define PyInt_Type PyLong_Type + #define PyInt_Check(op) PyLong_Check(op) + #define PyInt_CheckExact(op) PyLong_CheckExact(op) + #define PyInt_FromString PyLong_FromString + #define PyInt_FromUnicode PyLong_FromUnicode + #define PyInt_FromLong PyLong_FromLong + #define PyInt_FromSize_t PyLong_FromSize_t + #define PyInt_FromSsize_t PyLong_FromSsize_t + #define PyInt_AsLong PyLong_AsLong + #define PyInt_AS_LONG PyLong_AS_LONG + #define PyInt_AsSsize_t PyLong_AsSsize_t + #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask + #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask + #define PyNumber_Int PyNumber_Long +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBoolObject PyLongObject +#endif +#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY + #ifndef PyUnicode_InternFromString + #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) + #endif +#endif +#if PY_VERSION_HEX < 0x030200A4 + typedef long Py_hash_t; + #define __Pyx_PyInt_FromHash_t PyInt_FromLong + #define __Pyx_PyInt_AsHash_t PyInt_AsLong +#else + #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t + #define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyMethod_New(func, self, klass) ((self) ? ((void)(klass), PyMethod_New(func, self)) : __Pyx_NewRef(func)) +#else + #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass) +#endif +#if CYTHON_USE_ASYNC_SLOTS + #if PY_VERSION_HEX >= 0x030500B1 + #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods + #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) + #else + #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) + #endif +#else + #define __Pyx_PyType_AsAsync(obj) NULL +#endif +#ifndef __Pyx_PyAsyncMethodsStruct + typedef struct { + unaryfunc am_await; + unaryfunc am_aiter; + unaryfunc am_anext; + } __Pyx_PyAsyncMethodsStruct; +#endif + +#if defined(WIN32) || defined(MS_WINDOWS) + #define _USE_MATH_DEFINES +#endif +#include +#ifdef NAN +#define __PYX_NAN() ((float) NAN) +#else +static CYTHON_INLINE float __PYX_NAN() { + float value; + memset(&value, 0xFF, sizeof(value)); + return value; +} +#endif +#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL) +#define __Pyx_truncl trunc +#else +#define __Pyx_truncl truncl +#endif + +#define __PYX_MARK_ERR_POS(f_index, lineno) \ + { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; } +#define __PYX_ERR(f_index, lineno, Ln_error) \ + { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; } + +#ifndef __PYX_EXTERN_C + #ifdef __cplusplus + #define __PYX_EXTERN_C extern "C" + #else + #define __PYX_EXTERN_C extern + #endif +#endif + +#define __PYX_HAVE__mmcv___flow_warp_ext +#define __PYX_HAVE_API__mmcv___flow_warp_ext +/* Early includes */ +#include +#include +#include "numpy/arrayobject.h" +#include "numpy/ndarrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/arrayscalars.h" +#include "numpy/ufuncobject.h" + + /* NumPy API declarations from "numpy/__init__.pxd" */ + +#include "flow_warp.hpp" +#ifdef _OPENMP +#include +#endif /* _OPENMP */ + +#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS) +#define CYTHON_WITHOUT_ASSERTIONS +#endif + +typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding; + const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; + +#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8) +#define __PYX_DEFAULT_STRING_ENCODING "" +#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString +#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#define __Pyx_uchar_cast(c) ((unsigned char)c) +#define __Pyx_long_cast(x) ((long)x) +#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ + (sizeof(type) < sizeof(Py_ssize_t)) ||\ + (sizeof(type) > sizeof(Py_ssize_t) &&\ + likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX) &&\ + (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ + v == (type)PY_SSIZE_T_MIN))) ||\ + (sizeof(type) == sizeof(Py_ssize_t) &&\ + (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX))) ) +static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) { + return (size_t) i < (size_t) limit; +} +#if defined (__cplusplus) && __cplusplus >= 201103L + #include + #define __Pyx_sst_abs(value) std::abs(value) +#elif SIZEOF_INT >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) abs(value) +#elif SIZEOF_LONG >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) labs(value) +#elif defined (_MSC_VER) + #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value)) +#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define __Pyx_sst_abs(value) llabs(value) +#elif defined (__GNUC__) + #define __Pyx_sst_abs(value) __builtin_llabs(value) +#else + #define __Pyx_sst_abs(value) ((value<0) ? -value : value) +#endif +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*); +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); +#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s)) +#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) +#define __Pyx_PyBytes_FromString PyBytes_FromString +#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#else + #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize +#endif +#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyObject_AsWritableString(s) ((char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableSString(s) ((signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsSString(s) ((const signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) +#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) +#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) +#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) +#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) +static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) { + const Py_UNICODE *u_end = u; + while (*u_end++) ; + return (size_t)(u_end - u - 1); +} +#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u)) +#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode +#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode +#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) +#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b); +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*); +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); +#define __Pyx_PySequence_Tuple(obj)\ + (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj)) +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); +#if CYTHON_ASSUME_SAFE_MACROS +#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) +#else +#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) +#endif +#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) +#if PY_MAJOR_VERSION >= 3 +#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) +#else +#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x)) +#endif +#define __Pyx_PyNumber_Float(x) (PyFloat_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Float(x)) +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII +static int __Pyx_sys_getdefaultencoding_not_ascii; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + PyObject* ascii_chars_u = NULL; + PyObject* ascii_chars_b = NULL; + const char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + if (strcmp(default_encoding_c, "ascii") == 0) { + __Pyx_sys_getdefaultencoding_not_ascii = 0; + } else { + char ascii_chars[128]; + int c; + for (c = 0; c < 128; c++) { + ascii_chars[c] = c; + } + __Pyx_sys_getdefaultencoding_not_ascii = 1; + ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); + if (!ascii_chars_u) goto bad; + ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); + if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { + PyErr_Format( + PyExc_ValueError, + "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.", + default_encoding_c); + goto bad; + } + Py_DECREF(ascii_chars_u); + Py_DECREF(ascii_chars_b); + } + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + Py_XDECREF(ascii_chars_u); + Py_XDECREF(ascii_chars_b); + return -1; +} +#endif +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) +#else +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +static char* __PYX_DEFAULT_STRING_ENCODING; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1); + if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; + strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + return -1; +} +#endif +#endif + + +/* Test for GCC > 2.95 */ +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) + #define likely(x) __builtin_expect(!!(x), 1) + #define unlikely(x) __builtin_expect(!!(x), 0) +#else /* !__GNUC__ or GCC < 2.95 */ + #define likely(x) (x) + #define unlikely(x) (x) +#endif /* __GNUC__ */ +static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; } + +static PyObject *__pyx_m = NULL; +static PyObject *__pyx_d; +static PyObject *__pyx_b; +static PyObject *__pyx_cython_runtime = NULL; +static PyObject *__pyx_empty_tuple; +static PyObject *__pyx_empty_bytes; +static PyObject *__pyx_empty_unicode; +static int __pyx_lineno; +static int __pyx_clineno = 0; +static const char * __pyx_cfilenm= __FILE__; +static const char *__pyx_filename; + +/* Header.proto */ +#if !defined(CYTHON_CCOMPLEX) + #if defined(__cplusplus) + #define CYTHON_CCOMPLEX 1 + #elif defined(_Complex_I) + #define CYTHON_CCOMPLEX 1 + #else + #define CYTHON_CCOMPLEX 0 + #endif +#endif +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + #include + #else + #include + #endif +#endif +#if CYTHON_CCOMPLEX && !defined(__cplusplus) && defined(__sun__) && defined(__GNUC__) + #undef _Complex_I + #define _Complex_I 1.0fj +#endif + + +static const char *__pyx_f[] = { + "mmcv/video/optflow_warp/flow_warp_module.pyx", + "__init__.pxd", + "type.pxd", +}; +/* BufferFormatStructs.proto */ +#define IS_UNSIGNED(type) (((type) -1) > 0) +struct __Pyx_StructField_; +#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0) +typedef struct { + const char* name; + struct __Pyx_StructField_* fields; + size_t size; + size_t arraysize[8]; + int ndim; + char typegroup; + char is_unsigned; + int flags; +} __Pyx_TypeInfo; +typedef struct __Pyx_StructField_ { + __Pyx_TypeInfo* type; + const char* name; + size_t offset; +} __Pyx_StructField; +typedef struct { + __Pyx_StructField* field; + size_t parent_offset; +} __Pyx_BufFmt_StackElem; +typedef struct { + __Pyx_StructField root; + __Pyx_BufFmt_StackElem* head; + size_t fmt_offset; + size_t new_count, enc_count; + size_t struct_alignment; + int is_complex; + char enc_type; + char new_packmode; + char enc_packmode; + char is_valid_array; +} __Pyx_BufFmt_Context; + + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":690 + * # in Cython to enable them only on the right systems. + * + * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<< + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t + */ +typedef npy_int8 __pyx_t_5numpy_int8_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":691 + * + * ctypedef npy_int8 int8_t + * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<< + * ctypedef npy_int32 int32_t + * ctypedef npy_int64 int64_t + */ +typedef npy_int16 __pyx_t_5numpy_int16_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":692 + * ctypedef npy_int8 int8_t + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<< + * ctypedef npy_int64 int64_t + * #ctypedef npy_int96 int96_t + */ +typedef npy_int32 __pyx_t_5numpy_int32_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":693 + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t + * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<< + * #ctypedef npy_int96 int96_t + * #ctypedef npy_int128 int128_t + */ +typedef npy_int64 __pyx_t_5numpy_int64_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":697 + * #ctypedef npy_int128 int128_t + * + * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<< + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t + */ +typedef npy_uint8 __pyx_t_5numpy_uint8_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":698 + * + * ctypedef npy_uint8 uint8_t + * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<< + * ctypedef npy_uint32 uint32_t + * ctypedef npy_uint64 uint64_t + */ +typedef npy_uint16 __pyx_t_5numpy_uint16_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":699 + * ctypedef npy_uint8 uint8_t + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<< + * ctypedef npy_uint64 uint64_t + * #ctypedef npy_uint96 uint96_t + */ +typedef npy_uint32 __pyx_t_5numpy_uint32_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":700 + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t + * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<< + * #ctypedef npy_uint96 uint96_t + * #ctypedef npy_uint128 uint128_t + */ +typedef npy_uint64 __pyx_t_5numpy_uint64_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":704 + * #ctypedef npy_uint128 uint128_t + * + * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<< + * ctypedef npy_float64 float64_t + * #ctypedef npy_float80 float80_t + */ +typedef npy_float32 __pyx_t_5numpy_float32_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":705 + * + * ctypedef npy_float32 float32_t + * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< + * #ctypedef npy_float80 float80_t + * #ctypedef npy_float128 float128_t + */ +typedef npy_float64 __pyx_t_5numpy_float64_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":714 + * # The int types are mapped a bit surprising -- + * # numpy.int corresponds to 'l' and numpy.long to 'q' + * ctypedef npy_long int_t # <<<<<<<<<<<<<< + * ctypedef npy_longlong long_t + * ctypedef npy_longlong longlong_t + */ +typedef npy_long __pyx_t_5numpy_int_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":715 + * # numpy.int corresponds to 'l' and numpy.long to 'q' + * ctypedef npy_long int_t + * ctypedef npy_longlong long_t # <<<<<<<<<<<<<< + * ctypedef npy_longlong longlong_t + * + */ +typedef npy_longlong __pyx_t_5numpy_long_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":716 + * ctypedef npy_long int_t + * ctypedef npy_longlong long_t + * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< + * + * ctypedef npy_ulong uint_t + */ +typedef npy_longlong __pyx_t_5numpy_longlong_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":718 + * ctypedef npy_longlong longlong_t + * + * ctypedef npy_ulong uint_t # <<<<<<<<<<<<<< + * ctypedef npy_ulonglong ulong_t + * ctypedef npy_ulonglong ulonglong_t + */ +typedef npy_ulong __pyx_t_5numpy_uint_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":719 + * + * ctypedef npy_ulong uint_t + * ctypedef npy_ulonglong ulong_t # <<<<<<<<<<<<<< + * ctypedef npy_ulonglong ulonglong_t + * + */ +typedef npy_ulonglong __pyx_t_5numpy_ulong_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":720 + * ctypedef npy_ulong uint_t + * ctypedef npy_ulonglong ulong_t + * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< + * + * ctypedef npy_intp intp_t + */ +typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":722 + * ctypedef npy_ulonglong ulonglong_t + * + * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< + * ctypedef npy_uintp uintp_t + * + */ +typedef npy_intp __pyx_t_5numpy_intp_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":723 + * + * ctypedef npy_intp intp_t + * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< + * + * ctypedef npy_double float_t + */ +typedef npy_uintp __pyx_t_5numpy_uintp_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":725 + * ctypedef npy_uintp uintp_t + * + * ctypedef npy_double float_t # <<<<<<<<<<<<<< + * ctypedef npy_double double_t + * ctypedef npy_longdouble longdouble_t + */ +typedef npy_double __pyx_t_5numpy_float_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":726 + * + * ctypedef npy_double float_t + * ctypedef npy_double double_t # <<<<<<<<<<<<<< + * ctypedef npy_longdouble longdouble_t + * + */ +typedef npy_double __pyx_t_5numpy_double_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":727 + * ctypedef npy_double float_t + * ctypedef npy_double double_t + * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< + * + * ctypedef npy_cfloat cfloat_t + */ +typedef npy_longdouble __pyx_t_5numpy_longdouble_t; +/* Declarations.proto */ +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + typedef ::std::complex< float > __pyx_t_float_complex; + #else + typedef float _Complex __pyx_t_float_complex; + #endif +#else + typedef struct { float real, imag; } __pyx_t_float_complex; +#endif +static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float, float); + +/* Declarations.proto */ +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + typedef ::std::complex< double > __pyx_t_double_complex; + #else + typedef double _Complex __pyx_t_double_complex; + #endif +#else + typedef struct { double real, imag; } __pyx_t_double_complex; +#endif +static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double, double); + + +/*--- Type declarations ---*/ + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":729 + * ctypedef npy_longdouble longdouble_t + * + * ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<< + * ctypedef npy_cdouble cdouble_t + * ctypedef npy_clongdouble clongdouble_t + */ +typedef npy_cfloat __pyx_t_5numpy_cfloat_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":730 + * + * ctypedef npy_cfloat cfloat_t + * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<< + * ctypedef npy_clongdouble clongdouble_t + * + */ +typedef npy_cdouble __pyx_t_5numpy_cdouble_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":731 + * ctypedef npy_cfloat cfloat_t + * ctypedef npy_cdouble cdouble_t + * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<< + * + * ctypedef npy_cdouble complex_t + */ +typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t; + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":733 + * ctypedef npy_clongdouble clongdouble_t + * + * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew1(a): + */ +typedef npy_cdouble __pyx_t_5numpy_complex_t; + +/* --- Runtime support code (head) --- */ +/* Refnanny.proto */ +#ifndef CYTHON_REFNANNY + #define CYTHON_REFNANNY 0 +#endif +#if CYTHON_REFNANNY + typedef struct { + void (*INCREF)(void*, PyObject*, int); + void (*DECREF)(void*, PyObject*, int); + void (*GOTREF)(void*, PyObject*, int); + void (*GIVEREF)(void*, PyObject*, int); + void* (*SetupContext)(const char*, int, const char*); + void (*FinishContext)(void**); + } __Pyx_RefNannyAPIStruct; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); + #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; +#ifdef WITH_THREAD + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + if (acquire_gil) {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ + PyGILState_Release(__pyx_gilstate_save);\ + } else {\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ + } +#else + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__) +#endif + #define __Pyx_RefNannyFinishContext()\ + __Pyx_RefNanny->FinishContext(&__pyx_refnanny) + #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0) + #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0) + #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0) + #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0) +#else + #define __Pyx_RefNannyDeclarations + #define __Pyx_RefNannySetupContext(name, acquire_gil) + #define __Pyx_RefNannyFinishContext() + #define __Pyx_INCREF(r) Py_INCREF(r) + #define __Pyx_DECREF(r) Py_DECREF(r) + #define __Pyx_GOTREF(r) + #define __Pyx_GIVEREF(r) + #define __Pyx_XINCREF(r) Py_XINCREF(r) + #define __Pyx_XDECREF(r) Py_XDECREF(r) + #define __Pyx_XGOTREF(r) + #define __Pyx_XGIVEREF(r) +#endif +#define __Pyx_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_XDECREF(tmp);\ + } while (0) +#define __Pyx_DECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_DECREF(tmp);\ + } while (0) +#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) +#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) + +/* RaiseArgTupleInvalid.proto */ +static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, + Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); + +/* RaiseDoubleKeywords.proto */ +static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); + +/* ParseKeywords.proto */ +static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\ + PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\ + const char* function_name); + +/* ArgTypeTest.proto */ +#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\ + ((likely((Py_TYPE(obj) == type) | (none_allowed && (obj == Py_None)))) ? 1 :\ + __Pyx__ArgTypeTest(obj, type, name, exact)) +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact); + +/* IsLittleEndian.proto */ +static CYTHON_INLINE int __Pyx_Is_Little_Endian(void); + +/* BufferFormatCheck.proto */ +static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts); +static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, + __Pyx_BufFmt_StackElem* stack, + __Pyx_TypeInfo* type); + +/* BufferGetAndValidate.proto */ +#define __Pyx_GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack)\ + ((obj == Py_None || obj == NULL) ?\ + (__Pyx_ZeroBuffer(buf), 0) :\ + __Pyx__GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack)) +static int __Pyx__GetBufferAndValidate(Py_buffer* buf, PyObject* obj, + __Pyx_TypeInfo* dtype, int flags, int nd, int cast, __Pyx_BufFmt_StackElem* stack); +static void __Pyx_ZeroBuffer(Py_buffer* buf); +static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info); +static Py_ssize_t __Pyx_minusones[] = { -1, -1, -1, -1, -1, -1, -1, -1 }; +static Py_ssize_t __Pyx_zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + +/* PyObjectGetAttrStr.proto */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name); +#else +#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) +#endif + +/* GetBuiltinName.proto */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name); + +/* PyDictVersioning.proto */ +#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1) +#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\ + (version_var) = __PYX_GET_DICT_VERSION(dict);\ + (cache_var) = (value); +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\ + static PY_UINT64_T __pyx_dict_version = 0;\ + static PyObject *__pyx_dict_cached_value = NULL;\ + if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\ + (VAR) = __pyx_dict_cached_value;\ + } else {\ + (VAR) = __pyx_dict_cached_value = (LOOKUP);\ + __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\ + }\ +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj); +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj); +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version); +#else +#define __PYX_GET_DICT_VERSION(dict) (0) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var) +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP); +#endif + +/* GetModuleGlobalName.proto */ +#if CYTHON_USE_DICT_VERSIONS +#define __Pyx_GetModuleGlobalName(var, name) {\ + static PY_UINT64_T __pyx_dict_version = 0;\ + static PyObject *__pyx_dict_cached_value = NULL;\ + (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\ + (likely(__pyx_dict_cached_value) ? __Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\ + __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ +} +#define __Pyx_GetModuleGlobalNameUncached(var, name) {\ + PY_UINT64_T __pyx_dict_version;\ + PyObject *__pyx_dict_cached_value;\ + (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ +} +static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value); +#else +#define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name) +#define __Pyx_GetModuleGlobalNameUncached(var, name) (var) = __Pyx__GetModuleGlobalName(name) +static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name); +#endif + +/* PyCFunctionFastCall.proto */ +#if CYTHON_FAST_PYCCALL +static CYTHON_INLINE PyObject *__Pyx_PyCFunction_FastCall(PyObject *func, PyObject **args, Py_ssize_t nargs); +#else +#define __Pyx_PyCFunction_FastCall(func, args, nargs) (assert(0), NULL) +#endif + +/* PyFunctionFastCall.proto */ +#if CYTHON_FAST_PYCALL +#define __Pyx_PyFunction_FastCall(func, args, nargs)\ + __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL) +#if 1 || PY_VERSION_HEX < 0x030600B1 +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs); +#else +#define __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs) _PyFunction_FastCallDict(func, args, nargs, kwargs) +#endif +#define __Pyx_BUILD_ASSERT_EXPR(cond)\ + (sizeof(char [1 - 2*!(cond)]) - 1) +#ifndef Py_MEMBER_SIZE +#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member) +#endif + static size_t __pyx_pyframe_localsplus_offset = 0; + #include "frameobject.h" + #define __Pxy_PyFrame_Initialize_Offsets()\ + ((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\ + (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus))) + #define __Pyx_PyFrame_GetLocalsplus(frame)\ + (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset)) +#endif + +/* PyObjectCall.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); +#else +#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) +#endif + +/* PyObjectCall2Args.proto */ +static CYTHON_UNUSED PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2); + +/* PyObjectCallMethO.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); +#endif + +/* PyObjectCallOneArg.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); + +/* ExtTypeTest.proto */ +static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); + +/* GetItemInt.proto */ +#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\ + (is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\ + __Pyx_GetItemInt_Generic(o, to_py_func(i)))) +#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j); +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, + int is_list, int wraparound, int boundscheck); + +/* PyThreadStateGet.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; +#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; +#define __Pyx_PyErr_Occurred() __pyx_tstate->curexc_type +#else +#define __Pyx_PyThreadState_declare +#define __Pyx_PyThreadState_assign +#define __Pyx_PyErr_Occurred() PyErr_Occurred() +#endif + +/* PyErrFetchRestore.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) +#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) +#else +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#endif +#else +#define __Pyx_PyErr_Clear() PyErr_Clear() +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb) +#endif + +/* GetTopmostException.proto */ +#if CYTHON_USE_EXC_INFO_STACK +static _PyErr_StackItem * __Pyx_PyErr_GetTopmostException(PyThreadState *tstate); +#endif + +/* SaveResetException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +#else +#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb) +#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb) +#endif + +/* PyErrExceptionMatches.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err) +static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err); +#else +#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err) +#endif + +/* GetException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_GetException(type, value, tb) __Pyx__GetException(__pyx_tstate, type, value, tb) +static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#else +static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb); +#endif + +/* RaiseException.proto */ +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); + +/* TypeImport.proto */ +#ifndef __PYX_HAVE_RT_ImportType_proto +#define __PYX_HAVE_RT_ImportType_proto +enum __Pyx_ImportType_CheckSize { + __Pyx_ImportType_CheckSize_Error = 0, + __Pyx_ImportType_CheckSize_Warn = 1, + __Pyx_ImportType_CheckSize_Ignore = 2 +}; +static PyTypeObject *__Pyx_ImportType(PyObject* module, const char *module_name, const char *class_name, size_t size, enum __Pyx_ImportType_CheckSize check_size); +#endif + +/* Import.proto */ +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); + +/* CLineInTraceback.proto */ +#ifdef CYTHON_CLINE_IN_TRACEBACK +#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0) +#else +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line); +#endif + +/* CodeObjectCache.proto */ +typedef struct { + PyCodeObject* code_object; + int code_line; +} __Pyx_CodeObjectCacheEntry; +struct __Pyx_CodeObjectCache { + int count; + int max_count; + __Pyx_CodeObjectCacheEntry* entries; +}; +static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); +static PyCodeObject *__pyx_find_code_object(int code_line); +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); + +/* AddTraceback.proto */ +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename); + +/* BufferStructDeclare.proto */ +typedef struct { + Py_ssize_t shape, strides, suboffsets; +} __Pyx_Buf_DimInfo; +typedef struct { + size_t refcount; + Py_buffer pybuffer; +} __Pyx_Buffer; +typedef struct { + __Pyx_Buffer *rcbuffer; + char *data; + __Pyx_Buf_DimInfo diminfo[8]; +} __Pyx_LocalBuf_ND; + +#if PY_MAJOR_VERSION < 3 + static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags); + static void __Pyx_ReleaseBuffer(Py_buffer *view); +#else + #define __Pyx_GetBuffer PyObject_GetBuffer + #define __Pyx_ReleaseBuffer PyBuffer_Release +#endif + + +/* GCCDiagnostics.proto */ +#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) +#define __Pyx_HAS_GCC_DIAGNOSTIC +#endif + +/* RealImag.proto */ +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + #define __Pyx_CREAL(z) ((z).real()) + #define __Pyx_CIMAG(z) ((z).imag()) + #else + #define __Pyx_CREAL(z) (__real__(z)) + #define __Pyx_CIMAG(z) (__imag__(z)) + #endif +#else + #define __Pyx_CREAL(z) ((z).real) + #define __Pyx_CIMAG(z) ((z).imag) +#endif +#if defined(__cplusplus) && CYTHON_CCOMPLEX\ + && (defined(_WIN32) || defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 5 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )) || __cplusplus >= 201103) + #define __Pyx_SET_CREAL(z,x) ((z).real(x)) + #define __Pyx_SET_CIMAG(z,y) ((z).imag(y)) +#else + #define __Pyx_SET_CREAL(z,x) __Pyx_CREAL(z) = (x) + #define __Pyx_SET_CIMAG(z,y) __Pyx_CIMAG(z) = (y) +#endif + +/* Arithmetic.proto */ +#if CYTHON_CCOMPLEX + #define __Pyx_c_eq_float(a, b) ((a)==(b)) + #define __Pyx_c_sum_float(a, b) ((a)+(b)) + #define __Pyx_c_diff_float(a, b) ((a)-(b)) + #define __Pyx_c_prod_float(a, b) ((a)*(b)) + #define __Pyx_c_quot_float(a, b) ((a)/(b)) + #define __Pyx_c_neg_float(a) (-(a)) + #ifdef __cplusplus + #define __Pyx_c_is_zero_float(z) ((z)==(float)0) + #define __Pyx_c_conj_float(z) (::std::conj(z)) + #if 1 + #define __Pyx_c_abs_float(z) (::std::abs(z)) + #define __Pyx_c_pow_float(a, b) (::std::pow(a, b)) + #endif + #else + #define __Pyx_c_is_zero_float(z) ((z)==0) + #define __Pyx_c_conj_float(z) (conjf(z)) + #if 1 + #define __Pyx_c_abs_float(z) (cabsf(z)) + #define __Pyx_c_pow_float(a, b) (cpowf(a, b)) + #endif + #endif +#else + static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex); + static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex); + #if 1 + static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex, __pyx_t_float_complex); + #endif +#endif + +/* Arithmetic.proto */ +#if CYTHON_CCOMPLEX + #define __Pyx_c_eq_double(a, b) ((a)==(b)) + #define __Pyx_c_sum_double(a, b) ((a)+(b)) + #define __Pyx_c_diff_double(a, b) ((a)-(b)) + #define __Pyx_c_prod_double(a, b) ((a)*(b)) + #define __Pyx_c_quot_double(a, b) ((a)/(b)) + #define __Pyx_c_neg_double(a) (-(a)) + #ifdef __cplusplus + #define __Pyx_c_is_zero_double(z) ((z)==(double)0) + #define __Pyx_c_conj_double(z) (::std::conj(z)) + #if 1 + #define __Pyx_c_abs_double(z) (::std::abs(z)) + #define __Pyx_c_pow_double(a, b) (::std::pow(a, b)) + #endif + #else + #define __Pyx_c_is_zero_double(z) ((z)==0) + #define __Pyx_c_conj_double(z) (conj(z)) + #if 1 + #define __Pyx_c_abs_double(z) (cabs(z)) + #define __Pyx_c_pow_double(a, b) (cpow(a, b)) + #endif + #endif +#else + static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex); + static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex); + #if 1 + static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex, __pyx_t_double_complex); + #endif +#endif + +/* CIntFromPy.proto */ +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); + +/* CIntFromPy.proto */ +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); + +/* FastTypeChecks.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type) +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2); +#else +#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) +#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type) +#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2)) +#endif +#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception) + +/* CheckBinaryVersion.proto */ +static int __Pyx_check_binary_version(void); + +/* InitStrings.proto */ +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); + + +/* Module declarations from 'cpython.buffer' */ + +/* Module declarations from 'libc.string' */ + +/* Module declarations from 'libc.stdio' */ + +/* Module declarations from '__builtin__' */ + +/* Module declarations from 'cpython.type' */ +static PyTypeObject *__pyx_ptype_7cpython_4type_type = 0; + +/* Module declarations from 'cpython' */ + +/* Module declarations from 'cpython.object' */ + +/* Module declarations from 'cpython.ref' */ + +/* Module declarations from 'cpython.mem' */ + +/* Module declarations from 'numpy' */ + +/* Module declarations from 'numpy' */ +static PyTypeObject *__pyx_ptype_5numpy_dtype = 0; +static PyTypeObject *__pyx_ptype_5numpy_flatiter = 0; +static PyTypeObject *__pyx_ptype_5numpy_broadcast = 0; +static PyTypeObject *__pyx_ptype_5numpy_ndarray = 0; +static PyTypeObject *__pyx_ptype_5numpy_generic = 0; +static PyTypeObject *__pyx_ptype_5numpy_number = 0; +static PyTypeObject *__pyx_ptype_5numpy_integer = 0; +static PyTypeObject *__pyx_ptype_5numpy_signedinteger = 0; +static PyTypeObject *__pyx_ptype_5numpy_unsignedinteger = 0; +static PyTypeObject *__pyx_ptype_5numpy_inexact = 0; +static PyTypeObject *__pyx_ptype_5numpy_floating = 0; +static PyTypeObject *__pyx_ptype_5numpy_complexfloating = 0; +static PyTypeObject *__pyx_ptype_5numpy_flexible = 0; +static PyTypeObject *__pyx_ptype_5numpy_character = 0; +static PyTypeObject *__pyx_ptype_5numpy_ufunc = 0; +static CYTHON_INLINE int __pyx_f_5numpy_import_array(void); /*proto*/ + +/* Module declarations from 'mmcv._flow_warp_ext' */ +static __Pyx_TypeInfo __Pyx_TypeInfo_double = { "double", NULL, sizeof(double), { 0 }, 0, 'R', 0, 0 }; +#define __Pyx_MODULE_NAME "mmcv._flow_warp_ext" +extern int __pyx_module_is_main_mmcv___flow_warp_ext; +int __pyx_module_is_main_mmcv___flow_warp_ext = 0; + +/* Implementation of 'mmcv._flow_warp_ext' */ +static PyObject *__pyx_builtin_ImportError; +static const char __pyx_k_Hi[] = "Hi"; +static const char __pyx_k_np[] = "np"; +static const char __pyx_k_main[] = "__main__"; +static const char __pyx_k_name[] = "__name__"; +static const char __pyx_k_test[] = "__test__"; +static const char __pyx_k_STUFF[] = "STUFF"; +static const char __pyx_k_numpy[] = "numpy"; +static const char __pyx_k_shape[] = "shape"; +static const char __pyx_k_import[] = "__import__"; +static const char __pyx_k_img_array[] = "img_array"; +static const char __pyx_k_out_array[] = "out_array"; +static const char __pyx_k_flow_array[] = "flow_array"; +static const char __pyx_k_zeros_like[] = "zeros_like"; +static const char __pyx_k_ImportError[] = "ImportError"; +static const char __pyx_k_flow_warp_c[] = "flow_warp_c"; +static const char __pyx_k_filling_value[] = "filling_value"; +static const char __pyx_k_interpolate_mode[] = "interpolate_mode"; +static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback"; +static const char __pyx_k_mmcv__flow_warp_ext[] = "mmcv._flow_warp_ext"; +static const char __pyx_k_numpy_core_multiarray_failed_to[] = "numpy.core.multiarray failed to import"; +static const char __pyx_k_mmcv_video_optflow_warp_flow_war[] = "mmcv/video/optflow_warp/flow_warp_module.pyx"; +static const char __pyx_k_numpy_core_umath_failed_to_impor[] = "numpy.core.umath failed to import"; +static PyObject *__pyx_n_u_Hi; +static PyObject *__pyx_n_s_ImportError; +static PyObject *__pyx_n_s_STUFF; +static PyObject *__pyx_n_s_cline_in_traceback; +static PyObject *__pyx_n_s_filling_value; +static PyObject *__pyx_n_s_flow_array; +static PyObject *__pyx_n_s_flow_warp_c; +static PyObject *__pyx_n_s_img_array; +static PyObject *__pyx_n_s_import; +static PyObject *__pyx_n_s_interpolate_mode; +static PyObject *__pyx_n_s_main; +static PyObject *__pyx_n_s_mmcv__flow_warp_ext; +static PyObject *__pyx_kp_s_mmcv_video_optflow_warp_flow_war; +static PyObject *__pyx_n_s_name; +static PyObject *__pyx_n_s_np; +static PyObject *__pyx_n_s_numpy; +static PyObject *__pyx_kp_u_numpy_core_multiarray_failed_to; +static PyObject *__pyx_kp_u_numpy_core_umath_failed_to_impor; +static PyObject *__pyx_n_s_out_array; +static PyObject *__pyx_n_s_shape; +static PyObject *__pyx_n_s_test; +static PyObject *__pyx_n_s_zeros_like; +static PyObject *__pyx_pf_4mmcv_14_flow_warp_ext_flow_warp_c(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_img_array, PyArrayObject *__pyx_v_flow_array, int __pyx_v_filling_value, int __pyx_v_interpolate_mode); /* proto */ +static PyObject *__pyx_tuple_; +static PyObject *__pyx_tuple__2; +static PyObject *__pyx_tuple__3; +static PyObject *__pyx_codeobj__4; +/* Late includes */ + +/* "mmcv/video/optflow_warp/flow_warp_module.pyx":13 + * void FlowWarp(double* img, double* flow1, double* out, const int height, const int width, const int channels, const int filling_value, const int interpolateMode) + * + * def flow_warp_c(np.ndarray[double, ndim=3, mode="c"] img_array not None, # <<<<<<<<<<<<<< + * np.ndarray[double, ndim=3, mode="c"] flow_array not None, + * int filling_value=0, + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4mmcv_14_flow_warp_ext_1flow_warp_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyMethodDef __pyx_mdef_4mmcv_14_flow_warp_ext_1flow_warp_c = {"flow_warp_c", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_4mmcv_14_flow_warp_ext_1flow_warp_c, METH_VARARGS|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_4mmcv_14_flow_warp_ext_1flow_warp_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyArrayObject *__pyx_v_img_array = 0; + PyArrayObject *__pyx_v_flow_array = 0; + int __pyx_v_filling_value; + int __pyx_v_interpolate_mode; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("flow_warp_c (wrapper)", 0); + { + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_img_array,&__pyx_n_s_flow_array,&__pyx_n_s_filling_value,&__pyx_n_s_interpolate_mode,0}; + PyObject* values[4] = {0,0,0,0}; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args; + const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); + switch (pos_args) { + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + CYTHON_FALLTHROUGH; + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = PyDict_Size(__pyx_kwds); + switch (pos_args) { + case 0: + if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_img_array)) != 0)) kw_args--; + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_flow_array)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("flow_warp_c", 0, 2, 4, 1); __PYX_ERR(0, 13, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 2: + if (kw_args > 0) { + PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_filling_value); + if (value) { values[2] = value; kw_args--; } + } + CYTHON_FALLTHROUGH; + case 3: + if (kw_args > 0) { + PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_interpolate_mode); + if (value) { values[3] = value; kw_args--; } + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "flow_warp_c") < 0)) __PYX_ERR(0, 13, __pyx_L3_error) + } + } else { + switch (PyTuple_GET_SIZE(__pyx_args)) { + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + CYTHON_FALLTHROUGH; + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + break; + default: goto __pyx_L5_argtuple_error; + } + } + __pyx_v_img_array = ((PyArrayObject *)values[0]); + __pyx_v_flow_array = ((PyArrayObject *)values[1]); + if (values[2]) { + __pyx_v_filling_value = __Pyx_PyInt_As_int(values[2]); if (unlikely((__pyx_v_filling_value == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L3_error) + } else { + __pyx_v_filling_value = ((int)0); + } + if (values[3]) { + __pyx_v_interpolate_mode = __Pyx_PyInt_As_int(values[3]); if (unlikely((__pyx_v_interpolate_mode == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 16, __pyx_L3_error) + } else { + __pyx_v_interpolate_mode = ((int)1); + } + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("flow_warp_c", 0, 2, 4, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 13, __pyx_L3_error) + __pyx_L3_error:; + __Pyx_AddTraceback("mmcv._flow_warp_ext.flow_warp_c", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_img_array), __pyx_ptype_5numpy_ndarray, 0, "img_array", 0))) __PYX_ERR(0, 13, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_flow_array), __pyx_ptype_5numpy_ndarray, 0, "flow_array", 0))) __PYX_ERR(0, 14, __pyx_L1_error) + __pyx_r = __pyx_pf_4mmcv_14_flow_warp_ext_flow_warp_c(__pyx_self, __pyx_v_img_array, __pyx_v_flow_array, __pyx_v_filling_value, __pyx_v_interpolate_mode); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4mmcv_14_flow_warp_ext_flow_warp_c(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_img_array, PyArrayObject *__pyx_v_flow_array, int __pyx_v_filling_value, int __pyx_v_interpolate_mode) { + PyObject *__pyx_v_out_array = NULL; + __Pyx_LocalBuf_ND __pyx_pybuffernd_flow_array; + __Pyx_Buffer __pyx_pybuffer_flow_array; + __Pyx_LocalBuf_ND __pyx_pybuffernd_img_array; + __Pyx_Buffer __pyx_pybuffer_img_array; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + int __pyx_t_5; + int __pyx_t_6; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("flow_warp_c", 0); + __pyx_pybuffer_img_array.pybuffer.buf = NULL; + __pyx_pybuffer_img_array.refcount = 0; + __pyx_pybuffernd_img_array.data = NULL; + __pyx_pybuffernd_img_array.rcbuffer = &__pyx_pybuffer_img_array; + __pyx_pybuffer_flow_array.pybuffer.buf = NULL; + __pyx_pybuffer_flow_array.refcount = 0; + __pyx_pybuffernd_flow_array.data = NULL; + __pyx_pybuffernd_flow_array.rcbuffer = &__pyx_pybuffer_flow_array; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_img_array.rcbuffer->pybuffer, (PyObject*)__pyx_v_img_array, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 3, 0, __pyx_stack) == -1)) __PYX_ERR(0, 13, __pyx_L1_error) + } + __pyx_pybuffernd_img_array.diminfo[0].strides = __pyx_pybuffernd_img_array.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_img_array.diminfo[0].shape = __pyx_pybuffernd_img_array.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_img_array.diminfo[1].strides = __pyx_pybuffernd_img_array.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_img_array.diminfo[1].shape = __pyx_pybuffernd_img_array.rcbuffer->pybuffer.shape[1]; __pyx_pybuffernd_img_array.diminfo[2].strides = __pyx_pybuffernd_img_array.rcbuffer->pybuffer.strides[2]; __pyx_pybuffernd_img_array.diminfo[2].shape = __pyx_pybuffernd_img_array.rcbuffer->pybuffer.shape[2]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_flow_array.rcbuffer->pybuffer, (PyObject*)__pyx_v_flow_array, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_C_CONTIGUOUS, 3, 0, __pyx_stack) == -1)) __PYX_ERR(0, 13, __pyx_L1_error) + } + __pyx_pybuffernd_flow_array.diminfo[0].strides = __pyx_pybuffernd_flow_array.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_flow_array.diminfo[0].shape = __pyx_pybuffernd_flow_array.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_flow_array.diminfo[1].strides = __pyx_pybuffernd_flow_array.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_flow_array.diminfo[1].shape = __pyx_pybuffernd_flow_array.rcbuffer->pybuffer.shape[1]; __pyx_pybuffernd_flow_array.diminfo[2].strides = __pyx_pybuffernd_flow_array.rcbuffer->pybuffer.strides[2]; __pyx_pybuffernd_flow_array.diminfo[2].shape = __pyx_pybuffernd_flow_array.rcbuffer->pybuffer.shape[2]; + + /* "mmcv/video/optflow_warp/flow_warp_module.pyx":18 + * int interpolate_mode=1): + * + * out_array = np.zeros_like(img_array) # <<<<<<<<<<<<<< + * + * FlowWarp( np.PyArray_DATA(img_array), + */ + __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_np); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 18, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros_like); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 18, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = NULL; + if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_3))) { + __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3); + if (likely(__pyx_t_2)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3); + __Pyx_INCREF(__pyx_t_2); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_3, function); + } + } + __pyx_t_1 = (__pyx_t_2) ? __Pyx_PyObject_Call2Args(__pyx_t_3, __pyx_t_2, ((PyObject *)__pyx_v_img_array)) : __Pyx_PyObject_CallOneArg(__pyx_t_3, ((PyObject *)__pyx_v_img_array)); + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 18, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_out_array = __pyx_t_1; + __pyx_t_1 = 0; + + /* "mmcv/video/optflow_warp/flow_warp_module.pyx":22 + * FlowWarp( np.PyArray_DATA(img_array), + * np.PyArray_DATA(flow_array), + * np.PyArray_DATA(out_array), # <<<<<<<<<<<<<< + * out_array.shape[0], + * out_array.shape[1], + */ + if (!(likely(((__pyx_v_out_array) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_out_array, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 22, __pyx_L1_error) + + /* "mmcv/video/optflow_warp/flow_warp_module.pyx":23 + * np.PyArray_DATA(flow_array), + * np.PyArray_DATA(out_array), + * out_array.shape[0], # <<<<<<<<<<<<<< + * out_array.shape[1], + * out_array.shape[2], + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_out_array, __pyx_n_s_shape); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 23, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = __Pyx_GetItemInt(__pyx_t_1, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 23, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_4 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 23, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "mmcv/video/optflow_warp/flow_warp_module.pyx":24 + * np.PyArray_DATA(out_array), + * out_array.shape[0], + * out_array.shape[1], # <<<<<<<<<<<<<< + * out_array.shape[2], + * filling_value, + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_out_array, __pyx_n_s_shape); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 24, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_GetItemInt(__pyx_t_3, 1, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 24, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_5 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 24, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "mmcv/video/optflow_warp/flow_warp_module.pyx":25 + * out_array.shape[0], + * out_array.shape[1], + * out_array.shape[2], # <<<<<<<<<<<<<< + * filling_value, + * interpolate_mode) + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_out_array, __pyx_n_s_shape); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 25, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = __Pyx_GetItemInt(__pyx_t_1, 2, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 25, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_6 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_6 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 25, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "mmcv/video/optflow_warp/flow_warp_module.pyx":20 + * out_array = np.zeros_like(img_array) + * + * FlowWarp( np.PyArray_DATA(img_array), # <<<<<<<<<<<<<< + * np.PyArray_DATA(flow_array), + * np.PyArray_DATA(out_array), + */ + FlowWarp(((double *)PyArray_DATA(((PyArrayObject *)__pyx_v_img_array))), ((double *)PyArray_DATA(((PyArrayObject *)__pyx_v_flow_array))), ((double *)PyArray_DATA(((PyArrayObject *)__pyx_v_out_array))), __pyx_t_4, __pyx_t_5, __pyx_t_6, __pyx_v_filling_value, __pyx_v_interpolate_mode); + + /* "mmcv/video/optflow_warp/flow_warp_module.pyx":29 + * interpolate_mode) + * + * return out_array # <<<<<<<<<<<<<< + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_out_array); + __pyx_r = __pyx_v_out_array; + goto __pyx_L0; + + /* "mmcv/video/optflow_warp/flow_warp_module.pyx":13 + * void FlowWarp(double* img, double* flow1, double* out, const int height, const int width, const int channels, const int filling_value, const int interpolateMode) + * + * def flow_warp_c(np.ndarray[double, ndim=3, mode="c"] img_array not None, # <<<<<<<<<<<<<< + * np.ndarray[double, ndim=3, mode="c"] flow_array not None, + * int filling_value=0, + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_flow_array.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_img_array.rcbuffer->pybuffer); + __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} + __Pyx_AddTraceback("mmcv._flow_warp_ext.flow_warp_c", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + goto __pyx_L2; + __pyx_L0:; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_flow_array.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_img_array.rcbuffer->pybuffer); + __pyx_L2:; + __Pyx_XDECREF(__pyx_v_out_array); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":735 + * ctypedef npy_cdouble complex_t + * + * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(1, a) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":736 + * + * cdef inline object PyArray_MultiIterNew1(a): + * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew2(a, b): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 736, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":735 + * ctypedef npy_cdouble complex_t + * + * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(1, a) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew1", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":738 + * return PyArray_MultiIterNew(1, a) + * + * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(2, a, b) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":739 + * + * cdef inline object PyArray_MultiIterNew2(a, b): + * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 739, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":738 + * return PyArray_MultiIterNew(1, a) + * + * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(2, a, b) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew2", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":741 + * return PyArray_MultiIterNew(2, a, b) + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(3, a, b, c) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":742 + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): + * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 742, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":741 + * return PyArray_MultiIterNew(2, a, b) + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(3, a, b, c) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew3", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":744 + * return PyArray_MultiIterNew(3, a, b, c) + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(4, a, b, c, d) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":745 + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): + * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 745, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":744 + * return PyArray_MultiIterNew(3, a, b, c) + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(4, a, b, c, d) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew4", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":747 + * return PyArray_MultiIterNew(4, a, b, c, d) + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":748 + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 748, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":747 + * return PyArray_MultiIterNew(4, a, b, c, d) + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew5", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":750 + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__pyx_v_d) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + __Pyx_RefNannySetupContext("PyDataType_SHAPE", 0); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":751 + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< + * return d.subarray.shape + * else: + */ + __pyx_t_1 = (PyDataType_HASSUBARRAY(__pyx_v_d) != 0); + if (__pyx_t_1) { + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":752 + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape # <<<<<<<<<<<<<< + * else: + * return () + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(((PyObject*)__pyx_v_d->subarray->shape)); + __pyx_r = ((PyObject*)__pyx_v_d->subarray->shape); + goto __pyx_L0; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":751 + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< + * return d.subarray.shape + * else: + */ + } + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":754 + * return d.subarray.shape + * else: + * return () # <<<<<<<<<<<<<< + * + * + */ + /*else*/ { + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_empty_tuple); + __pyx_r = __pyx_empty_tuple; + goto __pyx_L0; + } + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":750 + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":931 + * int _import_umath() except -1 + * + * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< + * Py_INCREF(base) # important to do this before stealing the reference below! + * PyArray_SetBaseObject(arr, base) + */ + +static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_arr, PyObject *__pyx_v_base) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("set_array_base", 0); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":932 + * + * cdef inline void set_array_base(ndarray arr, object base): + * Py_INCREF(base) # important to do this before stealing the reference below! # <<<<<<<<<<<<<< + * PyArray_SetBaseObject(arr, base) + * + */ + Py_INCREF(__pyx_v_base); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":933 + * cdef inline void set_array_base(ndarray arr, object base): + * Py_INCREF(base) # important to do this before stealing the reference below! + * PyArray_SetBaseObject(arr, base) # <<<<<<<<<<<<<< + * + * cdef inline object get_array_base(ndarray arr): + */ + (void)(PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_base)); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":931 + * int _import_umath() except -1 + * + * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< + * Py_INCREF(base) # important to do this before stealing the reference below! + * PyArray_SetBaseObject(arr, base) + */ + + /* function exit code */ + __Pyx_RefNannyFinishContext(); +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":935 + * PyArray_SetBaseObject(arr, base) + * + * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< + * base = PyArray_BASE(arr) + * if base is NULL: + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__pyx_v_arr) { + PyObject *__pyx_v_base; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + __Pyx_RefNannySetupContext("get_array_base", 0); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":936 + * + * cdef inline object get_array_base(ndarray arr): + * base = PyArray_BASE(arr) # <<<<<<<<<<<<<< + * if base is NULL: + * return None + */ + __pyx_v_base = PyArray_BASE(__pyx_v_arr); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":937 + * cdef inline object get_array_base(ndarray arr): + * base = PyArray_BASE(arr) + * if base is NULL: # <<<<<<<<<<<<<< + * return None + * return base + */ + __pyx_t_1 = ((__pyx_v_base == NULL) != 0); + if (__pyx_t_1) { + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":938 + * base = PyArray_BASE(arr) + * if base is NULL: + * return None # <<<<<<<<<<<<<< + * return base + * + */ + __Pyx_XDECREF(__pyx_r); + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":937 + * cdef inline object get_array_base(ndarray arr): + * base = PyArray_BASE(arr) + * if base is NULL: # <<<<<<<<<<<<<< + * return None + * return base + */ + } + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":939 + * if base is NULL: + * return None + * return base # <<<<<<<<<<<<<< + * + * # Versions of the import_* functions which are more suitable for + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(((PyObject *)__pyx_v_base)); + __pyx_r = ((PyObject *)__pyx_v_base); + goto __pyx_L0; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":935 + * PyArray_SetBaseObject(arr, base) + * + * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< + * base = PyArray_BASE(arr) + * if base is NULL: + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":943 + * # Versions of the import_* functions which are more suitable for + * # Cython code. + * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< + * try: + * __pyx_import_array() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("import_array", 0); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":944 + * # Cython code. + * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * __pyx_import_array() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":945 + * cdef inline int import_array() except -1: + * try: + * __pyx_import_array() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy.core.multiarray failed to import") + */ + __pyx_t_4 = _import_array(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 945, __pyx_L3_error) + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":944 + * # Cython code. + * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * __pyx_import_array() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":946 + * try: + * __pyx_import_array() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy.core.multiarray failed to import") + * + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 946, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GOTREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_t_7); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":947 + * __pyx_import_array() + * except Exception: + * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_umath() except -1: + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 947, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 947, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + __pyx_L5_except_error:; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":944 + * # Cython code. + * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * __pyx_import_array() + * except Exception: + */ + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":943 + * # Versions of the import_* functions which are more suitable for + * # Cython code. + * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< + * try: + * __pyx_import_array() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":949 + * raise ImportError("numpy.core.multiarray failed to import") + * + * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("import_umath", 0); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":950 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":951 + * cdef inline int import_umath() except -1: + * try: + * _import_umath() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy.core.umath failed to import") + */ + __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 951, __pyx_L3_error) + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":950 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":952 + * try: + * _import_umath() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy.core.umath failed to import") + * + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 952, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GOTREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_t_7); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":953 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_ufunc() except -1: + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 953, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 953, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + __pyx_L5_except_error:; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":950 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":949 + * raise ImportError("numpy.core.multiarray failed to import") + * + * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":955 + * raise ImportError("numpy.core.umath failed to import") + * + * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("import_ufunc", 0); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":956 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":957 + * cdef inline int import_ufunc() except -1: + * try: + * _import_umath() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy.core.umath failed to import") + */ + __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 957, __pyx_L3_error) + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":956 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":958 + * try: + * _import_umath() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy.core.umath failed to import") + * + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 958, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GOTREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_t_7); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":959 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + * + * cdef extern from *: + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 959, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 959, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + __pyx_L5_except_error:; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":956 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":955 + * raise ImportError("numpy.core.umath failed to import") + * + * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":969 + * + * + * cdef inline bint is_timedelta64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.timedelta64)` + */ + +static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *__pyx_v_obj) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_timedelta64_object", 0); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":981 + * bool + * """ + * return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyTimedeltaArrType_Type)); + goto __pyx_L0; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":969 + * + * + * cdef inline bint is_timedelta64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.timedelta64)` + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":984 + * + * + * cdef inline bint is_datetime64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.datetime64)` + */ + +static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *__pyx_v_obj) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_datetime64_object", 0); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":996 + * bool + * """ + * return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyDatetimeArrType_Type)); + goto __pyx_L0; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":984 + * + * + * cdef inline bint is_datetime64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.datetime64)` + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":999 + * + * + * cdef inline npy_datetime get_datetime64_value(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy datetime64 object + */ + +static CYTHON_INLINE npy_datetime __pyx_f_5numpy_get_datetime64_value(PyObject *__pyx_v_obj) { + npy_datetime __pyx_r; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":1006 + * also needed. That can be found using `get_datetime64_unit`. + * """ + * return (obj).obval # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = ((PyDatetimeScalarObject *)__pyx_v_obj)->obval; + goto __pyx_L0; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":999 + * + * + * cdef inline npy_datetime get_datetime64_value(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy datetime64 object + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":1009 + * + * + * cdef inline npy_timedelta get_timedelta64_value(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy timedelta64 object + */ + +static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(PyObject *__pyx_v_obj) { + npy_timedelta __pyx_r; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":1013 + * returns the int64 value underlying scalar numpy timedelta64 object + * """ + * return (obj).obval # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = ((PyTimedeltaScalarObject *)__pyx_v_obj)->obval; + goto __pyx_L0; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":1009 + * + * + * cdef inline npy_timedelta get_timedelta64_value(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy timedelta64 object + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":1016 + * + * + * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the unit part of the dtype for a numpy datetime64 object. + */ + +static CYTHON_INLINE NPY_DATETIMEUNIT __pyx_f_5numpy_get_datetime64_unit(PyObject *__pyx_v_obj) { + NPY_DATETIMEUNIT __pyx_r; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":1020 + * returns the unit part of the dtype for a numpy datetime64 object. + * """ + * return (obj).obmeta.base # <<<<<<<<<<<<<< + */ + __pyx_r = ((NPY_DATETIMEUNIT)((PyDatetimeScalarObject *)__pyx_v_obj)->obmeta.base); + goto __pyx_L0; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":1016 + * + * + * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the unit part of the dtype for a numpy datetime64 object. + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +static PyMethodDef __pyx_methods[] = { + {0, 0, 0, 0} +}; + +#if PY_MAJOR_VERSION >= 3 +#if CYTHON_PEP489_MULTI_PHASE_INIT +static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/ +static int __pyx_pymod_exec__flow_warp_ext(PyObject* module); /*proto*/ +static PyModuleDef_Slot __pyx_moduledef_slots[] = { + {Py_mod_create, (void*)__pyx_pymod_create}, + {Py_mod_exec, (void*)__pyx_pymod_exec__flow_warp_ext}, + {0, NULL} +}; +#endif + +static struct PyModuleDef __pyx_moduledef = { + PyModuleDef_HEAD_INIT, + "_flow_warp_ext", + 0, /* m_doc */ + #if CYTHON_PEP489_MULTI_PHASE_INIT + 0, /* m_size */ + #else + -1, /* m_size */ + #endif + __pyx_methods /* m_methods */, + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_moduledef_slots, /* m_slots */ + #else + NULL, /* m_reload */ + #endif + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ +}; +#endif +#ifndef CYTHON_SMALL_CODE +#if defined(__clang__) + #define CYTHON_SMALL_CODE +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define CYTHON_SMALL_CODE __attribute__((cold)) +#else + #define CYTHON_SMALL_CODE +#endif +#endif + +static __Pyx_StringTabEntry __pyx_string_tab[] = { + {&__pyx_n_u_Hi, __pyx_k_Hi, sizeof(__pyx_k_Hi), 0, 1, 0, 1}, + {&__pyx_n_s_ImportError, __pyx_k_ImportError, sizeof(__pyx_k_ImportError), 0, 0, 1, 1}, + {&__pyx_n_s_STUFF, __pyx_k_STUFF, sizeof(__pyx_k_STUFF), 0, 0, 1, 1}, + {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, + {&__pyx_n_s_filling_value, __pyx_k_filling_value, sizeof(__pyx_k_filling_value), 0, 0, 1, 1}, + {&__pyx_n_s_flow_array, __pyx_k_flow_array, sizeof(__pyx_k_flow_array), 0, 0, 1, 1}, + {&__pyx_n_s_flow_warp_c, __pyx_k_flow_warp_c, sizeof(__pyx_k_flow_warp_c), 0, 0, 1, 1}, + {&__pyx_n_s_img_array, __pyx_k_img_array, sizeof(__pyx_k_img_array), 0, 0, 1, 1}, + {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, + {&__pyx_n_s_interpolate_mode, __pyx_k_interpolate_mode, sizeof(__pyx_k_interpolate_mode), 0, 0, 1, 1}, + {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, + {&__pyx_n_s_mmcv__flow_warp_ext, __pyx_k_mmcv__flow_warp_ext, sizeof(__pyx_k_mmcv__flow_warp_ext), 0, 0, 1, 1}, + {&__pyx_kp_s_mmcv_video_optflow_warp_flow_war, __pyx_k_mmcv_video_optflow_warp_flow_war, sizeof(__pyx_k_mmcv_video_optflow_warp_flow_war), 0, 0, 1, 0}, + {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1}, + {&__pyx_n_s_np, __pyx_k_np, sizeof(__pyx_k_np), 0, 0, 1, 1}, + {&__pyx_n_s_numpy, __pyx_k_numpy, sizeof(__pyx_k_numpy), 0, 0, 1, 1}, + {&__pyx_kp_u_numpy_core_multiarray_failed_to, __pyx_k_numpy_core_multiarray_failed_to, sizeof(__pyx_k_numpy_core_multiarray_failed_to), 0, 1, 0, 0}, + {&__pyx_kp_u_numpy_core_umath_failed_to_impor, __pyx_k_numpy_core_umath_failed_to_impor, sizeof(__pyx_k_numpy_core_umath_failed_to_impor), 0, 1, 0, 0}, + {&__pyx_n_s_out_array, __pyx_k_out_array, sizeof(__pyx_k_out_array), 0, 0, 1, 1}, + {&__pyx_n_s_shape, __pyx_k_shape, sizeof(__pyx_k_shape), 0, 0, 1, 1}, + {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {&__pyx_n_s_zeros_like, __pyx_k_zeros_like, sizeof(__pyx_k_zeros_like), 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0} +}; +static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) { + __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(1, 947, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} + +static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":947 + * __pyx_import_array() + * except Exception: + * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_umath() except -1: + */ + __pyx_tuple_ = PyTuple_Pack(1, __pyx_kp_u_numpy_core_multiarray_failed_to); if (unlikely(!__pyx_tuple_)) __PYX_ERR(1, 947, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple_); + __Pyx_GIVEREF(__pyx_tuple_); + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":953 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_ufunc() except -1: + */ + __pyx_tuple__2 = PyTuple_Pack(1, __pyx_kp_u_numpy_core_umath_failed_to_impor); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(1, 953, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__2); + __Pyx_GIVEREF(__pyx_tuple__2); + + /* "mmcv/video/optflow_warp/flow_warp_module.pyx":13 + * void FlowWarp(double* img, double* flow1, double* out, const int height, const int width, const int channels, const int filling_value, const int interpolateMode) + * + * def flow_warp_c(np.ndarray[double, ndim=3, mode="c"] img_array not None, # <<<<<<<<<<<<<< + * np.ndarray[double, ndim=3, mode="c"] flow_array not None, + * int filling_value=0, + */ + __pyx_tuple__3 = PyTuple_Pack(5, __pyx_n_s_img_array, __pyx_n_s_flow_array, __pyx_n_s_filling_value, __pyx_n_s_interpolate_mode, __pyx_n_s_out_array); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 13, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__3); + __Pyx_GIVEREF(__pyx_tuple__3); + __pyx_codeobj__4 = (PyObject*)__Pyx_PyCode_New(4, 0, 5, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__3, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_mmcv_video_optflow_warp_flow_war, __pyx_n_s_flow_warp_c, 13, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__4)) __PYX_ERR(0, 13, __pyx_L1_error) + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} + +static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) { + if (__Pyx_InitStrings(__pyx_string_tab) < 0) __PYX_ERR(0, 1, __pyx_L1_error); + return 0; + __pyx_L1_error:; + return -1; +} + +static CYTHON_SMALL_CODE int __Pyx_modinit_global_init_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_type_init_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_type_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_import_code(void); /*proto*/ + +static int __Pyx_modinit_global_init_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_global_init_code", 0); + /*--- Global init code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_variable_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_export_code", 0); + /*--- Variable export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_export_code", 0); + /*--- Function export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_type_init_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0); + /*--- Type init code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_type_import_code(void) { + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__Pyx_modinit_type_import_code", 0); + /*--- Type import code ---*/ + __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 9, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_ptype_7cpython_4type_type = __Pyx_ImportType(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type", + #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 + sizeof(PyTypeObject), + #else + sizeof(PyHeapTypeObject), + #endif + __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_7cpython_4type_type) __PYX_ERR(2, 9, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = PyImport_ImportModule("numpy"); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 200, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_ptype_5numpy_dtype = __Pyx_ImportType(__pyx_t_1, "numpy", "dtype", sizeof(PyArray_Descr), __Pyx_ImportType_CheckSize_Ignore); + if (!__pyx_ptype_5numpy_dtype) __PYX_ERR(1, 200, __pyx_L1_error) + __pyx_ptype_5numpy_flatiter = __Pyx_ImportType(__pyx_t_1, "numpy", "flatiter", sizeof(PyArrayIterObject), __Pyx_ImportType_CheckSize_Ignore); + if (!__pyx_ptype_5numpy_flatiter) __PYX_ERR(1, 223, __pyx_L1_error) + __pyx_ptype_5numpy_broadcast = __Pyx_ImportType(__pyx_t_1, "numpy", "broadcast", sizeof(PyArrayMultiIterObject), __Pyx_ImportType_CheckSize_Ignore); + if (!__pyx_ptype_5numpy_broadcast) __PYX_ERR(1, 227, __pyx_L1_error) + __pyx_ptype_5numpy_ndarray = __Pyx_ImportType(__pyx_t_1, "numpy", "ndarray", sizeof(PyArrayObject), __Pyx_ImportType_CheckSize_Ignore); + if (!__pyx_ptype_5numpy_ndarray) __PYX_ERR(1, 239, __pyx_L1_error) + __pyx_ptype_5numpy_generic = __Pyx_ImportType(__pyx_t_1, "numpy", "generic", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_generic) __PYX_ERR(1, 771, __pyx_L1_error) + __pyx_ptype_5numpy_number = __Pyx_ImportType(__pyx_t_1, "numpy", "number", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_number) __PYX_ERR(1, 773, __pyx_L1_error) + __pyx_ptype_5numpy_integer = __Pyx_ImportType(__pyx_t_1, "numpy", "integer", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_integer) __PYX_ERR(1, 775, __pyx_L1_error) + __pyx_ptype_5numpy_signedinteger = __Pyx_ImportType(__pyx_t_1, "numpy", "signedinteger", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_signedinteger) __PYX_ERR(1, 777, __pyx_L1_error) + __pyx_ptype_5numpy_unsignedinteger = __Pyx_ImportType(__pyx_t_1, "numpy", "unsignedinteger", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_unsignedinteger) __PYX_ERR(1, 779, __pyx_L1_error) + __pyx_ptype_5numpy_inexact = __Pyx_ImportType(__pyx_t_1, "numpy", "inexact", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_inexact) __PYX_ERR(1, 781, __pyx_L1_error) + __pyx_ptype_5numpy_floating = __Pyx_ImportType(__pyx_t_1, "numpy", "floating", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_floating) __PYX_ERR(1, 783, __pyx_L1_error) + __pyx_ptype_5numpy_complexfloating = __Pyx_ImportType(__pyx_t_1, "numpy", "complexfloating", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_complexfloating) __PYX_ERR(1, 785, __pyx_L1_error) + __pyx_ptype_5numpy_flexible = __Pyx_ImportType(__pyx_t_1, "numpy", "flexible", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_flexible) __PYX_ERR(1, 787, __pyx_L1_error) + __pyx_ptype_5numpy_character = __Pyx_ImportType(__pyx_t_1, "numpy", "character", sizeof(PyObject), __Pyx_ImportType_CheckSize_Warn); + if (!__pyx_ptype_5numpy_character) __PYX_ERR(1, 789, __pyx_L1_error) + __pyx_ptype_5numpy_ufunc = __Pyx_ImportType(__pyx_t_1, "numpy", "ufunc", sizeof(PyUFuncObject), __Pyx_ImportType_CheckSize_Ignore); + if (!__pyx_ptype_5numpy_ufunc) __PYX_ERR(1, 827, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_RefNannyFinishContext(); + return -1; +} + +static int __Pyx_modinit_variable_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_import_code", 0); + /*--- Variable import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_import_code", 0); + /*--- Function import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + + +#ifndef CYTHON_NO_PYINIT_EXPORT +#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC +#elif PY_MAJOR_VERSION < 3 +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" void +#else +#define __Pyx_PyMODINIT_FUNC void +#endif +#else +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" PyObject * +#else +#define __Pyx_PyMODINIT_FUNC PyObject * +#endif +#endif + + +#if PY_MAJOR_VERSION < 3 +__Pyx_PyMODINIT_FUNC init_flow_warp_ext(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC init_flow_warp_ext(void) +#else +__Pyx_PyMODINIT_FUNC PyInit__flow_warp_ext(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC PyInit__flow_warp_ext(void) +#if CYTHON_PEP489_MULTI_PHASE_INIT +{ + return PyModuleDef_Init(&__pyx_moduledef); +} +static CYTHON_SMALL_CODE int __Pyx_check_single_interpreter(void) { + #if PY_VERSION_HEX >= 0x030700A1 + static PY_INT64_T main_interpreter_id = -1; + PY_INT64_T current_id = PyInterpreterState_GetID(PyThreadState_Get()->interp); + if (main_interpreter_id == -1) { + main_interpreter_id = current_id; + return (unlikely(current_id == -1)) ? -1 : 0; + } else if (unlikely(main_interpreter_id != current_id)) + #else + static PyInterpreterState *main_interpreter = NULL; + PyInterpreterState *current_interpreter = PyThreadState_Get()->interp; + if (!main_interpreter) { + main_interpreter = current_interpreter; + } else if (unlikely(main_interpreter != current_interpreter)) + #endif + { + PyErr_SetString( + PyExc_ImportError, + "Interpreter change detected - this module can only be loaded into one interpreter per process."); + return -1; + } + return 0; +} +static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name, int allow_none) { + PyObject *value = PyObject_GetAttrString(spec, from_name); + int result = 0; + if (likely(value)) { + if (allow_none || value != Py_None) { + result = PyDict_SetItemString(moddict, to_name, value); + } + Py_DECREF(value); + } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } else { + result = -1; + } + return result; +} +static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, CYTHON_UNUSED PyModuleDef *def) { + PyObject *module = NULL, *moddict, *modname; + if (__Pyx_check_single_interpreter()) + return NULL; + if (__pyx_m) + return __Pyx_NewRef(__pyx_m); + modname = PyObject_GetAttrString(spec, "name"); + if (unlikely(!modname)) goto bad; + module = PyModule_NewObject(modname); + Py_DECREF(modname); + if (unlikely(!module)) goto bad; + moddict = PyModule_GetDict(module); + if (unlikely(!moddict)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", "__loader__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__", 0) < 0)) goto bad; + return module; +bad: + Py_XDECREF(module); + return NULL; +} + + +static CYTHON_SMALL_CODE int __pyx_pymod_exec__flow_warp_ext(PyObject *__pyx_pyinit_module) +#endif +#endif +{ + PyObject *__pyx_t_1 = NULL; + int __pyx_t_2; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannyDeclarations + #if CYTHON_PEP489_MULTI_PHASE_INIT + if (__pyx_m) { + if (__pyx_m == __pyx_pyinit_module) return 0; + PyErr_SetString(PyExc_RuntimeError, "Module '_flow_warp_ext' has already been imported. Re-initialisation is not supported."); + return -1; + } + #elif PY_MAJOR_VERSION >= 3 + if (__pyx_m) return __Pyx_NewRef(__pyx_m); + #endif + #if CYTHON_REFNANNY +__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); +if (!__Pyx_RefNanny) { + PyErr_Clear(); + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); + if (!__Pyx_RefNanny) + Py_FatalError("failed to import 'refnanny' module"); +} +#endif + __Pyx_RefNannySetupContext("__Pyx_PyMODINIT_FUNC PyInit__flow_warp_ext(void)", 0); + if (__Pyx_check_binary_version() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pxy_PyFrame_Initialize_Offsets + __Pxy_PyFrame_Initialize_Offsets(); + #endif + __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pyx_CyFunction_USED + if (__pyx_CyFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_FusedFunction_USED + if (__pyx_FusedFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Coroutine_USED + if (__pyx_Coroutine_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Generator_USED + if (__pyx_Generator_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_AsyncGen_USED + if (__pyx_AsyncGen_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_StopAsyncIteration_USED + if (__pyx_StopAsyncIteration_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + /*--- Library function declarations ---*/ + /*--- Threads initialization code ---*/ + #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 && defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS + PyEval_InitThreads(); + #endif + /*--- Module creation code ---*/ + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_m = __pyx_pyinit_module; + Py_INCREF(__pyx_m); + #else + #if PY_MAJOR_VERSION < 3 + __pyx_m = Py_InitModule4("_flow_warp_ext", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + #else + __pyx_m = PyModule_Create(&__pyx_moduledef); + #endif + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) + Py_INCREF(__pyx_d); + __pyx_b = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) + Py_INCREF(__pyx_b); + __pyx_cython_runtime = PyImport_AddModule((char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error) + Py_INCREF(__pyx_cython_runtime); + if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error); + /*--- Initialize various global constants etc. ---*/ + if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + if (__pyx_module_is_main_mmcv___flow_warp_ext) { + if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + } + #if PY_MAJOR_VERSION >= 3 + { + PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) + if (!PyDict_GetItemString(modules, "mmcv._flow_warp_ext")) { + if (unlikely(PyDict_SetItemString(modules, "mmcv._flow_warp_ext", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error) + } + } + #endif + /*--- Builtin init code ---*/ + if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Constants init code ---*/ + if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Global type/function init code ---*/ + (void)__Pyx_modinit_global_init_code(); + (void)__Pyx_modinit_variable_export_code(); + (void)__Pyx_modinit_function_export_code(); + (void)__Pyx_modinit_type_init_code(); + if (unlikely(__Pyx_modinit_type_import_code() < 0)) __PYX_ERR(0, 1, __pyx_L1_error) + (void)__Pyx_modinit_variable_import_code(); + (void)__Pyx_modinit_function_import_code(); + /*--- Execution code ---*/ + #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + + /* "mmcv/video/optflow_warp/flow_warp_module.pyx":3 + * # Copyright (c) Open-MMLab. All rights reserved. + * # cython: language_level=3 + * STUFF = "Hi" # <<<<<<<<<<<<<< + * + * import numpy as np + */ + if (PyDict_SetItem(__pyx_d, __pyx_n_s_STUFF, __pyx_n_u_Hi) < 0) __PYX_ERR(0, 3, __pyx_L1_error) + + /* "mmcv/video/optflow_warp/flow_warp_module.pyx":5 + * STUFF = "Hi" + * + * import numpy as np # <<<<<<<<<<<<<< + * cimport numpy as np + * + */ + __pyx_t_1 = __Pyx_Import(__pyx_n_s_numpy, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 5, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_1) < 0) __PYX_ERR(0, 5, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "mmcv/video/optflow_warp/flow_warp_module.pyx":8 + * cimport numpy as np + * + * np.import_array() # <<<<<<<<<<<<<< + * + * cdef extern from "flow_warp.hpp": + */ + __pyx_t_2 = __pyx_f_5numpy_import_array(); if (unlikely(__pyx_t_2 == ((int)-1))) __PYX_ERR(0, 8, __pyx_L1_error) + + /* "mmcv/video/optflow_warp/flow_warp_module.pyx":13 + * void FlowWarp(double* img, double* flow1, double* out, const int height, const int width, const int channels, const int filling_value, const int interpolateMode) + * + * def flow_warp_c(np.ndarray[double, ndim=3, mode="c"] img_array not None, # <<<<<<<<<<<<<< + * np.ndarray[double, ndim=3, mode="c"] flow_array not None, + * int filling_value=0, + */ + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_4mmcv_14_flow_warp_ext_1flow_warp_c, NULL, __pyx_n_s_mmcv__flow_warp_ext); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 13, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_flow_warp_c, __pyx_t_1) < 0) __PYX_ERR(0, 13, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "mmcv/video/optflow_warp/flow_warp_module.pyx":1 + * # Copyright (c) Open-MMLab. All rights reserved. # <<<<<<<<<<<<<< + * # cython: language_level=3 + * STUFF = "Hi" + */ + __pyx_t_1 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "../../anaconda3/envs/recycle/lib/python3.8/site-packages/numpy/__init__.pxd":1016 + * + * + * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the unit part of the dtype for a numpy datetime64 object. + */ + + /*--- Wrapped vars code ---*/ + + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + if (__pyx_m) { + if (__pyx_d) { + __Pyx_AddTraceback("init mmcv._flow_warp_ext", __pyx_clineno, __pyx_lineno, __pyx_filename); + } + Py_CLEAR(__pyx_m); + } else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ImportError, "init mmcv._flow_warp_ext"); + } + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + #if CYTHON_PEP489_MULTI_PHASE_INIT + return (__pyx_m != NULL) ? 0 : -1; + #elif PY_MAJOR_VERSION >= 3 + return __pyx_m; + #else + return; + #endif +} + +/* --- Runtime support code --- */ +/* Refnanny */ +#if CYTHON_REFNANNY +static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { + PyObject *m = NULL, *p = NULL; + void *r = NULL; + m = PyImport_ImportModule(modname); + if (!m) goto end; + p = PyObject_GetAttrString(m, "RefNannyAPI"); + if (!p) goto end; + r = PyLong_AsVoidPtr(p); +end: + Py_XDECREF(p); + Py_XDECREF(m); + return (__Pyx_RefNannyAPIStruct *)r; +} +#endif + +/* RaiseArgTupleInvalid */ +static void __Pyx_RaiseArgtupleInvalid( + const char* func_name, + int exact, + Py_ssize_t num_min, + Py_ssize_t num_max, + Py_ssize_t num_found) +{ + Py_ssize_t num_expected; + const char *more_or_less; + if (num_found < num_min) { + num_expected = num_min; + more_or_less = "at least"; + } else { + num_expected = num_max; + more_or_less = "at most"; + } + if (exact) { + more_or_less = "exactly"; + } + PyErr_Format(PyExc_TypeError, + "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)", + func_name, more_or_less, num_expected, + (num_expected == 1) ? "" : "s", num_found); +} + +/* RaiseDoubleKeywords */ +static void __Pyx_RaiseDoubleKeywordsError( + const char* func_name, + PyObject* kw_name) +{ + PyErr_Format(PyExc_TypeError, + #if PY_MAJOR_VERSION >= 3 + "%s() got multiple values for keyword argument '%U'", func_name, kw_name); + #else + "%s() got multiple values for keyword argument '%s'", func_name, + PyString_AsString(kw_name)); + #endif +} + +/* ParseKeywords */ +static int __Pyx_ParseOptionalKeywords( + PyObject *kwds, + PyObject **argnames[], + PyObject *kwds2, + PyObject *values[], + Py_ssize_t num_pos_args, + const char* function_name) +{ + PyObject *key = 0, *value = 0; + Py_ssize_t pos = 0; + PyObject*** name; + PyObject*** first_kw_arg = argnames + num_pos_args; + while (PyDict_Next(kwds, &pos, &key, &value)) { + name = first_kw_arg; + while (*name && (**name != key)) name++; + if (*name) { + values[name-argnames] = value; + continue; + } + name = first_kw_arg; + #if PY_MAJOR_VERSION < 3 + if (likely(PyString_Check(key))) { + while (*name) { + if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) + && _PyString_Eq(**name, key)) { + values[name-argnames] = value; + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + if ((**argname == key) || ( + (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) + && _PyString_Eq(**argname, key))) { + goto arg_passed_twice; + } + argname++; + } + } + } else + #endif + if (likely(PyUnicode_Check(key))) { + while (*name) { + int cmp = (**name == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : + #endif + PyUnicode_Compare(**name, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) { + values[name-argnames] = value; + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + int cmp = (**argname == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : + #endif + PyUnicode_Compare(**argname, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) goto arg_passed_twice; + argname++; + } + } + } else + goto invalid_keyword_type; + if (kwds2) { + if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; + } else { + goto invalid_keyword; + } + } + return 0; +arg_passed_twice: + __Pyx_RaiseDoubleKeywordsError(function_name, key); + goto bad; +invalid_keyword_type: + PyErr_Format(PyExc_TypeError, + "%.200s() keywords must be strings", function_name); + goto bad; +invalid_keyword: + PyErr_Format(PyExc_TypeError, + #if PY_MAJOR_VERSION < 3 + "%.200s() got an unexpected keyword argument '%.200s'", + function_name, PyString_AsString(key)); + #else + "%s() got an unexpected keyword argument '%U'", + function_name, key); + #endif +bad: + return -1; +} + +/* ArgTypeTest */ +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact) +{ + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + else if (exact) { + #if PY_MAJOR_VERSION == 2 + if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; + #endif + } + else { + if (likely(__Pyx_TypeCheck(obj, type))) return 1; + } + PyErr_Format(PyExc_TypeError, + "Argument '%.200s' has incorrect type (expected %.200s, got %.200s)", + name, type->tp_name, Py_TYPE(obj)->tp_name); + return 0; +} + +/* IsLittleEndian */ +static CYTHON_INLINE int __Pyx_Is_Little_Endian(void) +{ + union { + uint32_t u32; + uint8_t u8[4]; + } S; + S.u32 = 0x01020304; + return S.u8[0] == 4; +} + +/* BufferFormatCheck */ +static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, + __Pyx_BufFmt_StackElem* stack, + __Pyx_TypeInfo* type) { + stack[0].field = &ctx->root; + stack[0].parent_offset = 0; + ctx->root.type = type; + ctx->root.name = "buffer dtype"; + ctx->root.offset = 0; + ctx->head = stack; + ctx->head->field = &ctx->root; + ctx->fmt_offset = 0; + ctx->head->parent_offset = 0; + ctx->new_packmode = '@'; + ctx->enc_packmode = '@'; + ctx->new_count = 1; + ctx->enc_count = 0; + ctx->enc_type = 0; + ctx->is_complex = 0; + ctx->is_valid_array = 0; + ctx->struct_alignment = 0; + while (type->typegroup == 'S') { + ++ctx->head; + ctx->head->field = type->fields; + ctx->head->parent_offset = 0; + type = type->fields->type; + } +} +static int __Pyx_BufFmt_ParseNumber(const char** ts) { + int count; + const char* t = *ts; + if (*t < '0' || *t > '9') { + return -1; + } else { + count = *t++ - '0'; + while (*t >= '0' && *t <= '9') { + count *= 10; + count += *t++ - '0'; + } + } + *ts = t; + return count; +} +static int __Pyx_BufFmt_ExpectNumber(const char **ts) { + int number = __Pyx_BufFmt_ParseNumber(ts); + if (number == -1) + PyErr_Format(PyExc_ValueError,\ + "Does not understand character buffer dtype format string ('%c')", **ts); + return number; +} +static void __Pyx_BufFmt_RaiseUnexpectedChar(char ch) { + PyErr_Format(PyExc_ValueError, + "Unexpected format string character: '%c'", ch); +} +static const char* __Pyx_BufFmt_DescribeTypeChar(char ch, int is_complex) { + switch (ch) { + case '?': return "'bool'"; + case 'c': return "'char'"; + case 'b': return "'signed char'"; + case 'B': return "'unsigned char'"; + case 'h': return "'short'"; + case 'H': return "'unsigned short'"; + case 'i': return "'int'"; + case 'I': return "'unsigned int'"; + case 'l': return "'long'"; + case 'L': return "'unsigned long'"; + case 'q': return "'long long'"; + case 'Q': return "'unsigned long long'"; + case 'f': return (is_complex ? "'complex float'" : "'float'"); + case 'd': return (is_complex ? "'complex double'" : "'double'"); + case 'g': return (is_complex ? "'complex long double'" : "'long double'"); + case 'T': return "a struct"; + case 'O': return "Python object"; + case 'P': return "a pointer"; + case 's': case 'p': return "a string"; + case 0: return "end"; + default: return "unparseable format string"; + } +} +static size_t __Pyx_BufFmt_TypeCharToStandardSize(char ch, int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return 2; + case 'i': case 'I': case 'l': case 'L': return 4; + case 'q': case 'Q': return 8; + case 'f': return (is_complex ? 8 : 4); + case 'd': return (is_complex ? 16 : 8); + case 'g': { + PyErr_SetString(PyExc_ValueError, "Python does not define a standard format string size for long double ('g').."); + return 0; + } + case 'O': case 'P': return sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +static size_t __Pyx_BufFmt_TypeCharToNativeSize(char ch, int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(short); + case 'i': case 'I': return sizeof(int); + case 'l': case 'L': return sizeof(long); + #ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(PY_LONG_LONG); + #endif + case 'f': return sizeof(float) * (is_complex ? 2 : 1); + case 'd': return sizeof(double) * (is_complex ? 2 : 1); + case 'g': return sizeof(long double) * (is_complex ? 2 : 1); + case 'O': case 'P': return sizeof(void*); + default: { + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } + } +} +typedef struct { char c; short x; } __Pyx_st_short; +typedef struct { char c; int x; } __Pyx_st_int; +typedef struct { char c; long x; } __Pyx_st_long; +typedef struct { char c; float x; } __Pyx_st_float; +typedef struct { char c; double x; } __Pyx_st_double; +typedef struct { char c; long double x; } __Pyx_st_longdouble; +typedef struct { char c; void *x; } __Pyx_st_void_p; +#ifdef HAVE_LONG_LONG +typedef struct { char c; PY_LONG_LONG x; } __Pyx_st_longlong; +#endif +static size_t __Pyx_BufFmt_TypeCharToAlignment(char ch, CYTHON_UNUSED int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(__Pyx_st_short) - sizeof(short); + case 'i': case 'I': return sizeof(__Pyx_st_int) - sizeof(int); + case 'l': case 'L': return sizeof(__Pyx_st_long) - sizeof(long); +#ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(__Pyx_st_longlong) - sizeof(PY_LONG_LONG); +#endif + case 'f': return sizeof(__Pyx_st_float) - sizeof(float); + case 'd': return sizeof(__Pyx_st_double) - sizeof(double); + case 'g': return sizeof(__Pyx_st_longdouble) - sizeof(long double); + case 'P': case 'O': return sizeof(__Pyx_st_void_p) - sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +/* These are for computing the padding at the end of the struct to align + on the first member of the struct. This will probably the same as above, + but we don't have any guarantees. + */ +typedef struct { short x; char c; } __Pyx_pad_short; +typedef struct { int x; char c; } __Pyx_pad_int; +typedef struct { long x; char c; } __Pyx_pad_long; +typedef struct { float x; char c; } __Pyx_pad_float; +typedef struct { double x; char c; } __Pyx_pad_double; +typedef struct { long double x; char c; } __Pyx_pad_longdouble; +typedef struct { void *x; char c; } __Pyx_pad_void_p; +#ifdef HAVE_LONG_LONG +typedef struct { PY_LONG_LONG x; char c; } __Pyx_pad_longlong; +#endif +static size_t __Pyx_BufFmt_TypeCharToPadding(char ch, CYTHON_UNUSED int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(__Pyx_pad_short) - sizeof(short); + case 'i': case 'I': return sizeof(__Pyx_pad_int) - sizeof(int); + case 'l': case 'L': return sizeof(__Pyx_pad_long) - sizeof(long); +#ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(__Pyx_pad_longlong) - sizeof(PY_LONG_LONG); +#endif + case 'f': return sizeof(__Pyx_pad_float) - sizeof(float); + case 'd': return sizeof(__Pyx_pad_double) - sizeof(double); + case 'g': return sizeof(__Pyx_pad_longdouble) - sizeof(long double); + case 'P': case 'O': return sizeof(__Pyx_pad_void_p) - sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +static char __Pyx_BufFmt_TypeCharToGroup(char ch, int is_complex) { + switch (ch) { + case 'c': + return 'H'; + case 'b': case 'h': case 'i': + case 'l': case 'q': case 's': case 'p': + return 'I'; + case '?': case 'B': case 'H': case 'I': case 'L': case 'Q': + return 'U'; + case 'f': case 'd': case 'g': + return (is_complex ? 'C' : 'R'); + case 'O': + return 'O'; + case 'P': + return 'P'; + default: { + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } + } +} +static void __Pyx_BufFmt_RaiseExpected(__Pyx_BufFmt_Context* ctx) { + if (ctx->head == NULL || ctx->head->field == &ctx->root) { + const char* expected; + const char* quote; + if (ctx->head == NULL) { + expected = "end"; + quote = ""; + } else { + expected = ctx->head->field->type->name; + quote = "'"; + } + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch, expected %s%s%s but got %s", + quote, expected, quote, + __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex)); + } else { + __Pyx_StructField* field = ctx->head->field; + __Pyx_StructField* parent = (ctx->head - 1)->field; + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch, expected '%s' but got %s in '%s.%s'", + field->type->name, __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex), + parent->type->name, field->name); + } +} +static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) { + char group; + size_t size, offset, arraysize = 1; + if (ctx->enc_type == 0) return 0; + if (ctx->head->field->type->arraysize[0]) { + int i, ndim = 0; + if (ctx->enc_type == 's' || ctx->enc_type == 'p') { + ctx->is_valid_array = ctx->head->field->type->ndim == 1; + ndim = 1; + if (ctx->enc_count != ctx->head->field->type->arraysize[0]) { + PyErr_Format(PyExc_ValueError, + "Expected a dimension of size %zu, got %zu", + ctx->head->field->type->arraysize[0], ctx->enc_count); + return -1; + } + } + if (!ctx->is_valid_array) { + PyErr_Format(PyExc_ValueError, "Expected %d dimensions, got %d", + ctx->head->field->type->ndim, ndim); + return -1; + } + for (i = 0; i < ctx->head->field->type->ndim; i++) { + arraysize *= ctx->head->field->type->arraysize[i]; + } + ctx->is_valid_array = 0; + ctx->enc_count = 1; + } + group = __Pyx_BufFmt_TypeCharToGroup(ctx->enc_type, ctx->is_complex); + do { + __Pyx_StructField* field = ctx->head->field; + __Pyx_TypeInfo* type = field->type; + if (ctx->enc_packmode == '@' || ctx->enc_packmode == '^') { + size = __Pyx_BufFmt_TypeCharToNativeSize(ctx->enc_type, ctx->is_complex); + } else { + size = __Pyx_BufFmt_TypeCharToStandardSize(ctx->enc_type, ctx->is_complex); + } + if (ctx->enc_packmode == '@') { + size_t align_at = __Pyx_BufFmt_TypeCharToAlignment(ctx->enc_type, ctx->is_complex); + size_t align_mod_offset; + if (align_at == 0) return -1; + align_mod_offset = ctx->fmt_offset % align_at; + if (align_mod_offset > 0) ctx->fmt_offset += align_at - align_mod_offset; + if (ctx->struct_alignment == 0) + ctx->struct_alignment = __Pyx_BufFmt_TypeCharToPadding(ctx->enc_type, + ctx->is_complex); + } + if (type->size != size || type->typegroup != group) { + if (type->typegroup == 'C' && type->fields != NULL) { + size_t parent_offset = ctx->head->parent_offset + field->offset; + ++ctx->head; + ctx->head->field = type->fields; + ctx->head->parent_offset = parent_offset; + continue; + } + if ((type->typegroup == 'H' || group == 'H') && type->size == size) { + } else { + __Pyx_BufFmt_RaiseExpected(ctx); + return -1; + } + } + offset = ctx->head->parent_offset + field->offset; + if (ctx->fmt_offset != offset) { + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch; next field is at offset %" CYTHON_FORMAT_SSIZE_T "d but %" CYTHON_FORMAT_SSIZE_T "d expected", + (Py_ssize_t)ctx->fmt_offset, (Py_ssize_t)offset); + return -1; + } + ctx->fmt_offset += size; + if (arraysize) + ctx->fmt_offset += (arraysize - 1) * size; + --ctx->enc_count; + while (1) { + if (field == &ctx->root) { + ctx->head = NULL; + if (ctx->enc_count != 0) { + __Pyx_BufFmt_RaiseExpected(ctx); + return -1; + } + break; + } + ctx->head->field = ++field; + if (field->type == NULL) { + --ctx->head; + field = ctx->head->field; + continue; + } else if (field->type->typegroup == 'S') { + size_t parent_offset = ctx->head->parent_offset + field->offset; + if (field->type->fields->type == NULL) continue; + field = field->type->fields; + ++ctx->head; + ctx->head->field = field; + ctx->head->parent_offset = parent_offset; + break; + } else { + break; + } + } + } while (ctx->enc_count); + ctx->enc_type = 0; + ctx->is_complex = 0; + return 0; +} +static PyObject * +__pyx_buffmt_parse_array(__Pyx_BufFmt_Context* ctx, const char** tsp) +{ + const char *ts = *tsp; + int i = 0, number, ndim; + ++ts; + if (ctx->new_count != 1) { + PyErr_SetString(PyExc_ValueError, + "Cannot handle repeated arrays in format string"); + return NULL; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ndim = ctx->head->field->type->ndim; + while (*ts && *ts != ')') { + switch (*ts) { + case ' ': case '\f': case '\r': case '\n': case '\t': case '\v': continue; + default: break; + } + number = __Pyx_BufFmt_ExpectNumber(&ts); + if (number == -1) return NULL; + if (i < ndim && (size_t) number != ctx->head->field->type->arraysize[i]) + return PyErr_Format(PyExc_ValueError, + "Expected a dimension of size %zu, got %d", + ctx->head->field->type->arraysize[i], number); + if (*ts != ',' && *ts != ')') + return PyErr_Format(PyExc_ValueError, + "Expected a comma in format string, got '%c'", *ts); + if (*ts == ',') ts++; + i++; + } + if (i != ndim) + return PyErr_Format(PyExc_ValueError, "Expected %d dimension(s), got %d", + ctx->head->field->type->ndim, i); + if (!*ts) { + PyErr_SetString(PyExc_ValueError, + "Unexpected end of format string, expected ')'"); + return NULL; + } + ctx->is_valid_array = 1; + ctx->new_count = 1; + *tsp = ++ts; + return Py_None; +} +static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) { + int got_Z = 0; + while (1) { + switch(*ts) { + case 0: + if (ctx->enc_type != 0 && ctx->head == NULL) { + __Pyx_BufFmt_RaiseExpected(ctx); + return NULL; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + if (ctx->head != NULL) { + __Pyx_BufFmt_RaiseExpected(ctx); + return NULL; + } + return ts; + case ' ': + case '\r': + case '\n': + ++ts; + break; + case '<': + if (!__Pyx_Is_Little_Endian()) { + PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler"); + return NULL; + } + ctx->new_packmode = '='; + ++ts; + break; + case '>': + case '!': + if (__Pyx_Is_Little_Endian()) { + PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler"); + return NULL; + } + ctx->new_packmode = '='; + ++ts; + break; + case '=': + case '@': + case '^': + ctx->new_packmode = *ts++; + break; + case 'T': + { + const char* ts_after_sub; + size_t i, struct_count = ctx->new_count; + size_t struct_alignment = ctx->struct_alignment; + ctx->new_count = 1; + ++ts; + if (*ts != '{') { + PyErr_SetString(PyExc_ValueError, "Buffer acquisition: Expected '{' after 'T'"); + return NULL; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_type = 0; + ctx->enc_count = 0; + ctx->struct_alignment = 0; + ++ts; + ts_after_sub = ts; + for (i = 0; i != struct_count; ++i) { + ts_after_sub = __Pyx_BufFmt_CheckString(ctx, ts); + if (!ts_after_sub) return NULL; + } + ts = ts_after_sub; + if (struct_alignment) ctx->struct_alignment = struct_alignment; + } + break; + case '}': + { + size_t alignment = ctx->struct_alignment; + ++ts; + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_type = 0; + if (alignment && ctx->fmt_offset % alignment) { + ctx->fmt_offset += alignment - (ctx->fmt_offset % alignment); + } + } + return ts; + case 'x': + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->fmt_offset += ctx->new_count; + ctx->new_count = 1; + ctx->enc_count = 0; + ctx->enc_type = 0; + ctx->enc_packmode = ctx->new_packmode; + ++ts; + break; + case 'Z': + got_Z = 1; + ++ts; + if (*ts != 'f' && *ts != 'd' && *ts != 'g') { + __Pyx_BufFmt_RaiseUnexpectedChar('Z'); + return NULL; + } + CYTHON_FALLTHROUGH; + case '?': case 'c': case 'b': case 'B': case 'h': case 'H': case 'i': case 'I': + case 'l': case 'L': case 'q': case 'Q': + case 'f': case 'd': case 'g': + case 'O': case 'p': + if ((ctx->enc_type == *ts) && (got_Z == ctx->is_complex) && + (ctx->enc_packmode == ctx->new_packmode) && (!ctx->is_valid_array)) { + ctx->enc_count += ctx->new_count; + ctx->new_count = 1; + got_Z = 0; + ++ts; + break; + } + CYTHON_FALLTHROUGH; + case 's': + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_count = ctx->new_count; + ctx->enc_packmode = ctx->new_packmode; + ctx->enc_type = *ts; + ctx->is_complex = got_Z; + ++ts; + ctx->new_count = 1; + got_Z = 0; + break; + case ':': + ++ts; + while(*ts != ':') ++ts; + ++ts; + break; + case '(': + if (!__pyx_buffmt_parse_array(ctx, &ts)) return NULL; + break; + default: + { + int number = __Pyx_BufFmt_ExpectNumber(&ts); + if (number == -1) return NULL; + ctx->new_count = (size_t)number; + } + } + } +} + +/* BufferGetAndValidate */ + static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info) { + if (unlikely(info->buf == NULL)) return; + if (info->suboffsets == __Pyx_minusones) info->suboffsets = NULL; + __Pyx_ReleaseBuffer(info); +} +static void __Pyx_ZeroBuffer(Py_buffer* buf) { + buf->buf = NULL; + buf->obj = NULL; + buf->strides = __Pyx_zeros; + buf->shape = __Pyx_zeros; + buf->suboffsets = __Pyx_minusones; +} +static int __Pyx__GetBufferAndValidate( + Py_buffer* buf, PyObject* obj, __Pyx_TypeInfo* dtype, int flags, + int nd, int cast, __Pyx_BufFmt_StackElem* stack) +{ + buf->buf = NULL; + if (unlikely(__Pyx_GetBuffer(obj, buf, flags) == -1)) { + __Pyx_ZeroBuffer(buf); + return -1; + } + if (unlikely(buf->ndim != nd)) { + PyErr_Format(PyExc_ValueError, + "Buffer has wrong number of dimensions (expected %d, got %d)", + nd, buf->ndim); + goto fail; + } + if (!cast) { + __Pyx_BufFmt_Context ctx; + __Pyx_BufFmt_Init(&ctx, stack, dtype); + if (!__Pyx_BufFmt_CheckString(&ctx, buf->format)) goto fail; + } + if (unlikely((size_t)buf->itemsize != dtype->size)) { + PyErr_Format(PyExc_ValueError, + "Item size of buffer (%" CYTHON_FORMAT_SSIZE_T "d byte%s) does not match size of '%s' (%" CYTHON_FORMAT_SSIZE_T "d byte%s)", + buf->itemsize, (buf->itemsize > 1) ? "s" : "", + dtype->name, (Py_ssize_t)dtype->size, (dtype->size > 1) ? "s" : ""); + goto fail; + } + if (buf->suboffsets == NULL) buf->suboffsets = __Pyx_minusones; + return 0; +fail:; + __Pyx_SafeReleaseBuffer(buf); + return -1; +} + +/* PyObjectGetAttrStr */ + #if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro)) + return tp->tp_getattro(obj, attr_name); +#if PY_MAJOR_VERSION < 3 + if (likely(tp->tp_getattr)) + return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); +#endif + return PyObject_GetAttr(obj, attr_name); +} +#endif + +/* GetBuiltinName */ + static PyObject *__Pyx_GetBuiltinName(PyObject *name) { + PyObject* result = __Pyx_PyObject_GetAttrStr(__pyx_b, name); + if (unlikely(!result)) { + PyErr_Format(PyExc_NameError, +#if PY_MAJOR_VERSION >= 3 + "name '%U' is not defined", name); +#else + "name '%.200s' is not defined", PyString_AS_STRING(name)); +#endif + } + return result; +} + +/* PyDictVersioning */ + #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + return likely(dict) ? __PYX_GET_DICT_VERSION(dict) : 0; +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj) { + PyObject **dictptr = NULL; + Py_ssize_t offset = Py_TYPE(obj)->tp_dictoffset; + if (offset) { +#if CYTHON_COMPILING_IN_CPYTHON + dictptr = (likely(offset > 0)) ? (PyObject **) ((char *)obj + offset) : _PyObject_GetDictPtr(obj); +#else + dictptr = _PyObject_GetDictPtr(obj); +#endif + } + return (dictptr && *dictptr) ? __PYX_GET_DICT_VERSION(*dictptr) : 0; +} +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + if (unlikely(!dict) || unlikely(tp_dict_version != __PYX_GET_DICT_VERSION(dict))) + return 0; + return obj_dict_version == __Pyx_get_object_dict_version(obj); +} +#endif + +/* GetModuleGlobalName */ + #if CYTHON_USE_DICT_VERSIONS +static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value) +#else +static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name) +#endif +{ + PyObject *result; +#if !CYTHON_AVOID_BORROWED_REFS +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 + result = _PyDict_GetItem_KnownHash(__pyx_d, name, ((PyASCIIObject *) name)->hash); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } else if (unlikely(PyErr_Occurred())) { + return NULL; + } +#else + result = PyDict_GetItem(__pyx_d, name); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } +#endif +#else + result = PyObject_GetItem(__pyx_d, name); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } + PyErr_Clear(); +#endif + return __Pyx_GetBuiltinName(name); +} + +/* PyCFunctionFastCall */ + #if CYTHON_FAST_PYCCALL +static CYTHON_INLINE PyObject * __Pyx_PyCFunction_FastCall(PyObject *func_obj, PyObject **args, Py_ssize_t nargs) { + PyCFunctionObject *func = (PyCFunctionObject*)func_obj; + PyCFunction meth = PyCFunction_GET_FUNCTION(func); + PyObject *self = PyCFunction_GET_SELF(func); + int flags = PyCFunction_GET_FLAGS(func); + assert(PyCFunction_Check(func)); + assert(METH_FASTCALL == (flags & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_KEYWORDS | METH_STACKLESS))); + assert(nargs >= 0); + assert(nargs == 0 || args != NULL); + /* _PyCFunction_FastCallDict() must not be called with an exception set, + because it may clear it (directly or indirectly) and so the + caller loses its exception */ + assert(!PyErr_Occurred()); + if ((PY_VERSION_HEX < 0x030700A0) || unlikely(flags & METH_KEYWORDS)) { + return (*((__Pyx_PyCFunctionFastWithKeywords)(void*)meth)) (self, args, nargs, NULL); + } else { + return (*((__Pyx_PyCFunctionFast)(void*)meth)) (self, args, nargs); + } +} +#endif + +/* PyFunctionFastCall */ + #if CYTHON_FAST_PYCALL +static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na, + PyObject *globals) { + PyFrameObject *f; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject **fastlocals; + Py_ssize_t i; + PyObject *result; + assert(globals != NULL); + /* XXX Perhaps we should create a specialized + PyFrame_New() that doesn't take locals, but does + take builtins without sanity checking them. + */ + assert(tstate != NULL); + f = PyFrame_New(tstate, co, globals, NULL); + if (f == NULL) { + return NULL; + } + fastlocals = __Pyx_PyFrame_GetLocalsplus(f); + for (i = 0; i < na; i++) { + Py_INCREF(*args); + fastlocals[i] = *args++; + } + result = PyEval_EvalFrameEx(f,0); + ++tstate->recursion_depth; + Py_DECREF(f); + --tstate->recursion_depth; + return result; +} +#if 1 || PY_VERSION_HEX < 0x030600B1 +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs) { + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + PyObject *globals = PyFunction_GET_GLOBALS(func); + PyObject *argdefs = PyFunction_GET_DEFAULTS(func); + PyObject *closure; +#if PY_MAJOR_VERSION >= 3 + PyObject *kwdefs; +#endif + PyObject *kwtuple, **k; + PyObject **d; + Py_ssize_t nd; + Py_ssize_t nk; + PyObject *result; + assert(kwargs == NULL || PyDict_Check(kwargs)); + nk = kwargs ? PyDict_Size(kwargs) : 0; + if (Py_EnterRecursiveCall((char*)" while calling a Python object")) { + return NULL; + } + if ( +#if PY_MAJOR_VERSION >= 3 + co->co_kwonlyargcount == 0 && +#endif + likely(kwargs == NULL || nk == 0) && + co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { + if (argdefs == NULL && co->co_argcount == nargs) { + result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals); + goto done; + } + else if (nargs == 0 && argdefs != NULL + && co->co_argcount == Py_SIZE(argdefs)) { + /* function called with no arguments, but all parameters have + a default value: use default values as arguments .*/ + args = &PyTuple_GET_ITEM(argdefs, 0); + result =__Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals); + goto done; + } + } + if (kwargs != NULL) { + Py_ssize_t pos, i; + kwtuple = PyTuple_New(2 * nk); + if (kwtuple == NULL) { + result = NULL; + goto done; + } + k = &PyTuple_GET_ITEM(kwtuple, 0); + pos = i = 0; + while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) { + Py_INCREF(k[i]); + Py_INCREF(k[i+1]); + i += 2; + } + nk = i / 2; + } + else { + kwtuple = NULL; + k = NULL; + } + closure = PyFunction_GET_CLOSURE(func); +#if PY_MAJOR_VERSION >= 3 + kwdefs = PyFunction_GET_KW_DEFAULTS(func); +#endif + if (argdefs != NULL) { + d = &PyTuple_GET_ITEM(argdefs, 0); + nd = Py_SIZE(argdefs); + } + else { + d = NULL; + nd = 0; + } +#if PY_MAJOR_VERSION >= 3 + result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, kwdefs, closure); +#else + result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, closure); +#endif + Py_XDECREF(kwtuple); +done: + Py_LeaveRecursiveCall(); + return result; +} +#endif +#endif + +/* PyObjectCall */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *result; + ternaryfunc call = Py_TYPE(func)->tp_call; + if (unlikely(!call)) + return PyObject_Call(func, arg, kw); + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + result = (*call)(func, arg, kw); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* PyObjectCall2Args */ + static CYTHON_UNUSED PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2) { + PyObject *args, *result = NULL; + #if CYTHON_FAST_PYCALL + if (PyFunction_Check(function)) { + PyObject *args[2] = {arg1, arg2}; + return __Pyx_PyFunction_FastCall(function, args, 2); + } + #endif + #if CYTHON_FAST_PYCCALL + if (__Pyx_PyFastCFunction_Check(function)) { + PyObject *args[2] = {arg1, arg2}; + return __Pyx_PyCFunction_FastCall(function, args, 2); + } + #endif + args = PyTuple_New(2); + if (unlikely(!args)) goto done; + Py_INCREF(arg1); + PyTuple_SET_ITEM(args, 0, arg1); + Py_INCREF(arg2); + PyTuple_SET_ITEM(args, 1, arg2); + Py_INCREF(function); + result = __Pyx_PyObject_Call(function, args, NULL); + Py_DECREF(args); + Py_DECREF(function); +done: + return result; +} + +/* PyObjectCallMethO */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { + PyObject *self, *result; + PyCFunction cfunc; + cfunc = PyCFunction_GET_FUNCTION(func); + self = PyCFunction_GET_SELF(func); + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + result = cfunc(self, arg); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* PyObjectCallOneArg */ + #if CYTHON_COMPILING_IN_CPYTHON +static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_New(1); + if (unlikely(!args)) return NULL; + Py_INCREF(arg); + PyTuple_SET_ITEM(args, 0, arg); + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + return result; +} +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { +#if CYTHON_FAST_PYCALL + if (PyFunction_Check(func)) { + return __Pyx_PyFunction_FastCall(func, &arg, 1); + } +#endif + if (likely(PyCFunction_Check(func))) { + if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) { + return __Pyx_PyObject_CallMethO(func, arg); +#if CYTHON_FAST_PYCCALL + } else if (__Pyx_PyFastCFunction_Check(func)) { + return __Pyx_PyCFunction_FastCall(func, &arg, 1); +#endif + } + } + return __Pyx__PyObject_CallOneArg(func, arg); +} +#else +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_Pack(1, arg); + if (unlikely(!args)) return NULL; + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + return result; +} +#endif + +/* ExtTypeTest */ + static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) { + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + if (likely(__Pyx_TypeCheck(obj, type))) + return 1; + PyErr_Format(PyExc_TypeError, "Cannot convert %.200s to %.200s", + Py_TYPE(obj)->tp_name, type->tp_name); + return 0; +} + +/* GetItemInt */ + static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) { + PyObject *r; + if (!j) return NULL; + r = PyObject_GetItem(o, j); + Py_DECREF(j); + return r; +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + Py_ssize_t wrapped_i = i; + if (wraparound & unlikely(i < 0)) { + wrapped_i += PyList_GET_SIZE(o); + } + if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyList_GET_SIZE(o)))) { + PyObject *r = PyList_GET_ITEM(o, wrapped_i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); +#endif +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + Py_ssize_t wrapped_i = i; + if (wraparound & unlikely(i < 0)) { + wrapped_i += PyTuple_GET_SIZE(o); + } + if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, wrapped_i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); +#endif +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS + if (is_list || PyList_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o); + if ((!boundscheck) || (likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o))))) { + PyObject *r = PyList_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } + else if (PyTuple_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o); + if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } else { + PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence; + if (likely(m && m->sq_item)) { + if (wraparound && unlikely(i < 0) && likely(m->sq_length)) { + Py_ssize_t l = m->sq_length(o); + if (likely(l >= 0)) { + i += l; + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + return NULL; + PyErr_Clear(); + } + } + return m->sq_item(o, i); + } + } +#else + if (is_list || PySequence_Check(o)) { + return PySequence_GetItem(o, i); + } +#endif + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +} + +/* PyErrFetchRestore */ + #if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { + PyObject *tmp_type, *tmp_value, *tmp_tb; + tmp_type = tstate->curexc_type; + tmp_value = tstate->curexc_value; + tmp_tb = tstate->curexc_traceback; + tstate->curexc_type = type; + tstate->curexc_value = value; + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +} +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + *type = tstate->curexc_type; + *value = tstate->curexc_value; + *tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +} +#endif + +/* GetTopmostException */ + #if CYTHON_USE_EXC_INFO_STACK +static _PyErr_StackItem * +__Pyx_PyErr_GetTopmostException(PyThreadState *tstate) +{ + _PyErr_StackItem *exc_info = tstate->exc_info; + while ((exc_info->exc_type == NULL || exc_info->exc_type == Py_None) && + exc_info->previous_item != NULL) + { + exc_info = exc_info->previous_item; + } + return exc_info; +} +#endif + +/* SaveResetException */ + #if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + #if CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); + *type = exc_info->exc_type; + *value = exc_info->exc_value; + *tb = exc_info->exc_traceback; + #else + *type = tstate->exc_type; + *value = tstate->exc_value; + *tb = tstate->exc_traceback; + #endif + Py_XINCREF(*type); + Py_XINCREF(*value); + Py_XINCREF(*tb); +} +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { + PyObject *tmp_type, *tmp_value, *tmp_tb; + #if CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = tstate->exc_info; + tmp_type = exc_info->exc_type; + tmp_value = exc_info->exc_value; + tmp_tb = exc_info->exc_traceback; + exc_info->exc_type = type; + exc_info->exc_value = value; + exc_info->exc_traceback = tb; + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = type; + tstate->exc_value = value; + tstate->exc_traceback = tb; + #endif + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +} +#endif + +/* PyErrExceptionMatches */ + #if CYTHON_FAST_THREAD_STATE +static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; icurexc_type; + if (exc_type == err) return 1; + if (unlikely(!exc_type)) return 0; + if (unlikely(PyTuple_Check(err))) + return __Pyx_PyErr_ExceptionMatchesTuple(exc_type, err); + return __Pyx_PyErr_GivenExceptionMatches(exc_type, err); +} +#endif + +/* GetException */ + #if CYTHON_FAST_THREAD_STATE +static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) +#else +static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) +#endif +{ + PyObject *local_type, *local_value, *local_tb; +#if CYTHON_FAST_THREAD_STATE + PyObject *tmp_type, *tmp_value, *tmp_tb; + local_type = tstate->curexc_type; + local_value = tstate->curexc_value; + local_tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +#else + PyErr_Fetch(&local_type, &local_value, &local_tb); +#endif + PyErr_NormalizeException(&local_type, &local_value, &local_tb); +#if CYTHON_FAST_THREAD_STATE + if (unlikely(tstate->curexc_type)) +#else + if (unlikely(PyErr_Occurred())) +#endif + goto bad; + #if PY_MAJOR_VERSION >= 3 + if (local_tb) { + if (unlikely(PyException_SetTraceback(local_value, local_tb) < 0)) + goto bad; + } + #endif + Py_XINCREF(local_tb); + Py_XINCREF(local_type); + Py_XINCREF(local_value); + *type = local_type; + *value = local_value; + *tb = local_tb; +#if CYTHON_FAST_THREAD_STATE + #if CYTHON_USE_EXC_INFO_STACK + { + _PyErr_StackItem *exc_info = tstate->exc_info; + tmp_type = exc_info->exc_type; + tmp_value = exc_info->exc_value; + tmp_tb = exc_info->exc_traceback; + exc_info->exc_type = local_type; + exc_info->exc_value = local_value; + exc_info->exc_traceback = local_tb; + } + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = local_type; + tstate->exc_value = local_value; + tstate->exc_traceback = local_tb; + #endif + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +#else + PyErr_SetExcInfo(local_type, local_value, local_tb); +#endif + return 0; +bad: + *type = 0; + *value = 0; + *tb = 0; + Py_XDECREF(local_type); + Py_XDECREF(local_value); + Py_XDECREF(local_tb); + return -1; +} + +/* RaiseException */ + #if PY_MAJOR_VERSION < 3 +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, + CYTHON_UNUSED PyObject *cause) { + __Pyx_PyThreadState_declare + Py_XINCREF(type); + if (!value || value == Py_None) + value = NULL; + else + Py_INCREF(value); + if (!tb || tb == Py_None) + tb = NULL; + else { + Py_INCREF(tb); + if (!PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto raise_error; + } + } + if (PyType_Check(type)) { +#if CYTHON_COMPILING_IN_PYPY + if (!value) { + Py_INCREF(Py_None); + value = Py_None; + } +#endif + PyErr_NormalizeException(&type, &value, &tb); + } else { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto raise_error; + } + value = type; + type = (PyObject*) Py_TYPE(type); + Py_INCREF(type); + if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto raise_error; + } + } + __Pyx_PyThreadState_assign + __Pyx_ErrRestore(type, value, tb); + return; +raise_error: + Py_XDECREF(value); + Py_XDECREF(type); + Py_XDECREF(tb); + return; +} +#else +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { + PyObject* owned_instance = NULL; + if (tb == Py_None) { + tb = 0; + } else if (tb && !PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto bad; + } + if (value == Py_None) + value = 0; + if (PyExceptionInstance_Check(type)) { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto bad; + } + value = type; + type = (PyObject*) Py_TYPE(value); + } else if (PyExceptionClass_Check(type)) { + PyObject *instance_class = NULL; + if (value && PyExceptionInstance_Check(value)) { + instance_class = (PyObject*) Py_TYPE(value); + if (instance_class != type) { + int is_subclass = PyObject_IsSubclass(instance_class, type); + if (!is_subclass) { + instance_class = NULL; + } else if (unlikely(is_subclass == -1)) { + goto bad; + } else { + type = instance_class; + } + } + } + if (!instance_class) { + PyObject *args; + if (!value) + args = PyTuple_New(0); + else if (PyTuple_Check(value)) { + Py_INCREF(value); + args = value; + } else + args = PyTuple_Pack(1, value); + if (!args) + goto bad; + owned_instance = PyObject_Call(type, args, NULL); + Py_DECREF(args); + if (!owned_instance) + goto bad; + value = owned_instance; + if (!PyExceptionInstance_Check(value)) { + PyErr_Format(PyExc_TypeError, + "calling %R should have returned an instance of " + "BaseException, not %R", + type, Py_TYPE(value)); + goto bad; + } + } + } else { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto bad; + } + if (cause) { + PyObject *fixed_cause; + if (cause == Py_None) { + fixed_cause = NULL; + } else if (PyExceptionClass_Check(cause)) { + fixed_cause = PyObject_CallObject(cause, NULL); + if (fixed_cause == NULL) + goto bad; + } else if (PyExceptionInstance_Check(cause)) { + fixed_cause = cause; + Py_INCREF(fixed_cause); + } else { + PyErr_SetString(PyExc_TypeError, + "exception causes must derive from " + "BaseException"); + goto bad; + } + PyException_SetCause(value, fixed_cause); + } + PyErr_SetObject(type, value); + if (tb) { +#if CYTHON_COMPILING_IN_PYPY + PyObject *tmp_type, *tmp_value, *tmp_tb; + PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb); + Py_INCREF(tb); + PyErr_Restore(tmp_type, tmp_value, tb); + Py_XDECREF(tmp_tb); +#else + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject* tmp_tb = tstate->curexc_traceback; + if (tb != tmp_tb) { + Py_INCREF(tb); + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_tb); + } +#endif + } +bad: + Py_XDECREF(owned_instance); + return; +} +#endif + +/* TypeImport */ + #ifndef __PYX_HAVE_RT_ImportType +#define __PYX_HAVE_RT_ImportType +static PyTypeObject *__Pyx_ImportType(PyObject *module, const char *module_name, const char *class_name, + size_t size, enum __Pyx_ImportType_CheckSize check_size) +{ + PyObject *result = 0; + char warning[200]; + Py_ssize_t basicsize; +#ifdef Py_LIMITED_API + PyObject *py_basicsize; +#endif + result = PyObject_GetAttrString(module, class_name); + if (!result) + goto bad; + if (!PyType_Check(result)) { + PyErr_Format(PyExc_TypeError, + "%.200s.%.200s is not a type object", + module_name, class_name); + goto bad; + } +#ifndef Py_LIMITED_API + basicsize = ((PyTypeObject *)result)->tp_basicsize; +#else + py_basicsize = PyObject_GetAttrString(result, "__basicsize__"); + if (!py_basicsize) + goto bad; + basicsize = PyLong_AsSsize_t(py_basicsize); + Py_DECREF(py_basicsize); + py_basicsize = 0; + if (basicsize == (Py_ssize_t)-1 && PyErr_Occurred()) + goto bad; +#endif + if ((size_t)basicsize < size) { + PyErr_Format(PyExc_ValueError, + "%.200s.%.200s size changed, may indicate binary incompatibility. " + "Expected %zd from C header, got %zd from PyObject", + module_name, class_name, size, basicsize); + goto bad; + } + if (check_size == __Pyx_ImportType_CheckSize_Error && (size_t)basicsize != size) { + PyErr_Format(PyExc_ValueError, + "%.200s.%.200s size changed, may indicate binary incompatibility. " + "Expected %zd from C header, got %zd from PyObject", + module_name, class_name, size, basicsize); + goto bad; + } + else if (check_size == __Pyx_ImportType_CheckSize_Warn && (size_t)basicsize > size) { + PyOS_snprintf(warning, sizeof(warning), + "%s.%s size changed, may indicate binary incompatibility. " + "Expected %zd from C header, got %zd from PyObject", + module_name, class_name, size, basicsize); + if (PyErr_WarnEx(NULL, warning, 0) < 0) goto bad; + } + return (PyTypeObject *)result; +bad: + Py_XDECREF(result); + return NULL; +} +#endif + +/* Import */ + static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { + PyObject *empty_list = 0; + PyObject *module = 0; + PyObject *global_dict = 0; + PyObject *empty_dict = 0; + PyObject *list; + #if PY_MAJOR_VERSION < 3 + PyObject *py_import; + py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); + if (!py_import) + goto bad; + #endif + if (from_list) + list = from_list; + else { + empty_list = PyList_New(0); + if (!empty_list) + goto bad; + list = empty_list; + } + global_dict = PyModule_GetDict(__pyx_m); + if (!global_dict) + goto bad; + empty_dict = PyDict_New(); + if (!empty_dict) + goto bad; + { + #if PY_MAJOR_VERSION >= 3 + if (level == -1) { + if ((1) && (strchr(__Pyx_MODULE_NAME, '.'))) { + module = PyImport_ImportModuleLevelObject( + name, global_dict, empty_dict, list, 1); + if (!module) { + if (!PyErr_ExceptionMatches(PyExc_ImportError)) + goto bad; + PyErr_Clear(); + } + } + level = 0; + } + #endif + if (!module) { + #if PY_MAJOR_VERSION < 3 + PyObject *py_level = PyInt_FromLong(level); + if (!py_level) + goto bad; + module = PyObject_CallFunctionObjArgs(py_import, + name, global_dict, empty_dict, list, py_level, (PyObject *)NULL); + Py_DECREF(py_level); + #else + module = PyImport_ImportModuleLevelObject( + name, global_dict, empty_dict, list, level); + #endif + } + } +bad: + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(py_import); + #endif + Py_XDECREF(empty_list); + Py_XDECREF(empty_dict); + return module; +} + +/* CLineInTraceback */ + #ifndef CYTHON_CLINE_IN_TRACEBACK +static int __Pyx_CLineForTraceback(CYTHON_NCP_UNUSED PyThreadState *tstate, int c_line) { + PyObject *use_cline; + PyObject *ptype, *pvalue, *ptraceback; +#if CYTHON_COMPILING_IN_CPYTHON + PyObject **cython_runtime_dict; +#endif + if (unlikely(!__pyx_cython_runtime)) { + return c_line; + } + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); +#if CYTHON_COMPILING_IN_CPYTHON + cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime); + if (likely(cython_runtime_dict)) { + __PYX_PY_DICT_LOOKUP_IF_MODIFIED( + use_cline, *cython_runtime_dict, + __Pyx_PyDict_GetItemStr(*cython_runtime_dict, __pyx_n_s_cline_in_traceback)) + } else +#endif + { + PyObject *use_cline_obj = __Pyx_PyObject_GetAttrStr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback); + if (use_cline_obj) { + use_cline = PyObject_Not(use_cline_obj) ? Py_False : Py_True; + Py_DECREF(use_cline_obj); + } else { + PyErr_Clear(); + use_cline = NULL; + } + } + if (!use_cline) { + c_line = 0; + PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False); + } + else if (use_cline == Py_False || (use_cline != Py_True && PyObject_Not(use_cline) != 0)) { + c_line = 0; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + return c_line; +} +#endif + +/* CodeObjectCache */ + static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { + int start = 0, mid = 0, end = count - 1; + if (end >= 0 && code_line > entries[end].code_line) { + return count; + } + while (start < end) { + mid = start + (end - start) / 2; + if (code_line < entries[mid].code_line) { + end = mid; + } else if (code_line > entries[mid].code_line) { + start = mid + 1; + } else { + return mid; + } + } + if (code_line <= entries[mid].code_line) { + return mid; + } else { + return mid + 1; + } +} +static PyCodeObject *__pyx_find_code_object(int code_line) { + PyCodeObject* code_object; + int pos; + if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { + return NULL; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { + return NULL; + } + code_object = __pyx_code_cache.entries[pos].code_object; + Py_INCREF(code_object); + return code_object; +} +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { + int pos, i; + __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; + if (unlikely(!code_line)) { + return; + } + if (unlikely(!entries)) { + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); + if (likely(entries)) { + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = 64; + __pyx_code_cache.count = 1; + entries[0].code_line = code_line; + entries[0].code_object = code_object; + Py_INCREF(code_object); + } + return; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { + PyCodeObject* tmp = entries[pos].code_object; + entries[pos].code_object = code_object; + Py_DECREF(tmp); + return; + } + if (__pyx_code_cache.count == __pyx_code_cache.max_count) { + int new_max = __pyx_code_cache.max_count + 64; + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( + __pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry)); + if (unlikely(!entries)) { + return; + } + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = new_max; + } + for (i=__pyx_code_cache.count; i>pos; i--) { + entries[i] = entries[i-1]; + } + entries[pos].code_line = code_line; + entries[pos].code_object = code_object; + __pyx_code_cache.count++; + Py_INCREF(code_object); +} + +/* AddTraceback */ + #include "compile.h" +#include "frameobject.h" +#include "traceback.h" +static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( + const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyObject *py_srcfile = 0; + PyObject *py_funcname = 0; + #if PY_MAJOR_VERSION < 3 + py_srcfile = PyString_FromString(filename); + #else + py_srcfile = PyUnicode_FromString(filename); + #endif + if (!py_srcfile) goto bad; + if (c_line) { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + #else + py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + #endif + } + else { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromString(funcname); + #else + py_funcname = PyUnicode_FromString(funcname); + #endif + } + if (!py_funcname) goto bad; + py_code = __Pyx_PyCode_New( + 0, + 0, + 0, + 0, + 0, + __pyx_empty_bytes, /*PyObject *code,*/ + __pyx_empty_tuple, /*PyObject *consts,*/ + __pyx_empty_tuple, /*PyObject *names,*/ + __pyx_empty_tuple, /*PyObject *varnames,*/ + __pyx_empty_tuple, /*PyObject *freevars,*/ + __pyx_empty_tuple, /*PyObject *cellvars,*/ + py_srcfile, /*PyObject *filename,*/ + py_funcname, /*PyObject *name,*/ + py_line, + __pyx_empty_bytes /*PyObject *lnotab*/ + ); + Py_DECREF(py_srcfile); + Py_DECREF(py_funcname); + return py_code; +bad: + Py_XDECREF(py_srcfile); + Py_XDECREF(py_funcname); + return NULL; +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyFrameObject *py_frame = 0; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + if (c_line) { + c_line = __Pyx_CLineForTraceback(tstate, c_line); + } + py_code = __pyx_find_code_object(c_line ? -c_line : py_line); + if (!py_code) { + py_code = __Pyx_CreateCodeObjectForTraceback( + funcname, c_line, py_line, filename); + if (!py_code) goto bad; + __pyx_insert_code_object(c_line ? -c_line : py_line, py_code); + } + py_frame = PyFrame_New( + tstate, /*PyThreadState *tstate,*/ + py_code, /*PyCodeObject *code,*/ + __pyx_d, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (!py_frame) goto bad; + __Pyx_PyFrame_SetLineNumber(py_frame, py_line); + PyTraceBack_Here(py_frame); +bad: + Py_XDECREF(py_code); + Py_XDECREF(py_frame); +} + +#if PY_MAJOR_VERSION < 3 +static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags) { + if (PyObject_CheckBuffer(obj)) return PyObject_GetBuffer(obj, view, flags); + PyErr_Format(PyExc_TypeError, "'%.200s' does not have the buffer interface", Py_TYPE(obj)->tp_name); + return -1; +} +static void __Pyx_ReleaseBuffer(Py_buffer *view) { + PyObject *obj = view->obj; + if (!obj) return; + if (PyObject_CheckBuffer(obj)) { + PyBuffer_Release(view); + return; + } + if ((0)) {} + view->obj = NULL; + Py_DECREF(obj); +} +#endif + + + /* CIntFromPyVerify */ + #define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) +#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) +#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ + {\ + func_type value = func_value;\ + if (sizeof(target_type) < sizeof(func_type)) {\ + if (unlikely(value != (func_type) (target_type) value)) {\ + func_type zero = 0;\ + if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ + return (target_type) -1;\ + if (is_unsigned && unlikely(value < zero))\ + goto raise_neg_overflow;\ + else\ + goto raise_overflow;\ + }\ + }\ + return (target_type) value;\ + } + +/* Declarations */ + #if CYTHON_CCOMPLEX + #ifdef __cplusplus + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + return ::std::complex< float >(x, y); + } + #else + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + return x + y*(__pyx_t_float_complex)_Complex_I; + } + #endif +#else + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + __pyx_t_float_complex z; + z.real = x; + z.imag = y; + return z; + } +#endif + +/* Arithmetic */ + #if CYTHON_CCOMPLEX +#else + static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + return (a.real == b.real) && (a.imag == b.imag); + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real + b.real; + z.imag = a.imag + b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real - b.real; + z.imag = a.imag - b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real * b.real - a.imag * b.imag; + z.imag = a.real * b.imag + a.imag * b.real; + return z; + } + #if 1 + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + if (b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.real); + } else if (fabsf(b.real) >= fabsf(b.imag)) { + if (b.real == 0 && b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.imag); + } else { + float r = b.imag / b.real; + float s = (float)(1.0) / (b.real + b.imag * r); + return __pyx_t_float_complex_from_parts( + (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); + } + } else { + float r = b.real / b.imag; + float s = (float)(1.0) / (b.imag + b.real * r); + return __pyx_t_float_complex_from_parts( + (a.real * r + a.imag) * s, (a.imag * r - a.real) * s); + } + } + #else + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + if (b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.real); + } else { + float denom = b.real * b.real + b.imag * b.imag; + return __pyx_t_float_complex_from_parts( + (a.real * b.real + a.imag * b.imag) / denom, + (a.imag * b.real - a.real * b.imag) / denom); + } + } + #endif + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex a) { + __pyx_t_float_complex z; + z.real = -a.real; + z.imag = -a.imag; + return z; + } + static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex a) { + return (a.real == 0) && (a.imag == 0); + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex a) { + __pyx_t_float_complex z; + z.real = a.real; + z.imag = -a.imag; + return z; + } + #if 1 + static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex z) { + #if !defined(HAVE_HYPOT) || defined(_MSC_VER) + return sqrtf(z.real*z.real + z.imag*z.imag); + #else + return hypotf(z.real, z.imag); + #endif + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + float r, lnr, theta, z_r, z_theta; + if (b.imag == 0 && b.real == (int)b.real) { + if (b.real < 0) { + float denom = a.real * a.real + a.imag * a.imag; + a.real = a.real / denom; + a.imag = -a.imag / denom; + b.real = -b.real; + } + switch ((int)b.real) { + case 0: + z.real = 1; + z.imag = 0; + return z; + case 1: + return a; + case 2: + return __Pyx_c_prod_float(a, a); + case 3: + z = __Pyx_c_prod_float(a, a); + return __Pyx_c_prod_float(z, a); + case 4: + z = __Pyx_c_prod_float(a, a); + return __Pyx_c_prod_float(z, z); + } + } + if (a.imag == 0) { + if (a.real == 0) { + return a; + } else if (b.imag == 0) { + z.real = powf(a.real, b.real); + z.imag = 0; + return z; + } else if (a.real > 0) { + r = a.real; + theta = 0; + } else { + r = -a.real; + theta = atan2f(0.0, -1.0); + } + } else { + r = __Pyx_c_abs_float(a); + theta = atan2f(a.imag, a.real); + } + lnr = logf(r); + z_r = expf(lnr * b.real - theta * b.imag); + z_theta = theta * b.real + lnr * b.imag; + z.real = z_r * cosf(z_theta); + z.imag = z_r * sinf(z_theta); + return z; + } + #endif +#endif + +/* Declarations */ + #if CYTHON_CCOMPLEX + #ifdef __cplusplus + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + return ::std::complex< double >(x, y); + } + #else + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + return x + y*(__pyx_t_double_complex)_Complex_I; + } + #endif +#else + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + __pyx_t_double_complex z; + z.real = x; + z.imag = y; + return z; + } +#endif + +/* Arithmetic */ + #if CYTHON_CCOMPLEX +#else + static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + return (a.real == b.real) && (a.imag == b.imag); + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real + b.real; + z.imag = a.imag + b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real - b.real; + z.imag = a.imag - b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real * b.real - a.imag * b.imag; + z.imag = a.real * b.imag + a.imag * b.real; + return z; + } + #if 1 + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + if (b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real); + } else if (fabs(b.real) >= fabs(b.imag)) { + if (b.real == 0 && b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.imag); + } else { + double r = b.imag / b.real; + double s = (double)(1.0) / (b.real + b.imag * r); + return __pyx_t_double_complex_from_parts( + (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); + } + } else { + double r = b.real / b.imag; + double s = (double)(1.0) / (b.imag + b.real * r); + return __pyx_t_double_complex_from_parts( + (a.real * r + a.imag) * s, (a.imag * r - a.real) * s); + } + } + #else + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + if (b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real); + } else { + double denom = b.real * b.real + b.imag * b.imag; + return __pyx_t_double_complex_from_parts( + (a.real * b.real + a.imag * b.imag) / denom, + (a.imag * b.real - a.real * b.imag) / denom); + } + } + #endif + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex a) { + __pyx_t_double_complex z; + z.real = -a.real; + z.imag = -a.imag; + return z; + } + static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex a) { + return (a.real == 0) && (a.imag == 0); + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex a) { + __pyx_t_double_complex z; + z.real = a.real; + z.imag = -a.imag; + return z; + } + #if 1 + static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex z) { + #if !defined(HAVE_HYPOT) || defined(_MSC_VER) + return sqrt(z.real*z.real + z.imag*z.imag); + #else + return hypot(z.real, z.imag); + #endif + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + double r, lnr, theta, z_r, z_theta; + if (b.imag == 0 && b.real == (int)b.real) { + if (b.real < 0) { + double denom = a.real * a.real + a.imag * a.imag; + a.real = a.real / denom; + a.imag = -a.imag / denom; + b.real = -b.real; + } + switch ((int)b.real) { + case 0: + z.real = 1; + z.imag = 0; + return z; + case 1: + return a; + case 2: + return __Pyx_c_prod_double(a, a); + case 3: + z = __Pyx_c_prod_double(a, a); + return __Pyx_c_prod_double(z, a); + case 4: + z = __Pyx_c_prod_double(a, a); + return __Pyx_c_prod_double(z, z); + } + } + if (a.imag == 0) { + if (a.real == 0) { + return a; + } else if (b.imag == 0) { + z.real = pow(a.real, b.real); + z.imag = 0; + return z; + } else if (a.real > 0) { + r = a.real; + theta = 0; + } else { + r = -a.real; + theta = atan2(0.0, -1.0); + } + } else { + r = __Pyx_c_abs_double(a); + theta = atan2(a.imag, a.real); + } + lnr = log(r); + z_r = exp(lnr * b.real - theta * b.imag); + z_theta = theta * b.real + lnr * b.imag; + z.real = z_r * cos(z_theta); + z.imag = z_r * sin(z_theta); + return z; + } + #endif +#endif + +/* CIntFromPy */ + static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const int neg_one = (int) -1, const_zero = (int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(int) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (int) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (int) 0; + case 1: __PYX_VERIFY_RETURN_INT(int, digit, digits[0]) + case 2: + if (8 * sizeof(int) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 2 * PyLong_SHIFT) { + return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(int) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 3 * PyLong_SHIFT) { + return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(int) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 4 * PyLong_SHIFT) { + return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (int) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(int) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (int) 0; + case -1: __PYX_VERIFY_RETURN_INT(int, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(int, digit, +digits[0]) + case -2: + if (8 * sizeof(int) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(int) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(int) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(int) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { + return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + } +#endif + if (sizeof(int) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + int val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (int) -1; + } + } else { + int val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (int) -1; + val = __Pyx_PyInt_As_int(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to int"); + return (int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to int"); + return (int) -1; +} + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(long) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(long) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(long), + little, !is_unsigned); + } +} + +/* CIntFromPy */ + static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(long) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (long) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (long) 0; + case 1: __PYX_VERIFY_RETURN_INT(long, digit, digits[0]) + case 2: + if (8 * sizeof(long) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 2 * PyLong_SHIFT) { + return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(long) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 3 * PyLong_SHIFT) { + return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(long) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 4 * PyLong_SHIFT) { + return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (long) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(long) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (long) 0; + case -1: __PYX_VERIFY_RETURN_INT(long, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(long, digit, +digits[0]) + case -2: + if (8 * sizeof(long) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(long) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(long) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(long) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + } +#endif + if (sizeof(long) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + long val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (long) -1; + } + } else { + long val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (long) -1; + val = __Pyx_PyInt_As_long(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to long"); + return (long) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to long"); + return (long) -1; +} + +/* FastTypeChecks */ + #if CYTHON_COMPILING_IN_CPYTHON +static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) { + while (a) { + a = a->tp_base; + if (a == b) + return 1; + } + return b == &PyBaseObject_Type; +} +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) { + PyObject *mro; + if (a == b) return 1; + mro = a->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(a, b); +} +#if PY_MAJOR_VERSION == 2 +static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) { + PyObject *exception, *value, *tb; + int res; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&exception, &value, &tb); + res = exc_type1 ? PyObject_IsSubclass(err, exc_type1) : 0; + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + if (!res) { + res = PyObject_IsSubclass(err, exc_type2); + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + } + __Pyx_ErrRestore(exception, value, tb); + return res; +} +#else +static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) { + int res = exc_type1 ? __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type1) : 0; + if (!res) { + res = __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2); + } + return res; +} +#endif +static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + assert(PyExceptionClass_Check(exc_type)); + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; ip) { + #if PY_MAJOR_VERSION < 3 + if (t->is_unicode) { + *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); + } else if (t->intern) { + *t->p = PyString_InternFromString(t->s); + } else { + *t->p = PyString_FromStringAndSize(t->s, t->n - 1); + } + #else + if (t->is_unicode | t->is_str) { + if (t->intern) { + *t->p = PyUnicode_InternFromString(t->s); + } else if (t->encoding) { + *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL); + } else { + *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1); + } + } else { + *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1); + } + #endif + if (!*t->p) + return -1; + if (PyObject_Hash(*t->p) == -1) + return -1; + ++t; + } + return 0; +} + +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { + return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str)); +} +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) { + Py_ssize_t ignore; + return __Pyx_PyObject_AsStringAndSize(o, &ignore); +} +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#if !CYTHON_PEP393_ENABLED +static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + char* defenc_c; + PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); + if (!defenc) return NULL; + defenc_c = PyBytes_AS_STRING(defenc); +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + { + char* end = defenc_c + PyBytes_GET_SIZE(defenc); + char* c; + for (c = defenc_c; c < end; c++) { + if ((unsigned char) (*c) >= 128) { + PyUnicode_AsASCIIString(o); + return NULL; + } + } + } +#endif + *length = PyBytes_GET_SIZE(defenc); + return defenc_c; +} +#else +static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL; +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + if (likely(PyUnicode_IS_ASCII(o))) { + *length = PyUnicode_GET_LENGTH(o); + return PyUnicode_AsUTF8(o); + } else { + PyUnicode_AsASCIIString(o); + return NULL; + } +#else + return PyUnicode_AsUTF8AndSize(o, length); +#endif +} +#endif +#endif +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT + if ( +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + __Pyx_sys_getdefaultencoding_not_ascii && +#endif + PyUnicode_Check(o)) { + return __Pyx_PyUnicode_AsStringAndSize(o, length); + } else +#endif +#if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) + if (PyByteArray_Check(o)) { + *length = PyByteArray_GET_SIZE(o); + return PyByteArray_AS_STRING(o); + } else +#endif + { + char* result; + int r = PyBytes_AsStringAndSize(o, &result, length); + if (unlikely(r < 0)) { + return NULL; + } else { + return result; + } + } +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { + int is_true = x == Py_True; + if (is_true | (x == Py_False) | (x == Py_None)) return is_true; + else return PyObject_IsTrue(x); +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) { + int retval; + if (unlikely(!x)) return -1; + retval = __Pyx_PyObject_IsTrue(x); + Py_DECREF(x); + return retval; +} +static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) { +#if PY_MAJOR_VERSION >= 3 + if (PyLong_Check(result)) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "__int__ returned non-int (type %.200s). " + "The ability to return an instance of a strict subclass of int " + "is deprecated, and may be removed in a future version of Python.", + Py_TYPE(result)->tp_name)) { + Py_DECREF(result); + return NULL; + } + return result; + } +#endif + PyErr_Format(PyExc_TypeError, + "__%.4s__ returned non-%.4s (type %.200s)", + type_name, type_name, Py_TYPE(result)->tp_name); + Py_DECREF(result); + return NULL; +} +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { +#if CYTHON_USE_TYPE_SLOTS + PyNumberMethods *m; +#endif + const char *name = NULL; + PyObject *res = NULL; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x) || PyLong_Check(x))) +#else + if (likely(PyLong_Check(x))) +#endif + return __Pyx_NewRef(x); +#if CYTHON_USE_TYPE_SLOTS + m = Py_TYPE(x)->tp_as_number; + #if PY_MAJOR_VERSION < 3 + if (m && m->nb_int) { + name = "int"; + res = m->nb_int(x); + } + else if (m && m->nb_long) { + name = "long"; + res = m->nb_long(x); + } + #else + if (likely(m && m->nb_int)) { + name = "int"; + res = m->nb_int(x); + } + #endif +#else + if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) { + res = PyNumber_Int(x); + } +#endif + if (likely(res)) { +#if PY_MAJOR_VERSION < 3 + if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) { +#else + if (unlikely(!PyLong_CheckExact(res))) { +#endif + return __Pyx_PyNumber_IntOrLongWrongResultType(res, name); + } + } + else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + } + return res; +} +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { + Py_ssize_t ival; + PyObject *x; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(b))) { + if (sizeof(Py_ssize_t) >= sizeof(long)) + return PyInt_AS_LONG(b); + else + return PyInt_AsSsize_t(b); + } +#endif + if (likely(PyLong_CheckExact(b))) { + #if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)b)->ob_digit; + const Py_ssize_t size = Py_SIZE(b); + if (likely(__Pyx_sst_abs(size) <= 1)) { + ival = likely(size) ? digits[0] : 0; + if (size == -1) ival = -ival; + return ival; + } else { + switch (size) { + case 2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + } + } + #endif + return PyLong_AsSsize_t(b); + } + x = PyNumber_Index(b); + if (!x) return -1; + ival = PyInt_AsSsize_t(x); + Py_DECREF(x); + return ival; +} +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) { + return b ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False); +} +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { + return PyInt_FromSize_t(ival); +} + + +#endif /* Py_PYTHON_H */ diff --git a/mmcv/mmcv/video/optflow_warp/flow_warp_module.pyx b/mmcv/mmcv/video/optflow_warp/flow_warp_module.pyx new file mode 100644 index 0000000..f25f949 --- /dev/null +++ b/mmcv/mmcv/video/optflow_warp/flow_warp_module.pyx @@ -0,0 +1,29 @@ +# Copyright (c) Open-MMLab. All rights reserved. +# cython: language_level=3 +STUFF = "Hi" + +import numpy as np +cimport numpy as np + +np.import_array() + +cdef extern from "flow_warp.hpp": + void FlowWarp(double* img, double* flow1, double* out, const int height, const int width, const int channels, const int filling_value, const int interpolateMode) + +def flow_warp_c(np.ndarray[double, ndim=3, mode="c"] img_array not None, + np.ndarray[double, ndim=3, mode="c"] flow_array not None, + int filling_value=0, + int interpolate_mode=1): + + out_array = np.zeros_like(img_array) + + FlowWarp( np.PyArray_DATA(img_array), + np.PyArray_DATA(flow_array), + np.PyArray_DATA(out_array), + out_array.shape[0], + out_array.shape[1], + out_array.shape[2], + filling_value, + interpolate_mode) + + return out_array diff --git a/mmcv/mmcv/video/processing.py b/mmcv/mmcv/video/processing.py new file mode 100644 index 0000000..8331572 --- /dev/null +++ b/mmcv/mmcv/video/processing.py @@ -0,0 +1,159 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import os +import os.path as osp +import subprocess +import tempfile + +from mmcv.utils import requires_executable + + +@requires_executable('ffmpeg') +def convert_video(in_file, + out_file, + print_cmd=False, + pre_options='', + **kwargs): + """Convert a video with ffmpeg. + + This provides a general api to ffmpeg, the executed command is:: + + `ffmpeg -y -i ` + + Options(kwargs) are mapped to ffmpeg commands with the following rules: + + - key=val: "-key val" + - key=True: "-key" + - key=False: "" + + Args: + in_file (str): Input video filename. + out_file (str): Output video filename. + pre_options (str): Options appears before "-i ". + print_cmd (bool): Whether to print the final ffmpeg command. + """ + options = [] + for k, v in kwargs.items(): + if isinstance(v, bool): + if v: + options.append(f'-{k}') + elif k == 'log_level': + assert v in [ + 'quiet', 'panic', 'fatal', 'error', 'warning', 'info', + 'verbose', 'debug', 'trace' + ] + options.append(f'-loglevel {v}') + else: + options.append(f'-{k} {v}') + cmd = f'ffmpeg -y {pre_options} -i {in_file} {" ".join(options)} ' \ + f'{out_file}' + if print_cmd: + print(cmd) + subprocess.call(cmd, shell=True) + + +@requires_executable('ffmpeg') +def resize_video(in_file, + out_file, + size=None, + ratio=None, + keep_ar=False, + log_level='info', + print_cmd=False): + """Resize a video. + + Args: + in_file (str): Input video filename. + out_file (str): Output video filename. + size (tuple): Expected size (w, h), eg, (320, 240) or (320, -1). + ratio (tuple or float): Expected resize ratio, (2, 0.5) means + (w*2, h*0.5). + keep_ar (bool): Whether to keep original aspect ratio. + log_level (str): Logging level of ffmpeg. + print_cmd (bool): Whether to print the final ffmpeg command. + """ + if size is None and ratio is None: + raise ValueError('expected size or ratio must be specified') + if size is not None and ratio is not None: + raise ValueError('size and ratio cannot be specified at the same time') + options = {'log_level': log_level} + if size: + if not keep_ar: + options['vf'] = f'scale={size[0]}:{size[1]}' + else: + options['vf'] = f'scale=w={size[0]}:h={size[1]}:' \ + 'force_original_aspect_ratio=decrease' + else: + if not isinstance(ratio, tuple): + ratio = (ratio, ratio) + options['vf'] = f'scale="trunc(iw*{ratio[0]}):trunc(ih*{ratio[1]})"' + convert_video(in_file, out_file, print_cmd, **options) + + +@requires_executable('ffmpeg') +def cut_video(in_file, + out_file, + start=None, + end=None, + vcodec=None, + acodec=None, + log_level='info', + print_cmd=False): + """Cut a clip from a video. + + Args: + in_file (str): Input video filename. + out_file (str): Output video filename. + start (None or float): Start time (in seconds). + end (None or float): End time (in seconds). + vcodec (None or str): Output video codec, None for unchanged. + acodec (None or str): Output audio codec, None for unchanged. + log_level (str): Logging level of ffmpeg. + print_cmd (bool): Whether to print the final ffmpeg command. + """ + options = {'log_level': log_level} + if vcodec is None: + options['vcodec'] = 'copy' + if acodec is None: + options['acodec'] = 'copy' + if start: + options['ss'] = start + else: + start = 0 + if end: + options['t'] = end - start + convert_video(in_file, out_file, print_cmd, **options) + + +@requires_executable('ffmpeg') +def concat_video(video_list, + out_file, + vcodec=None, + acodec=None, + log_level='info', + print_cmd=False): + """Concatenate multiple videos into a single one. + + Args: + video_list (list): A list of video filenames + out_file (str): Output video filename + vcodec (None or str): Output video codec, None for unchanged + acodec (None or str): Output audio codec, None for unchanged + log_level (str): Logging level of ffmpeg. + print_cmd (bool): Whether to print the final ffmpeg command. + """ + _, tmp_filename = tempfile.mkstemp(suffix='.txt', text=True) + with open(tmp_filename, 'w') as f: + for filename in video_list: + f.write(f'file {osp.abspath(filename)}\n') + options = {'log_level': log_level} + if vcodec is None: + options['vcodec'] = 'copy' + if acodec is None: + options['acodec'] = 'copy' + convert_video( + tmp_filename, + out_file, + print_cmd, + pre_options='-f concat -safe 0', + **options) + os.remove(tmp_filename) diff --git a/mmcv/mmcv/visualization/__init__.py b/mmcv/mmcv/visualization/__init__.py new file mode 100644 index 0000000..38b857e --- /dev/null +++ b/mmcv/mmcv/visualization/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from .color import Color, color_val +from .image import imshow, imshow_bboxes, imshow_det_bboxes +from .optflow import flow2rgb, flowshow, make_color_wheel + +__all__ = [ + 'Color', 'color_val', 'imshow', 'imshow_bboxes', 'imshow_det_bboxes', + 'flowshow', 'flow2rgb', 'make_color_wheel' +] diff --git a/mmcv/mmcv/visualization/color.py b/mmcv/mmcv/visualization/color.py new file mode 100644 index 0000000..44f465e --- /dev/null +++ b/mmcv/mmcv/visualization/color.py @@ -0,0 +1,51 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from enum import Enum + +import numpy as np + +from mmcv.utils import is_str + + +class Color(Enum): + """An enum that defines common colors. + + Contains red, green, blue, cyan, yellow, magenta, white and black. + """ + red = (0, 0, 255) + green = (0, 255, 0) + blue = (255, 0, 0) + cyan = (255, 255, 0) + yellow = (0, 255, 255) + magenta = (255, 0, 255) + white = (255, 255, 255) + black = (0, 0, 0) + + +def color_val(color): + """Convert various input to color tuples. + + Args: + color (:obj:`Color`/str/tuple/int/ndarray): Color inputs + + Returns: + tuple[int]: A tuple of 3 integers indicating BGR channels. + """ + if is_str(color): + return Color[color].value + elif isinstance(color, Color): + return color.value + elif isinstance(color, tuple): + assert len(color) == 3 + for channel in color: + assert 0 <= channel <= 255 + return color + elif isinstance(color, int): + assert 0 <= color <= 255 + return color, color, color + elif isinstance(color, np.ndarray): + assert color.ndim == 1 and color.size == 3 + assert np.all((color >= 0) & (color <= 255)) + color = color.astype(np.uint8) + return tuple(color) + else: + raise TypeError(f'Invalid type for color: {type(color)}') diff --git a/mmcv/mmcv/visualization/image.py b/mmcv/mmcv/visualization/image.py new file mode 100644 index 0000000..05ad91e --- /dev/null +++ b/mmcv/mmcv/visualization/image.py @@ -0,0 +1,150 @@ +# Copyright (c) Open-MMLab. All rights reserved. +import cv2 +import numpy as np + +from mmcv.image import imread, imwrite +from .color import color_val + + +def imshow(img, win_name='', wait_time=0): + """Show an image. + + Args: + img (str or ndarray): The image to be displayed. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + """ + cv2.imshow(win_name, imread(img)) + if wait_time == 0: # prevent from hangning if windows was closed + while True: + ret = cv2.waitKey(1) + + closed = cv2.getWindowProperty(win_name, cv2.WND_PROP_VISIBLE) < 1 + # if user closed window or if some key pressed + if closed or ret != -1: + break + else: + ret = cv2.waitKey(wait_time) + + +def imshow_bboxes(img, + bboxes, + colors='green', + top_k=-1, + thickness=1, + show=True, + win_name='', + wait_time=0, + out_file=None): + """Draw bboxes on an image. + + Args: + img (str or ndarray): The image to be displayed. + bboxes (list or ndarray): A list of ndarray of shape (k, 4). + colors (list[str or tuple or Color]): A list of colors. + top_k (int): Plot the first k bboxes only if set positive. + thickness (int): Thickness of lines. + show (bool): Whether to show the image. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + out_file (str, optional): The filename to write the image. + + Returns: + ndarray: The image with bboxes drawn on it. + """ + img = imread(img) + + if isinstance(bboxes, np.ndarray): + bboxes = [bboxes] + if not isinstance(colors, list): + colors = [colors for _ in range(len(bboxes))] + colors = [color_val(c) for c in colors] + assert len(bboxes) == len(colors) + + for i, _bboxes in enumerate(bboxes): + _bboxes = _bboxes.astype(np.int32) + if top_k <= 0: + _top_k = _bboxes.shape[0] + else: + _top_k = min(top_k, _bboxes.shape[0]) + for j in range(_top_k): + left_top = (_bboxes[j, 0], _bboxes[j, 1]) + right_bottom = (_bboxes[j, 2], _bboxes[j, 3]) + cv2.rectangle( + img, left_top, right_bottom, colors[i], thickness=thickness) + + if show: + imshow(img, win_name, wait_time) + if out_file is not None: + imwrite(img, out_file) + return img + + +def imshow_det_bboxes(img, + bboxes, + labels, + class_names=None, + score_thr=0, + bbox_color='green', + text_color='green', + thickness=1, + font_scale=0.5, + show=True, + win_name='', + wait_time=0, + out_file=None): + """Draw bboxes and class labels (with scores) on an image. + + Args: + img (str or ndarray): The image to be displayed. + bboxes (ndarray): Bounding boxes (with scores), shaped (n, 4) or + (n, 5). + labels (ndarray): Labels of bboxes. + class_names (list[str]): Names of each classes. + score_thr (float): Minimum score of bboxes to be shown. + bbox_color (str or tuple or :obj:`Color`): Color of bbox lines. + text_color (str or tuple or :obj:`Color`): Color of texts. + thickness (int): Thickness of lines. + font_scale (float): Font scales of texts. + show (bool): Whether to show the image. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + out_file (str or None): The filename to write the image. + + Returns: + ndarray: The image with bboxes drawn on it. + """ + assert bboxes.ndim == 2 + assert labels.ndim == 1 + assert bboxes.shape[0] == labels.shape[0] + assert bboxes.shape[1] == 4 or bboxes.shape[1] == 5 + img = imread(img) + + if score_thr > 0: + assert bboxes.shape[1] == 5 + scores = bboxes[:, -1] + inds = scores > score_thr + bboxes = bboxes[inds, :] + labels = labels[inds] + + bbox_color = color_val(bbox_color) + text_color = color_val(text_color) + img = np.ascontiguousarray(img) + for bbox, label in zip(bboxes, labels): + bbox_int = bbox.astype(np.int32) + left_top = (bbox_int[0], bbox_int[1]) + right_bottom = (bbox_int[2], bbox_int[3]) + cv2.rectangle( + img, left_top, right_bottom, bbox_color, thickness=thickness) + label_text = class_names[ + label] if class_names is not None else f'cls {label}' + if len(bbox) > 4: + label_text += f'|{bbox[-1]:.02f}' + cv2.putText(img, label_text, (bbox_int[0], bbox_int[1] - 2), + cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color) + + if show: + imshow(img, win_name, wait_time) + if out_file is not None: + imwrite(img, out_file) + return img diff --git a/mmcv/mmcv/visualization/optflow.py b/mmcv/mmcv/visualization/optflow.py new file mode 100644 index 0000000..f8792cb --- /dev/null +++ b/mmcv/mmcv/visualization/optflow.py @@ -0,0 +1,112 @@ +# Copyright (c) Open-MMLab. All rights reserved. +from __future__ import division + +import numpy as np + +from mmcv.image import rgb2bgr +from mmcv.video import flowread +from .image import imshow + + +def flowshow(flow, win_name='', wait_time=0): + """Show optical flow. + + Args: + flow (ndarray or str): The optical flow to be displayed. + win_name (str): The window name. + wait_time (int): Value of waitKey param. + """ + flow = flowread(flow) + flow_img = flow2rgb(flow) + imshow(rgb2bgr(flow_img), win_name, wait_time) + + +def flow2rgb(flow, color_wheel=None, unknown_thr=1e6): + """Convert flow map to RGB image. + + Args: + flow (ndarray): Array of optical flow. + color_wheel (ndarray or None): Color wheel used to map flow field to + RGB colorspace. Default color wheel will be used if not specified. + unknown_thr (str): Values above this threshold will be marked as + unknown and thus ignored. + + Returns: + ndarray: RGB image that can be visualized. + """ + assert flow.ndim == 3 and flow.shape[-1] == 2 + if color_wheel is None: + color_wheel = make_color_wheel() + assert color_wheel.ndim == 2 and color_wheel.shape[1] == 3 + num_bins = color_wheel.shape[0] + + dx = flow[:, :, 0].copy() + dy = flow[:, :, 1].copy() + + ignore_inds = ( + np.isnan(dx) | np.isnan(dy) | (np.abs(dx) > unknown_thr) | + (np.abs(dy) > unknown_thr)) + dx[ignore_inds] = 0 + dy[ignore_inds] = 0 + + rad = np.sqrt(dx**2 + dy**2) + if np.any(rad > np.finfo(float).eps): + max_rad = np.max(rad) + dx /= max_rad + dy /= max_rad + + rad = np.sqrt(dx**2 + dy**2) + angle = np.arctan2(-dy, -dx) / np.pi + + bin_real = (angle + 1) / 2 * (num_bins - 1) + bin_left = np.floor(bin_real).astype(int) + bin_right = (bin_left + 1) % num_bins + w = (bin_real - bin_left.astype(np.float32))[..., None] + flow_img = (1 - + w) * color_wheel[bin_left, :] + w * color_wheel[bin_right, :] + small_ind = rad <= 1 + flow_img[small_ind] = 1 - rad[small_ind, None] * (1 - flow_img[small_ind]) + flow_img[np.logical_not(small_ind)] *= 0.75 + + flow_img[ignore_inds, :] = 0 + + return flow_img + + +def make_color_wheel(bins=None): + """Build a color wheel. + + Args: + bins(list or tuple, optional): Specify the number of bins for each + color range, corresponding to six ranges: red -> yellow, + yellow -> green, green -> cyan, cyan -> blue, blue -> magenta, + magenta -> red. [15, 6, 4, 11, 13, 6] is used for default + (see Middlebury). + + Returns: + ndarray: Color wheel of shape (total_bins, 3). + """ + if bins is None: + bins = [15, 6, 4, 11, 13, 6] + assert len(bins) == 6 + + RY, YG, GC, CB, BM, MR = tuple(bins) + + ry = [1, np.arange(RY) / RY, 0] + yg = [1 - np.arange(YG) / YG, 1, 0] + gc = [0, 1, np.arange(GC) / GC] + cb = [0, 1 - np.arange(CB) / CB, 1] + bm = [np.arange(BM) / BM, 0, 1] + mr = [1, 0, 1 - np.arange(MR) / MR] + + num_bins = RY + YG + GC + CB + BM + MR + + color_wheel = np.zeros((3, num_bins), dtype=np.float32) + + col = 0 + for i, color in enumerate([ry, yg, gc, cb, bm, mr]): + for j in range(3): + color_wheel[j, col:col + bins[i]] = color[j] + col += bins[i] + + return color_wheel.T