From 53839b51a7671eeb3fb44d479d541cf3a0f2dd45 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 21 Oct 2020 14:49:52 +0300 Subject: [PATCH 001/147] RDMA/bnxt_re: Set queue pair state when being queried The API for ib_query_qp requires the driver to set cur_qp_state on return, add the missing set. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Link: https://lore.kernel.org/r/20201021114952.38876-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Acked-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index cf3db96283976b..f9c999d5ba28ef 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2078,6 +2078,7 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, goto out; } qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state); + qp_attr->cur_qp_state = __to_ib_qp_state(qplib_qp->cur_qp_state); qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0; qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access); qp_attr->pkey_index = qplib_qp->pkey_index; From a5c29a262ebe4c5b85643bac833b9e7e00ebe8a4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:01 -0300 Subject: [PATCH 002/147] RDMA/cxgb4: Remove MW support This driver never enabled IB_USER_VERBS_CMD_ALLOC_MW so memory windows were not usable from userspace. The kernel side was removed long ago. Drop this dead code. Fixes: feb7c1e38bcc ("IB: remove in-kernel support for memory windows") Link: https://lore.kernel.org/r/1-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 - drivers/infiniband/hw/cxgb4/mem.c | 84 -------------------------- drivers/infiniband/hw/cxgb4/provider.c | 2 - 3 files changed, 88 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index a27899402f59a5..f85477f3b037d2 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -983,9 +983,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); -int c4iw_dealloc_mw(struct ib_mw *mw); void c4iw_dealloc(struct uld_ctx *ctx); -int c4iw_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 42234df896fb2c..a2c71a1d93d5a8 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -365,22 +365,6 @@ static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size, pbl_size, pbl_addr, skb, wr_waitp); } -static int allocate_window(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, - struct c4iw_wr_wait *wr_waitp) -{ - *stag = T4_STAG_UNSET; - return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0, - 0UL, 0, 0, 0, 0, NULL, wr_waitp); -} - -static int deallocate_window(struct c4iw_rdev *rdev, u32 stag, - struct sk_buff *skb, - struct c4iw_wr_wait *wr_waitp) -{ - return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0, - 0, skb, wr_waitp); -} - static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr, struct c4iw_wr_wait *wr_waitp) @@ -611,74 +595,6 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(err); } -int c4iw_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) -{ - struct c4iw_mw *mhp = to_c4iw_mw(ibmw); - struct c4iw_dev *rhp; - struct c4iw_pd *php; - u32 mmid; - u32 stag = 0; - int ret; - - if (ibmw->type != IB_MW_TYPE_1) - return -EINVAL; - - php = to_c4iw_pd(ibmw->pd); - rhp = php->rhp; - mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); - if (!mhp->wr_waitp) - return -ENOMEM; - - mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); - if (!mhp->dereg_skb) { - ret = -ENOMEM; - goto free_wr_wait; - } - - ret = allocate_window(&rhp->rdev, &stag, php->pdid, mhp->wr_waitp); - if (ret) - goto free_skb; - - mhp->rhp = rhp; - mhp->attr.pdid = php->pdid; - mhp->attr.type = FW_RI_STAG_MW; - mhp->attr.stag = stag; - mmid = (stag) >> 8; - ibmw->rkey = stag; - if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { - ret = -ENOMEM; - goto dealloc_win; - } - pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag); - return 0; - -dealloc_win: - deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, - mhp->wr_waitp); -free_skb: - kfree_skb(mhp->dereg_skb); -free_wr_wait: - c4iw_put_wr_wait(mhp->wr_waitp); - return ret; -} - -int c4iw_dealloc_mw(struct ib_mw *mw) -{ - struct c4iw_dev *rhp; - struct c4iw_mw *mhp; - u32 mmid; - - mhp = to_c4iw_mw(mw); - rhp = mhp->rhp; - mmid = (mw->rkey) >> 8; - xa_erase_irq(&rhp->mrs, mmid); - deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, - mhp->wr_waitp); - kfree_skb(mhp->dereg_skb); - c4iw_put_wr_wait(mhp->wr_waitp); - return 0; -} - struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg) { diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 8138c57a1e43bc..b7fe4b47357cab 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -456,13 +456,11 @@ static const struct ib_device_ops c4iw_dev_ops = { .alloc_hw_stats = c4iw_alloc_stats, .alloc_mr = c4iw_alloc_mr, - .alloc_mw = c4iw_alloc_mw, .alloc_pd = c4iw_allocate_pd, .alloc_ucontext = c4iw_alloc_ucontext, .create_cq = c4iw_create_cq, .create_qp = c4iw_create_qp, .create_srq = c4iw_create_srq, - .dealloc_mw = c4iw_dealloc_mw, .dealloc_pd = c4iw_deallocate_pd, .dealloc_ucontext = c4iw_dealloc_ucontext, .dereg_mr = c4iw_dereg_mr, From b8e3130dd96b7b2d6d92e62dcd1515af30212fe2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:02 -0300 Subject: [PATCH 003/147] RDMA: Remove uverbs_ex_cmd_mask values that are linked to functions Since a while now the uverbs layer checks if the driver implements a function before allowing the ucmd to proceed. This largely obsoletes the cmd_mask stuff, but there is some tricky bits in drivers preventing it from being removed. Remove the easy elements of uverbs_ex_cmd_mask by pre-setting them in the core code. These are triggered soley based on the related ops function pointer. query_device_ex is not triggered based on an op, but all drivers already implement something compatible with the extension, so enable it globally too. Link: https://lore.kernel.org/r/2-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 11 +++++++++++ drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/hw/efa/efa_main.c | 3 --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 7 ------- drivers/infiniband/hw/hns/hns_roce_main.c | 2 -- drivers/infiniband/hw/mlx4/main.c | 14 +------------- drivers/infiniband/hw/mlx5/main.c | 14 ++------------ 7 files changed, 15 insertions(+), 38 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index a3b1fc84cdcab9..6ae69e76663bf6 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -600,6 +600,17 @@ struct ib_device *_ib_alloc_device(size_t size) init_completion(&device->unreg_completion); INIT_WORK(&device->unregistration_work, ib_unregister_work); + device->uverbs_ex_cmd_mask = + BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | + BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_WQ) | + BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_FLOW) | + BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL) | + BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_WQ) | + BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_CQ) | + BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_WQ) | + BIT_ULL(IB_USER_VERBS_EX_CMD_QUERY_DEVICE); + return device; } EXPORT_SYMBOL(_ib_alloc_device); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 418d133a8fb080..2f3f9b87922e92 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3753,7 +3753,7 @@ const struct uapi_definition uverbs_def_write_intf[] = { IB_USER_VERBS_EX_CMD_MODIFY_CQ, ib_uverbs_ex_modify_cq, UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq), - UAPI_DEF_METHOD_NEEDS_FN(create_cq))), + UAPI_DEF_METHOD_NEEDS_FN(modify_cq))), DECLARE_UVERBS_OBJECT( UVERBS_OBJECT_DEVICE, diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 6faed3a81e0878..ff50b18cdd8f83 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -326,9 +326,6 @@ static int efa_ib_device_add(struct efa_dev *dev) (1ull << IB_USER_VERBS_CMD_CREATE_AH) | (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - dev->ibdev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE); - ib_set_device_ops(&dev->ibdev, &efa_dev_ops); err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 5f4d8a32ed6d9d..b3d5ba8ef439a3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2062,11 +2062,6 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); } -static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) -{ - return -EOPNOTSUPP; -} - static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { @@ -4347,7 +4342,6 @@ static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev) static const struct ib_device_ops hns_roce_v1_dev_ops = { .destroy_qp = hns_roce_v1_destroy_qp, - .modify_cq = hns_roce_v1_modify_cq, .poll_cq = hns_roce_v1_poll_cq, .post_recv = hns_roce_v1_post_recv, .post_send = hns_roce_v1_post_send, @@ -4367,7 +4361,6 @@ static const struct hns_roce_hw hns_roce_hw_v1 = { .set_mtu = hns_roce_v1_set_mtu, .write_mtpt = hns_roce_v1_write_mtpt, .write_cqc = hns_roce_v1_write_cqc, - .modify_cq = hns_roce_v1_modify_cq, .clear_hem = hns_roce_v1_clear_hem, .modify_qp = hns_roce_v1_modify_qp, .query_qp = hns_roce_v1_query_qp, diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index afeffafc59f906..f1aa8d202b6961 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -507,8 +507,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) (1ULL << IB_USER_VERBS_CMD_QUERY_QP) | (1ULL << IB_USER_VERBS_CMD_DESTROY_QP); - ib_dev->uverbs_ex_cmd_mask |= (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) { ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index cd0fba6b096428..451b248560611d 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2685,8 +2685,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops); ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); @@ -2694,15 +2692,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) || (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) == - IB_LINK_LAYER_ETHERNET))) { - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + IB_LINK_LAYER_ETHERNET))) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops); - } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { @@ -2721,9 +2712,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (check_flow_steering_support(dev)) { ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED; - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops); } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 89e04ca62ae0fa..383cd1e52aae67 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4166,14 +4166,10 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | (1ull << IB_USER_VERBS_CMD_OPEN_QP); - dev->ib_dev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | + dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP); if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) @@ -4276,12 +4272,6 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); port_num = mlx5_core_native_port_num(dev->mdev) - 1; From c074bb1e30f9b51032245300798613a290e42f25 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:03 -0300 Subject: [PATCH 004/147] RDMA: Remove elements in uverbs_cmd_mask that all drivers set This is a step toward eliminating uverbs_cmd_mask. Preset this list in the core code. Only the op reg_user_mr wasn't already being required from the drivers. Link: https://lore.kernel.org/r/3-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 16 ++++++++++++++++ drivers/infiniband/hw/bnxt_re/main.c | 16 +--------------- drivers/infiniband/hw/cxgb4/provider.c | 16 +--------------- drivers/infiniband/hw/efa/efa_main.c | 16 +--------------- drivers/infiniband/hw/hns/hns_roce_main.c | 15 --------------- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 16 +--------------- drivers/infiniband/hw/mlx4/main.c | 16 +--------------- drivers/infiniband/hw/mlx5/main.c | 16 +--------------- drivers/infiniband/hw/mthca/mthca_provider.c | 16 +--------------- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 16 +--------------- drivers/infiniband/hw/qedr/main.c | 15 +-------------- drivers/infiniband/hw/usnic/usnic_ib_main.c | 16 +--------------- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 16 +--------------- drivers/infiniband/sw/rdmavt/vt.c | 16 +--------------- drivers/infiniband/sw/rxe/rxe_verbs.c | 17 ++--------------- drivers/infiniband/sw/siw/siw_main.c | 16 +--------------- 16 files changed, 31 insertions(+), 224 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 6ae69e76663bf6..f99548de881fc1 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -600,6 +600,22 @@ struct ib_device *_ib_alloc_device(size_t size) init_completion(&device->unreg_completion); INIT_WORK(&device->unregistration_work, ib_unregister_work); + device->uverbs_cmd_mask = + BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) | + BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) | + BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) | + BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) | + BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) | + BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) | + BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) | + BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) | + BIT_ULL(IB_USER_VERBS_CMD_REG_MR); + device->uverbs_ex_cmd_mask = BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_FLOW) | BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 04621ba8fa7611..46dac956eead47 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -702,23 +702,9 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; /* User space */ - ibdev->uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | + ibdev->uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index b7fe4b47357cab..3e6ec35f748349 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -531,23 +531,9 @@ void c4iw_register_device(struct work_struct *work) if (fastreg_support) dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; dev->ibdev.local_dma_lkey = 0; - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + dev->ibdev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV) | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index ff50b18cdd8f83..843b44a4887be1 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -308,21 +308,7 @@ static int efa_ib_device_add(struct efa_dev *dev) dev->ibdev.num_comp_vectors = 1; dev->ibdev.dev.parent = &pdev->dev; - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + dev->ibdev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_CREATE_AH) | (1ull << IB_USER_VERBS_CMD_DESTROY_AH); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index f1aa8d202b6961..dc64582a2022f8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -491,21 +491,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->phys_port_cnt = hr_dev->caps.num_ports; ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey; ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors; - ib_dev->uverbs_cmd_mask = - (1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ULL << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ULL << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ULL << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ULL << IB_USER_VERBS_CMD_REG_MR) | - (1ULL << IB_USER_VERBS_CMD_DEREG_MR) | - (1ULL << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ULL << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ULL << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ULL << IB_USER_VERBS_CMD_CREATE_QP) | - (1ULL << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ULL << IB_USER_VERBS_CMD_QUERY_QP) | - (1ULL << IB_USER_VERBS_CMD_DESTROY_QP); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) { ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 581ecbadf5861f..76f7d8bb3b6059 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2684,25 +2684,11 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.node_type = RDMA_NODE_RNIC; ether_addr_copy((u8 *)&iwibdev->ibdev.node_guid, netdev->dev_addr); - iwibdev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + iwibdev->ibdev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_CREATE_AH) | (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_RECV) | (1ull << IB_USER_VERBS_CMD_POST_SEND); iwibdev->ibdev.phys_port_cnt = 1; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 451b248560611d..ad064a23dd2fd7 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2657,23 +2657,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev; - ibdev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | + ibdev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 383cd1e52aae67..656fffb09ac1b4 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4139,25 +4139,11 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; int err; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_CREATE_AH) | (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index c4d9cdc4ee97e8..86840afcfba63a 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1158,22 +1158,8 @@ int mthca_register_device(struct mthca_dev *dev) if (ret) return ret; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); dev->ib_dev.node_type = RDMA_NODE_IB_CA; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 9b96661a71435b..e6611a7e8eeb26 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -204,23 +204,9 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, sizeof(OCRDMA_NODE_DESC)); - dev->ibdev.uverbs_cmd_mask = - OCRDMA_UVERBS(GET_CONTEXT) | - OCRDMA_UVERBS(QUERY_DEVICE) | - OCRDMA_UVERBS(QUERY_PORT) | - OCRDMA_UVERBS(ALLOC_PD) | - OCRDMA_UVERBS(DEALLOC_PD) | - OCRDMA_UVERBS(REG_MR) | - OCRDMA_UVERBS(DEREG_MR) | - OCRDMA_UVERBS(CREATE_COMP_CHANNEL) | - OCRDMA_UVERBS(CREATE_CQ) | + dev->ibdev.uverbs_cmd_mask |= OCRDMA_UVERBS(RESIZE_CQ) | - OCRDMA_UVERBS(DESTROY_CQ) | OCRDMA_UVERBS(REQ_NOTIFY_CQ) | - OCRDMA_UVERBS(CREATE_QP) | - OCRDMA_UVERBS(MODIFY_QP) | - OCRDMA_UVERBS(QUERY_QP) | - OCRDMA_UVERBS(DESTROY_QP) | OCRDMA_UVERBS(POLL_CQ) | OCRDMA_UVERBS(POST_SEND) | OCRDMA_UVERBS(POST_RECV); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 967641662b24a7..02058f1516122f 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -249,27 +249,14 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.node_guid = dev->attr.node_guid; memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC)); - dev->ibdev.uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) | - QEDR_UVERBS(QUERY_DEVICE) | - QEDR_UVERBS(QUERY_PORT) | - QEDR_UVERBS(ALLOC_PD) | - QEDR_UVERBS(DEALLOC_PD) | - QEDR_UVERBS(CREATE_COMP_CHANNEL) | - QEDR_UVERBS(CREATE_CQ) | + dev->ibdev.uverbs_cmd_mask |= QEDR_UVERBS(RESIZE_CQ) | - QEDR_UVERBS(DESTROY_CQ) | QEDR_UVERBS(REQ_NOTIFY_CQ) | - QEDR_UVERBS(CREATE_QP) | - QEDR_UVERBS(MODIFY_QP) | - QEDR_UVERBS(QUERY_QP) | - QEDR_UVERBS(DESTROY_QP) | QEDR_UVERBS(CREATE_SRQ) | QEDR_UVERBS(DESTROY_SRQ) | QEDR_UVERBS(QUERY_SRQ) | QEDR_UVERBS(MODIFY_SRQ) | QEDR_UVERBS(POST_SRQ_RECV) | - QEDR_UVERBS(REG_MR) | - QEDR_UVERBS(DEREG_MR) | QEDR_UVERBS(POLL_CQ) | QEDR_UVERBS(POST_SEND) | QEDR_UVERBS(POST_RECV); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index aa2e65fc5cd652..7db5c1c7d06d2e 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -398,21 +398,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS; us_ibdev->ib_dev.dev.parent = &dev->dev; - us_ibdev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + us_ibdev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | (1ull << IB_USER_VERBS_CMD_OPEN_QP); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index fa2a3fa0c3e4e1..bfcffbf2185726 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -205,23 +205,9 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->flags = 0; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV) | (1ull << IB_USER_VERBS_CMD_CREATE_AH) | diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 52218684ad4ab0..ef0d131e8f0688 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -593,28 +593,14 @@ int rvt_register_device(struct rvt_dev_info *rdi) * exactly which functions rdmavt supports, nor do they know the ABI * version, so we do all of this sort of stuff here. */ - rdi->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + rdi->ibdev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_CREATE_AH) | (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | (1ull << IB_USER_VERBS_CMD_QUERY_AH) | (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV) | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 1fc022362fbe49..fe7bd2c2e646fd 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1132,31 +1132,18 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) dma_set_max_seg_size(&dev->dev, UINT_MAX); dma_set_coherent_mask(&dev->dev, dma_get_required_mask(&dev->dev)); - dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) - | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) - | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) + dev->uverbs_cmd_mask |= + BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ) | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_REG_MR) - | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index ca8bc72968672f..4a324b91104e09 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -346,23 +346,9 @@ static struct siw_device *siw_device_create(struct net_device *netdev) addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, addr); } - base_dev->uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + base_dev->uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV) | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | From 44ce37bc8bf30283d16c5e5f20964b638bebd429 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:04 -0300 Subject: [PATCH 005/147] RDMA: Move more uverbs_cmd_mask settings to the core These functions all depend on the driver providing a specific op: - REREG_MR is rereg_user_mr(). bnxt_re set this without providing the op - ATTACH/DEATCH_MCAST is attach_mcast()/detach_mcast(). usnic set this without providing the op - OPEN_QP doesn't involve the driver but requires a XRCD. qedr provides xrcd but forgot to set it, usnic doesn't provide XRCD but set it anyhow. - OPEN/CLOSE_XRCD are the ops alloc_xrcd()/dealloc_xrcd() - CREATE_SRQ/DESTROY_SRQ are the ops create_srq()/destroy_srq() - QUERY/MODIFY_SRQ is op query_srq()/modify_srq(). hns sets this but sometimes supplies a NULL op. - RESIZE_CQ is op resize_cq(). bnxt_re sets this boes doesn't supply an op - ALLOC/DEALLOC_MW is alloc_mw()/dealloc_mw(). cxgb4 provided an (now deleted) implementation but no userspace All drivers were checked that no drivers provide the op without also setting uverbs_cmd_mask so this should have no functional change. Link: https://lore.kernel.org/r/4-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 16 ++++++++++++- drivers/infiniband/core/uverbs_cmd.c | 8 +++---- drivers/infiniband/hw/bnxt_re/main.c | 6 ----- drivers/infiniband/hw/cxgb4/provider.c | 5 +--- drivers/infiniband/hw/hns/hns_roce_main.c | 14 ++--------- drivers/infiniband/hw/mlx4/main.c | 20 ++-------------- drivers/infiniband/hw/mlx5/main.c | 23 +++---------------- drivers/infiniband/hw/mthca/mthca_provider.c | 10 -------- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 5 ---- drivers/infiniband/hw/qedr/main.c | 8 +------ drivers/infiniband/hw/usnic/usnic_ib_main.c | 5 ---- .../infiniband/hw/vmw_pvrdma/pvrdma_main.c | 4 ---- drivers/infiniband/sw/rdmavt/vt.c | 7 ------ drivers/infiniband/sw/rxe/rxe_verbs.c | 9 +------- drivers/infiniband/sw/siw/siw_main.c | 6 +---- 15 files changed, 30 insertions(+), 116 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f99548de881fc1..9e925369215c45 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -284,6 +284,7 @@ static void ib_device_check_mandatory(struct ib_device *device) IB_MANDATORY_FUNC(poll_cq), IB_MANDATORY_FUNC(req_notify_cq), IB_MANDATORY_FUNC(get_dma_mr), + IB_MANDATORY_FUNC(reg_user_mr), IB_MANDATORY_FUNC(dereg_mr), IB_MANDATORY_FUNC(get_port_immutable) }; @@ -601,20 +602,33 @@ struct ib_device *_ib_alloc_device(size_t size) INIT_WORK(&device->unregistration_work, ib_unregister_work); device->uverbs_cmd_mask = + BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) | + BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) | + BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) | BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) | + BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_OPEN_QP) | + BIT_ULL(IB_USER_VERBS_CMD_OPEN_XRCD) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) | - BIT_ULL(IB_USER_VERBS_CMD_REG_MR); + BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_REG_MR) | + BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) | + BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ); device->uverbs_ex_cmd_mask = BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_FLOW) | diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 2f3f9b87922e92..f85a6117577296 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3999,8 +3999,7 @@ const struct uapi_definition uverbs_def_write_intf[] = { DECLARE_UVERBS_WRITE( IB_USER_VERBS_CMD_CLOSE_XRCD, ib_uverbs_close_xrcd, - UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd), - UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)), + UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd)), DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP, ib_uverbs_open_qp, UAPI_DEF_WRITE_UDATA_IO( @@ -4010,8 +4009,9 @@ const struct uapi_definition uverbs_def_write_intf[] = { ib_uverbs_open_xrcd, UAPI_DEF_WRITE_UDATA_IO( struct ib_uverbs_open_xrcd, - struct ib_uverbs_open_xrcd_resp), - UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))), + struct ib_uverbs_open_xrcd_resp)), + UAPI_DEF_OBJ_NEEDS_FN(alloc_xrcd), + UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)), {}, }; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 46dac956eead47..47ebbba4a207e6 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -703,12 +703,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) /* User space */ ibdev->uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_CREATE_AH) | (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | (1ull << IB_USER_VERBS_CMD_QUERY_AH) | diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 3e6ec35f748349..627967dc4fbd00 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -535,10 +535,7 @@ void c4iw_register_device(struct work_struct *work) (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + (1ull << IB_USER_VERBS_CMD_POST_RECV); dev->ibdev.node_type = RDMA_NODE_RNIC; BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index dc64582a2022f8..a56de14626afcd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -492,18 +492,12 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey; ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors; - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) { - ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops); - } /* MW */ - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) { - ib_dev->uverbs_cmd_mask |= - (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops); - } /* FRMR */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) @@ -512,10 +506,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) /* SRQ */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { ib_dev->uverbs_cmd_mask |= - (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV); ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops); ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index ad064a23dd2fd7..63007ece56b416 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2658,16 +2658,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev; ibdev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); + (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ); ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops); ibdev->ib_dev.uverbs_ex_cmd_mask |= @@ -2682,17 +2673,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops); if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || - dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { - ibdev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops); - } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { - ibdev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | - (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops); } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 656fffb09ac1b4..b17c2c8758f262 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4142,16 +4142,7 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_CREATE_AH) | (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); + (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ); dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | @@ -4167,19 +4158,11 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); - if (MLX5_CAP_GEN(mdev, imaicl)) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + if (MLX5_CAP_GEN(mdev, imaicl)) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops); - } - if (MLX5_CAP_GEN(mdev, xrc)) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | - (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + if (MLX5_CAP_GEN(mdev, xrc)) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); - } if (MLX5_CAP_DEV_MEM(mdev, memic) || MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 86840afcfba63a..ea4a8deabfb6ff 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1158,22 +1158,12 @@ int mthca_register_device(struct mthca_dev *dev) if (ret) return ret; - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; if (dev->mthca_flags & MTHCA_FLAG_SRQ) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); - if (mthca_is_memfree(dev)) ib_set_device_ops(&dev->ib_dev, &mthca_dev_arbel_srq_ops); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index e6611a7e8eeb26..b5c8b5a9a0939f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -205,7 +205,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, sizeof(OCRDMA_NODE_DESC)); dev->ibdev.uverbs_cmd_mask |= - OCRDMA_UVERBS(RESIZE_CQ) | OCRDMA_UVERBS(REQ_NOTIFY_CQ) | OCRDMA_UVERBS(POLL_CQ) | OCRDMA_UVERBS(POST_SEND) | @@ -228,10 +227,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { dev->ibdev.uverbs_cmd_mask |= - OCRDMA_UVERBS(CREATE_SRQ) | - OCRDMA_UVERBS(MODIFY_SRQ) | - OCRDMA_UVERBS(QUERY_SRQ) | - OCRDMA_UVERBS(DESTROY_SRQ) | OCRDMA_UVERBS(POST_SRQ_RECV); ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 02058f1516122f..a092e73eb39b62 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -189,8 +189,7 @@ static void qedr_roce_register_device(struct qedr_dev *dev) ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops); - dev->ibdev.uverbs_cmd_mask |= QEDR_UVERBS(OPEN_XRCD) | - QEDR_UVERBS(CLOSE_XRCD) | + dev->ibdev.uverbs_cmd_mask |= QEDR_UVERBS(CREATE_XSRQ); } @@ -250,12 +249,7 @@ static int qedr_register_device(struct qedr_dev *dev) memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC)); dev->ibdev.uverbs_cmd_mask |= - QEDR_UVERBS(RESIZE_CQ) | QEDR_UVERBS(REQ_NOTIFY_CQ) | - QEDR_UVERBS(CREATE_SRQ) | - QEDR_UVERBS(DESTROY_SRQ) | - QEDR_UVERBS(QUERY_SRQ) | - QEDR_UVERBS(MODIFY_SRQ) | QEDR_UVERBS(POST_SRQ_RECV) | QEDR_UVERBS(POLL_CQ) | QEDR_UVERBS(POST_SEND) | diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 7db5c1c7d06d2e..1b63a491fa72d5 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -398,11 +398,6 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS; us_ibdev->ib_dev.dev.parent = &dev->dev; - us_ibdev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); - ib_set_device_ops(&us_ibdev->ib_dev, &usnic_dev_ops); rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index bfcffbf2185726..798e9a4ddd1c07 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -236,10 +236,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) /* Check if SRQ is supported by backend */ if (dev->dsr->caps.max_srq) { dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index ef0d131e8f0688..77afd06166fc2b 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -598,17 +598,10 @@ int rvt_register_device(struct rvt_dev_info *rdi) (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | (1ull << IB_USER_VERBS_CMD_QUERY_AH) | (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); rdi->ibdev.node_type = RDMA_NODE_IB_CA; if (!rdi->ibdev.num_comp_vectors) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index fe7bd2c2e646fd..02b7f92ac87844 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1133,14 +1133,9 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) dma_set_coherent_mask(&dev->dev, dma_get_required_mask(&dev->dev)); dev->uverbs_cmd_mask |= - BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV) + BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV) | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) - | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ) | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ) | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) @@ -1148,8 +1143,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) - | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) ; ib_set_device_ops(dev, &rxe_dev_ops); diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 4a324b91104e09..2efe83a2f18bfb 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -351,11 +351,7 @@ static struct siw_device *siw_device_create(struct net_device *netdev) (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); base_dev->node_type = RDMA_NODE_RNIC; memcpy(base_dev->node_desc, SIW_NODE_DESC_COMMON, From 652caba5b5501258a46d4b9279e0dbb6e5d42d75 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:05 -0300 Subject: [PATCH 006/147] RDMA: Check srq_type during create_srq uverbs was blocking srq_types the driver doesn't support based on the CREATE_XSRQ cmd_mask. Fix all drivers to check for supported srq_types during create_srq and move CREATE_XSRQ to the core code. Link: https://lore.kernel.org/r/5-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/hw/cxgb4/qp.c | 3 +++ drivers/infiniband/hw/hns/hns_roce_srq.c | 4 ++++ drivers/infiniband/hw/mlx4/main.c | 3 --- drivers/infiniband/hw/mlx4/srq.c | 4 ++++ drivers/infiniband/hw/mlx5/main.c | 3 +-- drivers/infiniband/hw/mlx5/srq.c | 5 +++++ drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 3 +++ drivers/infiniband/hw/qedr/main.c | 3 --- drivers/infiniband/hw/qedr/verbs.c | 4 ++++ drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 2 +- drivers/infiniband/sw/rxe/rxe_verbs.c | 3 +++ drivers/infiniband/sw/siw/siw_verbs.c | 3 +++ 13 files changed, 32 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 9e925369215c45..efcadbda44095c 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -610,6 +610,7 @@ struct ib_device *_ib_alloc_device(size_t size) BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_XSRQ) | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) | diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index f20379e4e2ec22..d2b46c5c1645e4 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2680,6 +2680,9 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs, int ret; int wr_len; + if (attrs->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + pr_debug("%s ib_pd %p\n", __func__, pd); php = to_c4iw_pd(pd); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 8caf74e44efd96..27646b9e35dfd2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -288,6 +288,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, int ret; u32 cqn; + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC) + return -EOPNOTSUPP; + /* Check the actual SRQ wqe and SRQ sge num */ if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 63007ece56b416..355c8268d195a1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2657,9 +2657,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev; - ibdev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ); - ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops); ibdev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index bf618529e734d2..6a381751c0d8ee 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -86,6 +86,10 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, int err; int i; + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC) + return -EOPNOTSUPP; + /* Sanity check SRQ size before proceeding */ if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes || init_attr->attr.max_sge > dev->dev->caps.max_srq_sge) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b17c2c8758f262..ab469bc835dcb9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4141,8 +4141,7 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ); + (1ull << IB_USER_VERBS_CMD_DESTROY_AH); dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index e2f720eec1e18b..12d485872e7716 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -226,6 +226,11 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, struct mlx5_srq_attr in = {}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC && + init_attr->srq_type != IB_SRQT_TM) + return -EOPNOTSUPP; + /* Sanity check SRQ size before proceeding */ if (init_attr->attr.max_wr >= max_srq_wqes) { mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 7350fe16f164d3..b392e15d7592b6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1770,6 +1770,9 @@ int ocrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); struct ocrdma_srq *srq = get_ocrdma_srq(ibsrq); + if (init_attr->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + if (init_attr->attr.max_sge > dev->attr.max_recv_sge) return -EINVAL; if (init_attr->attr.max_wr > dev->attr.max_rqe) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index a092e73eb39b62..f3a5946fe93e3b 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -188,9 +188,6 @@ static void qedr_roce_register_device(struct qedr_dev *dev) dev->ibdev.node_type = RDMA_NODE_IB_CA; ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops); - - dev->ibdev.uverbs_cmd_mask |= - QEDR_UVERBS(CREATE_XSRQ); } static const struct ib_device_ops qedr_dev_ops = { diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 019642ff24a704..29a96ff6fc66b6 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1546,6 +1546,10 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, "create SRQ called from %s (pd %p)\n", (udata) ? "User lib" : "kernel", pd); + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC) + return -EOPNOTSUPP; + rc = qedr_check_srq_params(dev, init_attr, udata); if (rc) return -EINVAL; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 082208f9aa9006..bdc2703532c6cc 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -121,7 +121,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, dev_warn(&dev->pdev->dev, "shared receive queue type %d not supported\n", init_attr->srq_type); - return -EINVAL; + return -EOPNOTSUPP; } if (init_attr->attr.max_wr > dev->dsr->caps.max_srq_wr || diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 02b7f92ac87844..d40ebb2e0fce6c 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -265,6 +265,9 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, struct rxe_srq *srq = to_rsrq(ibsrq); struct rxe_create_srq_resp __user *uresp = NULL; + if (init->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + if (udata) { if (udata->outlen < sizeof(*uresp)) return -EINVAL; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 7cf3242ffb41f9..1c469f967ab9b2 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -1555,6 +1555,9 @@ int siw_create_srq(struct ib_srq *base_srq, base_ucontext); int rv; + if (init_attrs->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + if (atomic_inc_return(&sdev->num_srq) > SIW_MAX_SRQ) { siw_dbg_pd(base_srq->pd, "too many SRQ's\n"); rv = -ENOMEM; From 26e990badde40b2fb824bfa3cb9d4288a79584bc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:06 -0300 Subject: [PATCH 007/147] RDMA: Check attr_mask during modify_qp Each driver should check that it can support the provided attr_mask during modify_qp. IB_USER_VERBS_EX_CMD_MODIFY_QP was being used to block modify_qp_ex because the driver didn't check RATE_LIMIT. Link: https://lore.kernel.org/r/6-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/uverbs_cmd.c | 8 ++------ drivers/infiniband/hw/bnxt_re/ib_verbs.c | 3 +++ drivers/infiniband/hw/cxgb4/qp.c | 3 +++ drivers/infiniband/hw/efa/efa_verbs.c | 3 +++ drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 +++ drivers/infiniband/hw/i40iw/i40iw_verbs.c | 3 +++ drivers/infiniband/hw/mlx4/qp.c | 3 +++ drivers/infiniband/hw/mlx5/main.c | 3 +-- drivers/infiniband/hw/mlx5/qp.c | 3 +++ drivers/infiniband/hw/mthca/mthca_qp.c | 3 +++ drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 3 +++ drivers/infiniband/hw/qedr/verbs.c | 3 +++ drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 3 +++ drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 3 +++ drivers/infiniband/sw/rdmavt/qp.c | 3 +++ drivers/infiniband/sw/rxe/rxe_verbs.c | 3 +++ drivers/infiniband/sw/siw/siw_verbs.c | 3 +++ include/rdma/ib_verbs.h | 2 ++ include/uapi/rdma/ib_user_verbs.h | 14 -------------- 21 files changed, 53 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index efcadbda44095c..6d260357177146 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -639,6 +639,7 @@ struct ib_device *_ib_alloc_device(size_t size) BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL) | BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_WQ) | BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_CQ) | + BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_QP) | BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_WQ) | BIT_ULL(IB_USER_VERBS_EX_CMD_QUERY_DEVICE); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index f85a6117577296..54c3eb463da85d 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1906,8 +1906,7 @@ static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs) if (ret) return ret; - if (cmd.base.attr_mask & - ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1)) + if (cmd.base.attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; return modify_qp(attrs, &cmd); @@ -1929,10 +1928,7 @@ static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs) * Last bit is reserved for extending the attr_mask by * using another field. */ - BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1ULL << 31)); - - if (cmd.base.attr_mask & - ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) + if (cmd.base.attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) return -EOPNOTSUPP; ret = modify_qp(attrs, &cmd); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index f9c999d5ba28ef..f3ec6d3fba6259 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1829,6 +1829,9 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, unsigned int flags; u8 nw_type; + if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + qp->qplib_qp.modify_flags = 0; if (qp_attr_mask & IB_QP_STATE) { curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index d2b46c5c1645e4..79e69d449b0748 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2374,6 +2374,9 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, pr_debug("ib_qp %p\n", ibqp); + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + /* iwarp does not support the RTR state */ if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) attr_mask &= ~IB_QP_STATE; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 191e0843f090c8..e3d9a5a5f4d992 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -917,6 +917,9 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, enum ib_qp_state new_state; int err; + if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + if (udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen)) { ibdev_dbg(&dev->ibdev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index b3d5ba8ef439a3..f18380f827dd87 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3256,6 +3256,8 @@ static int hns_roce_v1_modify_qp(struct ib_qp *ibqp, enum ib_qp_state cur_state, enum ib_qp_state new_state) { + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) return hns_roce_v1_m_sqp(ibqp, attr, attr_mask, cur_state, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 6d30850696c518..a0b679254a8e56 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4757,6 +4757,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, unsigned long rq_flag = 0; int ret; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + /* * In v2 engine, software pass context and context mask to hardware * when modifying qp. If software need modify some fields in context, diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 76f7d8bb3b6059..acc5e945d30369 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -855,6 +855,9 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, u32 err; unsigned long flags; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + memset(&info, 0, sizeof(info)); ctx_info = &iwqp->ctx_info; iwarp_info = &iwqp->iwarp_info; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 5cb8e602294ca9..8834629615bc6d 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2787,6 +2787,9 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx4_ib_qp *mqp = to_mqp(ibqp); int ret; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata); if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ab469bc835dcb9..b9a12a1d1c5c92 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4144,8 +4144,7 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) (1ull << IB_USER_VERBS_CMD_DESTROY_AH); dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP); + (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 600e056798c0a1..19361132336cb9 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4247,6 +4247,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int err = -EINVAL; int port; + if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) + return -EOPNOTSUPP; + if (ibqp->rwq_ind_tbl) return -ENOSYS; diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 08a2a7afafd3d2..07cfc0934b17d5 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -863,6 +863,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, new_state; int err = -EINVAL; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + mutex_lock(&qp->mutex); if (attr_mask & IB_QP_CUR_STATE) { cur_state = attr->cur_qp_state; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index b392e15d7592b6..244dd22d53efa7 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1391,6 +1391,9 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct ocrdma_dev *dev; enum ib_qp_state old_qps, new_qps; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + qp = get_ocrdma_qp(ibqp); dev = get_ocrdma_dev(ibqp->device); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 29a96ff6fc66b6..34c07a18c2c218 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2472,6 +2472,9 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask, attr->qp_state); + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + old_qp_state = qedr_get_ibqp_state(qp->state); if (attr_mask & IB_QP_STATE) new_qp_state = attr->qp_state; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 9e961f8ffa10de..a89d5816685af6 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -557,6 +557,9 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int status; usnic_dbg("\n"); + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + qp_grp = to_uqp_grp(ibqp); mutex_lock(&qp_grp->vf->pf->usdev_lock); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 428256c5506571..9fdec5b9553c4e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -544,6 +544,9 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, enum ib_qp_state cur_state, next_state; int ret; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + /* Sanity checking. Should need lock here */ mutex_lock(&qp->mutex); cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state : diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index ee48befc897861..7b93e7bb0072a2 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1469,6 +1469,9 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int pmtu = 0; /* for gcc warning only */ int opa_ah; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + spin_lock_irq(&qp->r_lock); spin_lock(&qp->s_hlock); spin_lock(&qp->s_lock); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index d40ebb2e0fce6c..dafcc0329148ef 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -436,6 +436,9 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct rxe_dev *rxe = to_rdev(ibqp->device); struct rxe_qp *qp = to_rqp(ibqp); + if (mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + err = rxe_qp_chk_attr(rxe, qp, attr, mask); if (err) goto err1; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 1c469f967ab9b2..947b8b1cbe9af6 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -544,6 +544,9 @@ int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr, if (!attr_mask) return 0; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + memset(&new_attrs, 0, sizeof(new_attrs)); if (attr_mask & IB_QP_ACCESS_FLAGS) { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9bf6c319a670e2..0f9ce27bedcb5d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1234,6 +1234,8 @@ enum ib_qp_attr_mask { IB_QP_RESERVED3 = (1<<23), IB_QP_RESERVED4 = (1<<24), IB_QP_RATE_LIMIT = (1<<25), + + IB_QP_ATTR_STANDARD_BITS = GENMASK(20, 0), }; enum ib_qp_state { diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 456438c18c2c35..7ee73a0652f1af 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -596,20 +596,6 @@ enum { IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, }; -enum { - /* - * This value is equal to IB_QP_DEST_QPN. - */ - IB_USER_LEGACY_LAST_QP_ATTR_MASK = 1ULL << 20, -}; - -enum { - /* - * This value is equal to IB_QP_RATE_LIMIT. - */ - IB_USER_LAST_QP_ATTR_MASK = 1ULL << 25, -}; - struct ib_uverbs_ex_create_qp { __aligned_u64 user_handle; __u32 pd_handle; From 1c407cb5d70568a57a32a0e6f5aee27c5083ef37 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:07 -0300 Subject: [PATCH 008/147] RDMA: Check flags during create_cq Each driver should check that the CQ attrs is supported. Unfortuantely when flags was added to the CQ attrs the drivers were not updated, uverbs_ex_cmd_mask was used to block it. This was missed when create CQ was converted to ioctl, so non-zero flags could have been passed into drivers. Check that flags is zero in all drivers that don't use it, remove IB_USER_VERBS_EX_CMD_CREATE_CQ from uverbs_ex_cmd_mask. Fixes: 41b2a71fc848 ("IB/uverbs: Move ioctl path of create_cq and destroy_cq to a new file") Link: https://lore.kernel.org/r/7-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/hw/bnxt_re/ib_verbs.c | 3 +++ drivers/infiniband/hw/cxgb4/cq.c | 2 +- drivers/infiniband/hw/efa/efa_verbs.c | 3 +++ drivers/infiniband/hw/hns/hns_roce_cq.c | 3 +++ drivers/infiniband/hw/i40iw/i40iw_verbs.c | 3 +++ drivers/infiniband/hw/mlx4/main.c | 1 - drivers/infiniband/hw/mlx5/main.c | 1 - drivers/infiniband/hw/mthca/mthca_provider.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- drivers/infiniband/hw/qedr/verbs.c | 3 +++ drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 3 +++ drivers/infiniband/sw/rdmavt/cq.c | 2 +- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 +- drivers/infiniband/sw/siw/siw_verbs.c | 3 +++ 16 files changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 6d260357177146..64129646f6a667 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -632,6 +632,7 @@ struct ib_device *_ib_alloc_device(size_t size) BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ); device->uverbs_ex_cmd_mask = + BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_CQ) | BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_FLOW) | BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_WQ) | diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index f3ec6d3fba6259..edc47093c63bbd 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2831,6 +2831,9 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct bnxt_qplib_nq *nq = NULL; unsigned int nq_alloc_cnt; + if (attr->flags) + return -EOPNOTSUPP; + /* Validate CQ fields */ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) { ibdev_err(&rdev->ibdev, "Failed to create CQ -max exceeded"); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 28349ed5088540..2cb65be24770f1 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -1006,7 +1006,7 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, pr_debug("ib_dev %p entries %d\n", ibdev, entries); if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (vector >= rhp->rdev.lldi.nciq) return -EINVAL; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index e3d9a5a5f4d992..2fe5708b2d9d8c 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1032,6 +1032,9 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, ibdev_dbg(ibdev, "create_cq entries %d\n", entries); + if (attr->flags) + return -EOPNOTSUPP; + if (entries < 1 || entries > dev->dev_attr.max_cq_depth) { ibdev_dbg(ibdev, "cq: requested entries[%u] non-positive or greater than max[%u]\n", diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 809b22aa5056c4..68f355fba425b5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -251,6 +251,9 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, u32 cq_entries = attr->cqe; int ret; + if (attr->flags) + return -EOPNOTSUPP; + if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { ibdev_err(ibdev, "Failed to check CQ count %d max=%d\n", cq_entries, hr_dev->caps.max_cqes); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index acc5e945d30369..f3eb60ba078db9 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1107,6 +1107,9 @@ static int i40iw_create_cq(struct ib_cq *ibcq, int err_code; int entries = attr->cqe; + if (attr->flags) + return -EOPNOTSUPP; + if (iwdev->closing) return -ENODEV; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 355c8268d195a1..c2c66b958b836a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2659,7 +2659,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops); ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b9a12a1d1c5c92..261cc669aaf1fb 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4143,7 +4143,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) (1ull << IB_USER_VERBS_CMD_CREATE_AH) | (1ull << IB_USER_VERBS_CMD_DESTROY_AH); dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index ea4a8deabfb6ff..e37c965bb030b6 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -612,7 +612,7 @@ static int mthca_create_cq(struct ib_cq *ibcq, udata, struct mthca_ucontext, ibucontext); if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) return -EINVAL; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 244dd22d53efa7..29ec0a808a19d8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -974,7 +974,7 @@ int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ocrdma_create_cq_ureq ureq; if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (udata) { if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 34c07a18c2c218..80373170ce51f5 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -928,6 +928,9 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, "create_cq: called from %s. entries=%d, vector=%d\n", udata ? "User Lib" : "Kernel", entries, vector); + if (attr->flags) + return -EOPNOTSUPP; + if (entries > QEDR_MAX_CQES) { DP_ERR(dev, "create cq: the number of entries %d is too high. Must be equal or below %d.\n", diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index a89d5816685af6..d6708d1db73af2 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -584,7 +584,7 @@ int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; return 0; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 319546a39a0d5c..a119ac3e103c8d 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -119,6 +119,9 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64); + if (attr->flags) + return -EOPNOTSUPP; + entries = roundup_pow_of_two(entries); if (entries < 1 || entries > dev->dsr->caps.max_cqe) return -EINVAL; diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 19248be1409335..20cc0799ac4bc4 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -211,7 +211,7 @@ int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, int err; if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (entries < 1 || entries > rdi->dparms.props.max_cqe) return -EINVAL; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index dafcc0329148ef..7d32cb246d9ed2 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -771,7 +771,7 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, } if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector); if (err) diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 947b8b1cbe9af6..bcea5c5ace2b6b 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -1097,6 +1097,9 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, struct siw_cq *cq = to_siw_cq(base_cq); int rv, size = attr->cqe; + if (attr->flags) + return -EOPNOTSUPP; + if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) { siw_dbg(base_cq->device, "too many CQ's\n"); rv = -ENOMEM; From 1f11a7610e50982150b90b31d1f749f6217fbde6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:08 -0300 Subject: [PATCH 009/147] RDMA: Check create_flags during create_qp Each driver should check that the QP attrs create_flags is supported. Unfortuantely when create_flags was added to the QP attrs the drivers were not updated. uverbs_ex_cmd_mask was used to block it - even though kernel drivers use these flags too. Check that flags is zero in all drivers that don't use it, remove IB_USER_VERBS_EX_CMD_CREATE_QP from uverbs_ex_cmd_mask. Fix the error code to be EOPNOTSUPP. Link: https://lore.kernel.org/r/8-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/hw/bnxt_re/ib_verbs.c | 4 +++- drivers/infiniband/hw/cxgb4/qp.c | 2 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 14 +++----------- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 +- drivers/infiniband/hw/mlx4/main.c | 2 -- drivers/infiniband/hw/mlx4/qp.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 2 -- drivers/infiniband/hw/mlx5/qp.c | 7 ++++--- drivers/infiniband/hw/mthca/mthca_provider.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 3 +++ drivers/infiniband/hw/qedr/verbs.c | 3 +++ drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 2 +- drivers/infiniband/sw/rdmavt/qp.c | 7 ++++--- drivers/infiniband/sw/rxe/rxe_verbs.c | 3 +++ drivers/infiniband/sw/siw/siw_verbs.c | 3 +++ 17 files changed, 33 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 64129646f6a667..e5f5d656951df1 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -634,6 +634,7 @@ struct ib_device *_ib_alloc_device(size_t size) device->uverbs_ex_cmd_mask = BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_CQ) | BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_QP) | BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_WQ) | BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_FLOW) | diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index edc47093c63bbd..25a6c8fff91fe6 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1271,10 +1271,12 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, } qplqp->mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu)); qplqp->dpi = &rdev->dpi_privileged; /* Doorbell page */ - if (init_attr->create_flags) + if (init_attr->create_flags) { ibdev_dbg(&rdev->ibdev, "QP create flags 0x%x not supported", init_attr->create_flags); + return -EOPNOTSUPP; + } /* Setup CQs */ if (init_attr->send_cq) { diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 79e69d449b0748..a7401398cb344a 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2126,7 +2126,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, pr_debug("ib_pd %p\n", pd); - if (attrs->qp_type != IB_QPT_RC) + if (attrs->qp_type != IB_QPT_RC || attrs->create_flags) return ERR_PTR(-EOPNOTSUPP); php = to_c4iw_pd(pd); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6c081dd985fc94..04a70681bea1bd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -869,17 +869,6 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, if (ret) ibdev_err(ibdev, "Failed to set user SQ size\n"); } else { - if (init_attr->create_flags & - IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { - ibdev_err(ibdev, "Failed to check multicast loopback\n"); - return -EINVAL; - } - - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) { - ibdev_err(ibdev, "Failed to check ipoib ud lso\n"); - return -EINVAL; - } - ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); if (ret) ibdev_err(ibdev, "Failed to set kernel SQ size\n"); @@ -906,6 +895,9 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hr_qp->state = IB_QPS_RESET; hr_qp->flush_flag = 0; + if (init_attr->create_flags) + return -EOPNOTSUPP; + ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); if (ret) { ibdev_err(ibdev, "Failed to set QP param\n"); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index f3eb60ba078db9..db81f39f89b856 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -556,7 +556,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return ERR_PTR(-ENODEV); if (init_attr->create_flags) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE) init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index c2c66b958b836a..84d8fcbb85ef85 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2658,8 +2658,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev; ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops); - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8834629615bc6d..47b9ed5599b396 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1493,7 +1493,7 @@ static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp, MLX4_IB_SRIOV_SQP | MLX4_IB_QP_NETIF | MLX4_IB_QP_CREATE_ROCE_V2_GSI)) - return -EINVAL; + return -EOPNOTSUPP; if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { if (init_attr->qp_type != IB_QPT_UD) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 261cc669aaf1fb..daa1de6497e76f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4142,8 +4142,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_CREATE_AH) | (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 19361132336cb9..251421dd6f1d5e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2712,11 +2712,12 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1, true, qp); - if (create_flags) + if (create_flags) { mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n", create_flags); - - return (create_flags) ? -EINVAL : 0; + return -EOPNOTSUPP; + } + return 0; } static int process_udata_size(struct mlx5_ib_dev *dev, diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index e37c965bb030b6..5172253bcfa05f 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -470,7 +470,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, int err; if (init_attr->create_flags) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); switch (init_attr->qp_type) { case IB_QPT_RC: diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 29ec0a808a19d8..bc98bd950d99fa 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1299,6 +1299,9 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd, struct ocrdma_create_qp_ureq ureq; u16 dpp_credit_lmt, dpp_offset; + if (attrs->create_flags) + return ERR_PTR(-EOPNOTSUPP); + status = ocrdma_check_qp_params(ibpd, dev, attrs, udata); if (status) goto gen_err; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 80373170ce51f5..2e85bb5104cb68 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2239,6 +2239,9 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, struct ib_qp *ibqp; int rc = 0; + if (attrs->create_flags) + return ERR_PTR(-EOPNOTSUPP); + if (attrs->qp_type == IB_QPT_XRC_TGT) { xrcd = get_qedr_xrcd(attrs->xrcd); dev = get_qedr_dev(xrcd->ibxrcd.device); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index d6708d1db73af2..38a37770c01627 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -474,7 +474,7 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, us_ibdev = to_usdev(pd->device); if (init_attr->create_flags) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); err = ib_copy_from_udata(&cmd, udata, sizeof(cmd)); if (err) { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 9fdec5b9553c4e..1d3bdd7bb51d96 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -209,7 +209,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, dev_warn(&dev->pdev->dev, "invalid create queuepair flags %#x\n", init_attr->create_flags); - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); } if (init_attr->qp_type != IB_QPT_RC && diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 7b93e7bb0072a2..e9db6bf106186f 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1083,10 +1083,11 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (!rdi) return ERR_PTR(-EINVAL); + if (init_attr->create_flags & ~IB_QP_CREATE_NETDEV_USE) + return ERR_PTR(-EOPNOTSUPP); + if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge || - init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || - (init_attr->create_flags && - init_attr->create_flags != IB_QP_CREATE_NETDEV_USE)) + init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr) return ERR_PTR(-EINVAL); /* Check receive queue parameters if no SRQ is specified. */ diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 7d32cb246d9ed2..21d2ab3956a4a9 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -395,6 +395,9 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, uresp = udata->outbuf; } + if (init->create_flags) + return ERR_PTR(-EOPNOTSUPP); + err = rxe_qp_chk_init(rxe, init); if (err) goto err1; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index bcea5c5ace2b6b..68fd053fc7748a 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -307,6 +307,9 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, siw_dbg(base_dev, "create new QP\n"); + if (attrs->create_flags) + return ERR_PTR(-EOPNOTSUPP); + if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) { siw_dbg(base_dev, "too many QP's\n"); rv = -ENOMEM; From bd2a40cc2463766ed1a55d94a4ccbdcd621da323 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:09 -0300 Subject: [PATCH 010/147] RDMA/core Remove uverbs_ex_cmd_mask No driver sets it, and the core code sets a maximum mask, simply remove it. Disabled operations are now handled either by having a NULL ops pointer, or by having the common driver callbacks check for unsupported extended attributes. Link: https://lore.kernel.org/r/9-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 15 --------------- drivers/infiniband/core/uverbs_uapi.c | 5 +---- include/rdma/ib_verbs.h | 1 - 3 files changed, 1 insertion(+), 20 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index e5f5d656951df1..939664765536c6 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -630,21 +630,6 @@ struct ib_device *_ib_alloc_device(size_t size) BIT_ULL(IB_USER_VERBS_CMD_REG_MR) | BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ); - - device->uverbs_ex_cmd_mask = - BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_CQ) | - BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_QP) | - BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_WQ) | - BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_FLOW) | - BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL) | - BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_QP) | - BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - BIT_ULL(IB_USER_VERBS_EX_CMD_QUERY_DEVICE); - return device; } EXPORT_SYMBOL(_ib_alloc_device); diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 5addc8fae3f3bd..62f5bcb712cf17 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -79,10 +79,7 @@ static int uapi_create_write(struct uverbs_api *uapi, method_elm->is_ex = def->write.is_ex; method_elm->handler = def->func_write; - if (def->write.is_ex) - method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask & - BIT_ULL(def->write.command_num)); - else + if (!def->write.is_ex) method_elm->disabled = !(ibdev->uverbs_cmd_mask & BIT_ULL(def->write.command_num)); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0f9ce27bedcb5d..c7c6b06171e934 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2667,7 +2667,6 @@ struct ib_device { const struct attribute_group *groups[3]; u64 uverbs_cmd_mask; - u64 uverbs_ex_cmd_mask; char node_desc[IB_DEVICE_NODE_DESC_MAX]; __be64 node_guid; From 628c02bf38aa42c09c3dde61284ba348290e6650 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:10 -0300 Subject: [PATCH 011/147] RDMA: Remove uverbs cmds from drivers that don't use them Allowing userspace to invoke these commands is probably going to crash these drivers as they are not tested and not expecting to use them on a user object. For example pvrdma touches cq->ring_state which is not initialized for user QPs. These commands are effected: - IB_USER_VERBS_CMD_REQ_NOTIFY_CQ is ibv_cmd_req_notify_cq() in rdma-core, only hfi1, ipath and rxe calls it. - IB_USER_VERBS_CMD_POLL_CQ is ibv_cmd_poll_cq() in rdma-core, only ipath and hfi1 calls it. - IB_USER_VERBS_CMD_POST_SEND/RECV is ibv_cmd_post_send/recv() in rdma-core, only ipath and hfi1 call them. - IB_USER_VERBS_CMD_POST_SRQ_RECV is ibv_cmd_post_srq_recv() in rdma-core, only ipath and hfi1 calls it. - IB_USER_VERBS_CMD_PEEK_CQ isn't even implemented anywhere - IB_USER_VERBS_CMD_CREATE/DESTROY_AH is ibv_cmd_create/destroy_ah() in rdma-core, only bnxt_re, efa, hfi1, ipath, mlx5, orcrdma, and rxe call it. Link: https://lore.kernel.org/r/10-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/provider.c | 5 ----- drivers/infiniband/hw/hns/hns_roce_main.c | 2 -- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 7 ------- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 12 ++---------- drivers/infiniband/hw/qedr/main.c | 7 ------- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 10 ---------- drivers/infiniband/sw/rxe/rxe_verbs.c | 7 +------ drivers/infiniband/sw/siw/siw_main.c | 6 ------ 8 files changed, 3 insertions(+), 53 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 627967dc4fbd00..8a3c0a0041a8e5 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -531,11 +531,6 @@ void c4iw_register_device(struct work_struct *work) if (fastreg_support) dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; dev->ibdev.local_dma_lkey = 0; - dev->ibdev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV); dev->ibdev.node_type = RDMA_NODE_RNIC; BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index a56de14626afcd..6d2c383c4d645f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -505,8 +505,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) /* SRQ */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { - ib_dev->uverbs_cmd_mask |= - (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV); ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops); ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops); } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index db81f39f89b856..85d40686588e6e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2690,13 +2690,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.node_type = RDMA_NODE_RNIC; ether_addr_copy((u8 *)&iwibdev->ibdev.node_guid, netdev->dev_addr); - iwibdev->ibdev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_POST_SEND); iwibdev->ibdev.phys_port_cnt = 1; iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dev.parent = &pcidev->dev; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index b5c8b5a9a0939f..5e67cd66359aae 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -204,11 +204,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, sizeof(OCRDMA_NODE_DESC)); - dev->ibdev.uverbs_cmd_mask |= - OCRDMA_UVERBS(REQ_NOTIFY_CQ) | - OCRDMA_UVERBS(POLL_CQ) | - OCRDMA_UVERBS(POST_SEND) | - OCRDMA_UVERBS(POST_RECV); dev->ibdev.uverbs_cmd_mask |= OCRDMA_UVERBS(CREATE_AH) | @@ -225,12 +220,9 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) ib_set_device_ops(&dev->ibdev, &ocrdma_dev_ops); - if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { - dev->ibdev.uverbs_cmd_mask |= - OCRDMA_UVERBS(POST_SRQ_RECV); - + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops); - } + rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group); ret = ib_device_set_netdev(&dev->ibdev, dev->nic_info.netdev, 1); if (ret) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index f3a5946fe93e3b..cc711c61d9b398 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -245,13 +245,6 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.node_guid = dev->attr.node_guid; memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC)); - dev->ibdev.uverbs_cmd_mask |= - QEDR_UVERBS(REQ_NOTIFY_CQ) | - QEDR_UVERBS(POST_SRQ_RECV) | - QEDR_UVERBS(POLL_CQ) | - QEDR_UVERBS(POST_SEND) | - QEDR_UVERBS(POST_RECV); - if (IS_IWARP(dev)) { rc = qedr_iw_register_device(dev); if (rc) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 798e9a4ddd1c07..a86b4f7c9c6e29 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -205,13 +205,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->flags = 0; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt; @@ -235,9 +228,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) /* Check if SRQ is supported by backend */ if (dev->dsr->caps.max_srq) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops); dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 21d2ab3956a4a9..b3323c305a63f9 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1142,12 +1142,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) dma_set_coherent_mask(&dev->dev, dma_get_required_mask(&dev->dev)); dev->uverbs_cmd_mask |= - BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV) - | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) - | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) - | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) + BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 2efe83a2f18bfb..e49faefdee923d 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -346,12 +346,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, addr); } - base_dev->uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); base_dev->node_type = RDMA_NODE_RNIC; memcpy(base_dev->node_desc, SIW_NODE_DESC_COMMON, From 676a80adba0131e1603ef3de5f73a19a0d3d0e65 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:11 -0300 Subject: [PATCH 012/147] RDMA: Remove AH from uverbs_cmd_mask Drivers that need a uverbs AH should instead set the create_user_ah() op similar to reg_user_mr(). MODIFY_AH and QUERY_AH cmds were never implemented so are just deleted. Link: https://lore.kernel.org/r/11-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 3 +++ drivers/infiniband/core/uverbs_cmd.c | 8 ++++---- drivers/infiniband/core/verbs.c | 5 ++++- drivers/infiniband/hw/bnxt_re/main.c | 10 +--------- drivers/infiniband/hw/efa/efa_main.c | 5 +---- drivers/infiniband/hw/mlx5/main.c | 5 +---- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 7 +------ drivers/infiniband/sw/rdmavt/ah.c | 1 - drivers/infiniband/sw/rdmavt/vt.c | 5 +---- drivers/infiniband/sw/rxe/rxe_verbs.c | 9 ++------- include/rdma/ib_verbs.h | 2 ++ 11 files changed, 20 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 939664765536c6..ce26564d4edfed 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -606,6 +606,7 @@ struct ib_device *_ib_alloc_device(size_t size) BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) | BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) | @@ -614,6 +615,7 @@ struct ib_device *_ib_alloc_device(size_t size) BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) | @@ -2606,6 +2608,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, create_qp); SET_DEVICE_OP(dev_ops, create_rwq_ind_table); SET_DEVICE_OP(dev_ops, create_srq); + SET_DEVICE_OP(dev_ops, create_user_ah); SET_DEVICE_OP(dev_ops, create_wq); SET_DEVICE_OP(dev_ops, dealloc_dm); SET_DEVICE_OP(dev_ops, dealloc_driver); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 54c3eb463da85d..72b209ca77c734 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3689,13 +3689,13 @@ const struct uapi_definition uverbs_def_write_intf[] = { ib_uverbs_create_ah, UAPI_DEF_WRITE_UDATA_IO( struct ib_uverbs_create_ah, - struct ib_uverbs_create_ah_resp), - UAPI_DEF_METHOD_NEEDS_FN(create_ah)), + struct ib_uverbs_create_ah_resp)), DECLARE_UVERBS_WRITE( IB_USER_VERBS_CMD_DESTROY_AH, ib_uverbs_destroy_ah, - UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah), - UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))), + UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah)), + UAPI_DEF_OBJ_NEEDS_FN(create_user_ah), + UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)), DECLARE_UVERBS_OBJECT( UVERBS_OBJECT_COMP_CHANNEL, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 740f8454b6b461..cfcfa3ae32cf21 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -533,7 +533,10 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, init_attr.flags = flags; init_attr.xmit_slave = xmit_slave; - ret = device->ops.create_ah(ah, &init_attr, udata); + if (udata) + ret = device->ops.create_user_ah(ah, &init_attr, udata); + else + ret = device->ops.create_ah(ah, &init_attr, NULL); if (ret) { kfree(ah); return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 47ebbba4a207e6..838744b6811b9d 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -646,6 +646,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .create_cq = bnxt_re_create_cq, .create_qp = bnxt_re_create_qp, .create_srq = bnxt_re_create_srq, + .create_user_ah = bnxt_re_create_ah, .dealloc_driver = bnxt_re_dealloc_driver, .dealloc_pd = bnxt_re_dealloc_pd, .dealloc_ucontext = bnxt_re_dealloc_ucontext, @@ -701,15 +702,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->dev.parent = &rdev->en_dev->pdev->dev; ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; - /* User space */ - ibdev->uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */ - - rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); ib_set_device_ops(ibdev, &bnxt_re_dev_ops); ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1); diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 843b44a4887be1..2b3da85fb43c98 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -248,6 +248,7 @@ static const struct ib_device_ops efa_dev_ops = { .create_ah = efa_create_ah, .create_cq = efa_create_cq, .create_qp = efa_create_qp, + .create_user_ah = efa_create_ah, .dealloc_pd = efa_dealloc_pd, .dealloc_ucontext = efa_dealloc_ucontext, .dereg_mr = efa_dereg_mr, @@ -308,10 +309,6 @@ static int efa_ib_device_add(struct efa_dev *dev) dev->ibdev.num_comp_vectors = 1; dev->ibdev.dev.parent = &pdev->dev; - dev->ibdev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - ib_set_device_ops(&dev->ibdev, &efa_dev_ops); err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index daa1de6497e76f..e07ad30f0aca5f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4022,6 +4022,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .create_cq = mlx5_ib_create_cq, .create_qp = mlx5_ib_create_qp, .create_srq = mlx5_ib_create_srq, + .create_user_ah = mlx5_ib_create_ah, .dealloc_pd = mlx5_ib_dealloc_pd, .dealloc_ucontext = mlx5_ib_dealloc_ucontext, .del_gid = mlx5_ib_del_gid, @@ -4139,10 +4140,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; int err; - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) ib_set_device_ops(&dev->ib_dev, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 5e67cd66359aae..d12ec85ee22639 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -154,6 +154,7 @@ static const struct ib_device_ops ocrdma_dev_ops = { .create_ah = ocrdma_create_ah, .create_cq = ocrdma_create_cq, .create_qp = ocrdma_create_qp, + .create_user_ah = ocrdma_create_ah, .dealloc_pd = ocrdma_dealloc_pd, .dealloc_ucontext = ocrdma_dealloc_ucontext, .dereg_mr = ocrdma_dereg_mr, @@ -205,12 +206,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, sizeof(OCRDMA_NODE_DESC)); - dev->ibdev.uverbs_cmd_mask |= - OCRDMA_UVERBS(CREATE_AH) | - OCRDMA_UVERBS(MODIFY_AH) | - OCRDMA_UVERBS(QUERY_AH) | - OCRDMA_UVERBS(DESTROY_AH); - dev->ibdev.node_type = RDMA_NODE_IB_CA; dev->ibdev.phys_port_cnt = 1; dev->ibdev.num_comp_vectors = dev->eq_cnt; diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index b938c4ffa99aa3..f9754dcd250b77 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -129,7 +129,6 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, * rvt_destory_ah - Destory an address handle * @ibah: address handle * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) - * * Return: 0 on success */ int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 77afd06166fc2b..43fbc4e54edfbf 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -384,6 +384,7 @@ static const struct ib_device_ops rvt_dev_ops = { .create_cq = rvt_create_cq, .create_qp = rvt_create_qp, .create_srq = rvt_create_srq, + .create_user_ah = rvt_create_ah, .dealloc_pd = rvt_dealloc_pd, .dealloc_ucontext = rvt_dealloc_ucontext, .dereg_mr = rvt_dereg_mr, @@ -594,10 +595,6 @@ int rvt_register_device(struct rvt_dev_info *rdi) * version, so we do all of this sort of stuff here. */ rdi->ibdev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index b3323c305a63f9..c282b9a927128b 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1079,6 +1079,7 @@ static const struct ib_device_ops rxe_dev_ops = { .create_cq = rxe_create_cq, .create_qp = rxe_create_qp, .create_srq = rxe_create_srq, + .create_user_ah = rxe_create_ah, .dealloc_driver = rxe_dealloc, .dealloc_pd = rxe_dealloc_pd, .dealloc_ucontext = rxe_dealloc_ucontext, @@ -1141,13 +1142,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) dma_set_max_seg_size(&dev->dev, UINT_MAX); dma_set_coherent_mask(&dev->dev, dma_get_required_mask(&dev->dev)); - dev->uverbs_cmd_mask |= - BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) - ; + dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ); ib_set_device_ops(dev, &rxe_dev_ops); err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c7c6b06171e934..9420827d242188 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2403,6 +2403,8 @@ struct ib_device_ops { int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, struct ib_udata *udata); + int (*create_user_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, + struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*destroy_ah)(struct ib_ah *ah, u32 flags); From 1c7fd72687d619207e0f075dd1f1c749879d8021 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 7 Oct 2020 19:36:24 -0700 Subject: [PATCH 013/147] RDMA: Convert sysfs device * show functions to use sysfs_emit() Done with cocci script: @@ identifier d_show; identifier dev, attr, buf; @@ ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf) { <... return - sprintf(buf, + sysfs_emit(buf, ...); ...> } @@ identifier d_show; identifier dev, attr, buf; @@ ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf) { <... return - snprintf(buf, PAGE_SIZE, + sysfs_emit(buf, ...); ...> } @@ identifier d_show; identifier dev, attr, buf; @@ ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf) { <... return - scnprintf(buf, PAGE_SIZE, + sysfs_emit(buf, ...); ...> } @@ identifier d_show; identifier dev, attr, buf; expression chr; @@ ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf) { <... return - strcpy(buf, chr); + sysfs_emit(buf, chr); ...> } @@ identifier d_show; identifier dev, attr, buf; identifier len; @@ ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf) { <... len = - sprintf(buf, + sysfs_emit(buf, ...); ...> return len; } @@ identifier d_show; identifier dev, attr, buf; identifier len; @@ ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf) { <... len = - snprintf(buf, PAGE_SIZE, + sysfs_emit(buf, ...); ...> return len; } @@ identifier d_show; identifier dev, attr, buf; identifier len; @@ ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf) { <... len = - scnprintf(buf, PAGE_SIZE, + sysfs_emit(buf, ...); ...> return len; } @@ identifier d_show; identifier dev, attr, buf; identifier len; @@ ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf) { <... - len += scnprintf(buf + len, PAGE_SIZE - len, + len += sysfs_emit_at(buf, len, ...); ...> return len; } @@ identifier d_show; identifier dev, attr, buf; expression chr; @@ ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf) { ... - strcpy(buf, chr); - return strlen(buf); + return sysfs_emit(buf, chr); } Link: https://lore.kernel.org/r/7f406fa8e3aa2552c022bec680f621e38d1fe414.1602122879.git.joe@perches.com Signed-off-by: Joe Perches Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sysfs.c | 47 +++++++++++-------- drivers/infiniband/core/ucma.c | 2 +- drivers/infiniband/core/user_mad.c | 4 +- drivers/infiniband/core/uverbs_main.c | 4 +- drivers/infiniband/hw/bnxt_re/main.c | 4 +- drivers/infiniband/hw/cxgb4/provider.c | 11 +++-- drivers/infiniband/hw/hfi1/sysfs.c | 16 +++---- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 6 +-- drivers/infiniband/hw/mlx4/main.c | 7 ++- drivers/infiniband/hw/mlx4/sysfs.c | 30 ++++++------ drivers/infiniband/hw/mlx5/main.c | 12 ++--- drivers/infiniband/hw/mthca/mthca_provider.c | 14 +++--- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 4 +- drivers/infiniband/hw/qedr/main.c | 9 ++-- drivers/infiniband/hw/qib/qib_sysfs.c | 30 ++++++------ drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 15 +++--- .../infiniband/hw/vmw_pvrdma/pvrdma_main.c | 6 +-- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 +- drivers/infiniband/ulp/ipoib/ipoib_main.c | 7 +-- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 2 +- drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c | 18 ++++--- drivers/infiniband/ulp/srp/ib_srp.c | 40 ++++++++-------- 23 files changed, 152 insertions(+), 142 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 914cddea525df1..4e335fc4d016ee 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1230,14 +1230,22 @@ static ssize_t node_type_show(struct device *device, struct ib_device *dev = rdma_device_to_ibdev(device); switch (dev->node_type) { - case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); - case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); - case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type); - case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type); - case RDMA_NODE_UNSPECIFIED: return sprintf(buf, "%d: unspecified\n", dev->node_type); - case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); - case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); - default: return sprintf(buf, "%d: \n", dev->node_type); + case RDMA_NODE_IB_CA: + return sysfs_emit(buf, "%d: CA\n", dev->node_type); + case RDMA_NODE_RNIC: + return sysfs_emit(buf, "%d: RNIC\n", dev->node_type); + case RDMA_NODE_USNIC: + return sysfs_emit(buf, "%d: usNIC\n", dev->node_type); + case RDMA_NODE_USNIC_UDP: + return sysfs_emit(buf, "%d: usNIC UDP\n", dev->node_type); + case RDMA_NODE_UNSPECIFIED: + return sysfs_emit(buf, "%d: unspecified\n", dev->node_type); + case RDMA_NODE_IB_SWITCH: + return sysfs_emit(buf, "%d: switch\n", dev->node_type); + case RDMA_NODE_IB_ROUTER: + return sysfs_emit(buf, "%d: router\n", dev->node_type); + default: + return sysfs_emit(buf, "%d: \n", dev->node_type); } } static DEVICE_ATTR_RO(node_type); @@ -1247,11 +1255,12 @@ static ssize_t sys_image_guid_show(struct device *device, { struct ib_device *dev = rdma_device_to_ibdev(device); - return sprintf(buf, "%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]), - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]), - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]), - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3])); + return sysfs_emit( + buf, "%04x:%04x:%04x:%04x\n", + be16_to_cpu(((__be16 *)&dev->attrs.sys_image_guid)[0]), + be16_to_cpu(((__be16 *)&dev->attrs.sys_image_guid)[1]), + be16_to_cpu(((__be16 *)&dev->attrs.sys_image_guid)[2]), + be16_to_cpu(((__be16 *)&dev->attrs.sys_image_guid)[3])); } static DEVICE_ATTR_RO(sys_image_guid); @@ -1260,11 +1269,11 @@ static ssize_t node_guid_show(struct device *device, { struct ib_device *dev = rdma_device_to_ibdev(device); - return sprintf(buf, "%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *) &dev->node_guid)[0]), - be16_to_cpu(((__be16 *) &dev->node_guid)[1]), - be16_to_cpu(((__be16 *) &dev->node_guid)[2]), - be16_to_cpu(((__be16 *) &dev->node_guid)[3])); + return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n", + be16_to_cpu(((__be16 *)&dev->node_guid)[0]), + be16_to_cpu(((__be16 *)&dev->node_guid)[1]), + be16_to_cpu(((__be16 *)&dev->node_guid)[2]), + be16_to_cpu(((__be16 *)&dev->node_guid)[3])); } static DEVICE_ATTR_RO(node_guid); @@ -1273,7 +1282,7 @@ static ssize_t node_desc_show(struct device *device, { struct ib_device *dev = rdma_device_to_ibdev(device); - return sprintf(buf, "%.64s\n", dev->node_desc); + return sysfs_emit(buf, "%.64s\n", dev->node_desc); } static ssize_t node_desc_store(struct device *device, diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index ffe2563ad34565..7dab9a27a145a8 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1825,7 +1825,7 @@ static ssize_t show_abi_version(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); + return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); } static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index b0d0b522cc7647..7e759f5b2a75c1 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1191,7 +1191,7 @@ static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr, if (!port) return -ENODEV; - return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev)); + return sysfs_emit(buf, "%s\n", dev_name(&port->ib_dev->dev)); } static DEVICE_ATTR_RO(ibdev); @@ -1203,7 +1203,7 @@ static ssize_t port_show(struct device *dev, struct device_attribute *attr, if (!port) return -ENODEV; - return sprintf(buf, "%d\n", port->port_num); + return sysfs_emit(buf, "%d\n", port->port_num); } static DEVICE_ATTR_RO(port); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 4bb7c642f80c4e..f173ecd102dcd6 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -1046,7 +1046,7 @@ static ssize_t ibdev_show(struct device *device, struct device_attribute *attr, srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) - ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev)); + ret = sysfs_emit(buf, "%s\n", dev_name(&ib_dev->dev)); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; @@ -1065,7 +1065,7 @@ static ssize_t abi_version_show(struct device *device, srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) - ret = sprintf(buf, "%u\n", ib_dev->ops.uverbs_abi_ver); + ret = sysfs_emit(buf, "%u\n", ib_dev->ops.uverbs_abi_ver); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 838744b6811b9d..fdb8c247825855 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -608,7 +608,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct bnxt_re_dev *rdev = rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor); + return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->vendor); } static DEVICE_ATTR_RO(hw_rev); @@ -618,7 +618,7 @@ static ssize_t hca_type_show(struct device *device, struct bnxt_re_dev *rdev = rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc); + return sysfs_emit(buf, "%s\n", rdev->ibdev.node_desc); } static DEVICE_ATTR_RO(hca_type); diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 8a3c0a0041a8e5..1f1f856f8715d1 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -322,8 +322,9 @@ static ssize_t hw_rev_show(struct device *dev, rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); pr_debug("dev 0x%p\n", dev); - return sprintf(buf, "%d\n", - CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); + return sysfs_emit( + buf, "%d\n", + CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); } static DEVICE_ATTR_RO(hw_rev); @@ -337,7 +338,7 @@ static ssize_t hca_type_show(struct device *dev, pr_debug("dev 0x%p\n", dev); lldev->ethtool_ops->get_drvinfo(lldev, &info); - return sprintf(buf, "%s\n", info.driver); + return sysfs_emit(buf, "%s\n", info.driver); } static DEVICE_ATTR_RO(hca_type); @@ -348,8 +349,8 @@ static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); pr_debug("dev 0x%p\n", dev); - return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, - c4iw_dev->rdev.lldi.pdev->device); + return sysfs_emit(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, + c4iw_dev->rdev.lldi.pdev->device); } static DEVICE_ATTR_RO(board_id); diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 074ec71772d2a5..e2a88f3cea7fcb 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -500,7 +500,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct hfi1_ibdev *dev = rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); - return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); + return sysfs_emit(buf, "%x\n", dd_from_dev(dev)->minrev); } static DEVICE_ATTR_RO(hw_rev); @@ -515,7 +515,7 @@ static ssize_t board_id_show(struct device *device, if (!dd->boardname) ret = -EINVAL; else - ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); + ret = sysfs_emit(buf, "%s\n", dd->boardname); return ret; } static DEVICE_ATTR_RO(board_id); @@ -528,7 +528,7 @@ static ssize_t boardversion_show(struct device *device, struct hfi1_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); + return sysfs_emit(buf, "%s", dd->boardversion); } static DEVICE_ATTR_RO(boardversion); @@ -545,9 +545,9 @@ static ssize_t nctxts_show(struct device *device, * and a receive context, so returning the smaller of the two counts * give a more accurate picture of total contexts available. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", - min(dd->num_user_contexts, - (u32)dd->sc_sizes[SC_USER].count)); + return sysfs_emit(buf, "%u\n", + min(dd->num_user_contexts, + (u32)dd->sc_sizes[SC_USER].count)); } static DEVICE_ATTR_RO(nctxts); @@ -559,7 +559,7 @@ static ssize_t nfreectxts_show(struct device *device, struct hfi1_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); + return sysfs_emit(buf, "%u\n", dd->freectxts); } static DEVICE_ATTR_RO(nfreectxts); @@ -570,7 +570,7 @@ static ssize_t serial_show(struct device *device, rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); - return scnprintf(buf, PAGE_SIZE, "%s", dd->serial); + return sysfs_emit(buf, "%s", dd->serial); } static DEVICE_ATTR_RO(serial); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 85d40686588e6e..5ad381800f4d9e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2062,7 +2062,7 @@ static ssize_t hw_rev_show(struct device *dev, rdma_device_to_drv_device(dev, struct i40iw_ib_device, ibdev); u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev; - return sprintf(buf, "%x\n", hw_rev); + return sysfs_emit(buf, "%x\n", hw_rev); } static DEVICE_ATTR_RO(hw_rev); @@ -2072,7 +2072,7 @@ static DEVICE_ATTR_RO(hw_rev); static ssize_t hca_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "I40IW\n"); + return sysfs_emit(buf, "I40IW\n"); } static DEVICE_ATTR_RO(hca_type); @@ -2082,7 +2082,7 @@ static DEVICE_ATTR_RO(hca_type); static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%.*s\n", 32, "I40IW Board ID"); + return sysfs_emit(buf, "%.*s\n", 32, "I40IW Board ID"); } static DEVICE_ATTR_RO(board_id); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 84d8fcbb85ef85..2ac932834f9891 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2024,7 +2024,7 @@ static ssize_t hca_type_show(struct device *device, { struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); - return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); + return sysfs_emit(buf, "MT%d\n", dev->dev->persist->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -2033,7 +2033,7 @@ static ssize_t hw_rev_show(struct device *device, { struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); - return sprintf(buf, "%x\n", dev->dev->rev_id); + return sysfs_emit(buf, "%x\n", dev->dev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -2043,8 +2043,7 @@ static ssize_t board_id_show(struct device *device, struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); - return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, - dev->dev->board_id); + return sysfs_emit(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id); } static DEVICE_ATTR_RO(board_id); diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index ea1f3a081b05a7..22ae363088ed65 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -56,7 +56,7 @@ static ssize_t show_admin_alias_guid(struct device *dev, mlx4_ib_iov_dentry->entry_num, port->num); - return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val)); + return sysfs_emit(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val)); } /* store_admin_alias_guid stores the (new) administratively assigned value of that GUID. @@ -123,15 +123,15 @@ static ssize_t show_port_gid(struct device *dev, mlx4_ib_iov_dentry->entry_num, &gid, 1); if (ret) return ret; - ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *) gid.raw)[0]), - be16_to_cpu(((__be16 *) gid.raw)[1]), - be16_to_cpu(((__be16 *) gid.raw)[2]), - be16_to_cpu(((__be16 *) gid.raw)[3]), - be16_to_cpu(((__be16 *) gid.raw)[4]), - be16_to_cpu(((__be16 *) gid.raw)[5]), - be16_to_cpu(((__be16 *) gid.raw)[6]), - be16_to_cpu(((__be16 *) gid.raw)[7])); + ret = sysfs_emit(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + be16_to_cpu(((__be16 *)gid.raw)[0]), + be16_to_cpu(((__be16 *)gid.raw)[1]), + be16_to_cpu(((__be16 *)gid.raw)[2]), + be16_to_cpu(((__be16 *)gid.raw)[3]), + be16_to_cpu(((__be16 *)gid.raw)[4]), + be16_to_cpu(((__be16 *)gid.raw)[5]), + be16_to_cpu(((__be16 *)gid.raw)[6]), + be16_to_cpu(((__be16 *)gid.raw)[7])); return ret; } @@ -151,7 +151,7 @@ static ssize_t show_phys_port_pkey(struct device *dev, if (ret) return ret; - return sprintf(buf, "0x%04x\n", pkey); + return sysfs_emit(buf, "0x%04x\n", pkey); } #define DENTRY_REMOVE(_dentry) \ @@ -545,9 +545,9 @@ static ssize_t sysfs_show_smi_enabled(struct device *dev, ssize_t len = 0; if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num)) - len = sprintf(buf, "%d\n", 1); + len = sysfs_emit(buf, "%d\n", 1); else - len = sprintf(buf, "%d\n", 0); + len = sysfs_emit(buf, "%d\n", 0); return len; } @@ -561,9 +561,9 @@ static ssize_t sysfs_show_enable_smi_admin(struct device *dev, ssize_t len = 0; if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num)) - len = sprintf(buf, "%d\n", 1); + len = sysfs_emit(buf, "%d\n", 1); else - len = sprintf(buf, "%d\n", 0); + len = sysfs_emit(buf, "%d\n", 0); return len; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e07ad30f0aca5f..92a419425a102f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2628,7 +2628,7 @@ static ssize_t fw_pages_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); + return sysfs_emit(buf, "%d\n", dev->mdev->priv.fw_pages); } static DEVICE_ATTR_RO(fw_pages); @@ -2638,7 +2638,7 @@ static ssize_t reg_pages_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); + return sysfs_emit(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); } static DEVICE_ATTR_RO(reg_pages); @@ -2648,7 +2648,7 @@ static ssize_t hca_type_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); + return sysfs_emit(buf, "MT%d\n", dev->mdev->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -2658,7 +2658,7 @@ static ssize_t hw_rev_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%x\n", dev->mdev->rev_id); + return sysfs_emit(buf, "%x\n", dev->mdev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -2668,8 +2668,8 @@ static ssize_t board_id_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, - dev->mdev->board_id); + return sysfs_emit(buf, "%.*s\n", MLX5_BOARD_ID_LEN, + dev->mdev->board_id); } static DEVICE_ATTR_RO(board_id); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 5172253bcfa05f..12179a41456dd7 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -961,7 +961,7 @@ static ssize_t hw_rev_show(struct device *device, struct mthca_dev *dev = rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); - return sprintf(buf, "%x\n", dev->rev_id); + return sysfs_emit(buf, "%x\n", dev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -973,16 +973,16 @@ static ssize_t hca_type_show(struct device *device, switch (dev->pdev->device) { case PCI_DEVICE_ID_MELLANOX_TAVOR: - return sprintf(buf, "MT23108\n"); + return sysfs_emit(buf, "MT23108\n"); case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT: - return sprintf(buf, "MT25208 (MT23108 compat mode)\n"); + return sysfs_emit(buf, "MT25208 (MT23108 compat mode)\n"); case PCI_DEVICE_ID_MELLANOX_ARBEL: - return sprintf(buf, "MT25208\n"); + return sysfs_emit(buf, "MT25208\n"); case PCI_DEVICE_ID_MELLANOX_SINAI: case PCI_DEVICE_ID_MELLANOX_SINAI_OLD: - return sprintf(buf, "MT25204\n"); + return sysfs_emit(buf, "MT25204\n"); default: - return sprintf(buf, "unknown\n"); + return sysfs_emit(buf, "unknown\n"); } } static DEVICE_ATTR_RO(hca_type); @@ -993,7 +993,7 @@ static ssize_t board_id_show(struct device *device, struct mthca_dev *dev = rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); - return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); + return sysfs_emit(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); } static DEVICE_ATTR_RO(board_id); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index d12ec85ee22639..9a834a9cca0e81 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -119,7 +119,7 @@ static ssize_t hw_rev_show(struct device *device, struct ocrdma_dev *dev = rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); + return sysfs_emit(buf, "0x%x\n", dev->nic_info.pdev->vendor); } static DEVICE_ATTR_RO(hw_rev); @@ -129,7 +129,7 @@ static ssize_t hca_type_show(struct device *device, struct ocrdma_dev *dev = rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]); + return sysfs_emit(buf, "%s\n", &dev->model_number[0]); } static DEVICE_ATTR_RO(hca_type); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index cc711c61d9b398..e8b812bb3a7ace 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -124,7 +124,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct qedr_dev *dev = rdma_device_to_drv_device(device, struct qedr_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->attr.hw_ver); + return sysfs_emit(buf, "0x%x\n", dev->attr.hw_ver); } static DEVICE_ATTR_RO(hw_rev); @@ -134,10 +134,9 @@ static ssize_t hca_type_show(struct device *device, struct qedr_dev *dev = rdma_device_to_drv_device(device, struct qedr_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "FastLinQ QL%x %s\n", - dev->pdev->device, - rdma_protocol_iwarp(&dev->ibdev, 1) ? - "iWARP" : "RoCE"); + return sysfs_emit(buf, "FastLinQ QL%x %s\n", dev->pdev->device, + rdma_protocol_iwarp(&dev->ibdev, 1) ? "iWARP" : + "RoCE"); } static DEVICE_ATTR_RO(hca_type); diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 021df0654ba757..818282bc7c5bcb 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -565,7 +565,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); - return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); + return sysfs_emit(buf, "%x\n", dd_from_dev(dev)->minrev); } static DEVICE_ATTR_RO(hw_rev); @@ -580,7 +580,7 @@ static ssize_t hca_type_show(struct device *device, if (!dd->boardname) ret = -EINVAL; else - ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); + ret = sysfs_emit(buf, "%s\n", dd->boardname); return ret; } static DEVICE_ATTR_RO(hca_type); @@ -590,7 +590,7 @@ static ssize_t version_show(struct device *device, struct device_attribute *attr, char *buf) { /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version); + return sysfs_emit(buf, "%s", (char *)ib_qib_version); } static DEVICE_ATTR_RO(version); @@ -602,7 +602,7 @@ static ssize_t boardversion_show(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); + return sysfs_emit(buf, "%s", dd->boardversion); } static DEVICE_ATTR_RO(boardversion); @@ -614,7 +614,7 @@ static ssize_t localbus_info_show(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info); + return sysfs_emit(buf, "%s", dd->lbus_info); } static DEVICE_ATTR_RO(localbus_info); @@ -628,9 +628,10 @@ static ssize_t nctxts_show(struct device *device, /* Return the number of user ports (contexts) available. */ /* The calculation below deals with a special case where * cfgctxts is set to 1 on a single-port board. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", - (dd->first_user_ctxt > dd->cfgctxts) ? 0 : - (dd->cfgctxts - dd->first_user_ctxt)); + return sysfs_emit(buf, "%u\n", + (dd->first_user_ctxt > dd->cfgctxts) ? + 0 : + (dd->cfgctxts - dd->first_user_ctxt)); } static DEVICE_ATTR_RO(nctxts); @@ -642,7 +643,7 @@ static ssize_t nfreectxts_show(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); + return sysfs_emit(buf, "%u\n", dd->freectxts); } static DEVICE_ATTR_RO(nfreectxts); @@ -703,12 +704,11 @@ static ssize_t tempsense_show(struct device *device, regvals[idx] = ret; } if (idx == 8) - ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n", - *(signed char *)(regvals), - *(signed char *)(regvals + 1), - regvals[2], regvals[3], - *(signed char *)(regvals + 5), - *(signed char *)(regvals + 7)); + ret = sysfs_emit(buf, "%d %d %02X %02X %d %d\n", + *(signed char *)(regvals), + *(signed char *)(regvals + 1), regvals[2], + regvals[3], *(signed char *)(regvals + 5), + *(signed char *)(regvals + 7)); return ret; } static DEVICE_ATTR_RO(tempsense); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index c85d48ae744277..0af72fca00ff45 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -57,7 +57,7 @@ static ssize_t board_id_show(struct device *device, subsystem_device_id = us_ibdev->pdev->subsystem_device; mutex_unlock(&us_ibdev->usdev_lock); - return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id); + return sysfs_emit(buf, "%hu\n", subsystem_device_id); } static DEVICE_ATTR_RO(board_id); @@ -132,8 +132,7 @@ iface_show(struct device *device, struct device_attribute *attr, char *buf) struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%s\n", - netdev_name(us_ibdev->netdev)); + return sysfs_emit(buf, "%s\n", netdev_name(us_ibdev->netdev)); } static DEVICE_ATTR_RO(iface); @@ -143,8 +142,7 @@ max_vf_show(struct device *device, struct device_attribute *attr, char *buf) struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%u\n", - kref_read(&us_ibdev->vf_cnt)); + return sysfs_emit(buf, "%u\n", kref_read(&us_ibdev->vf_cnt)); } static DEVICE_ATTR_RO(max_vf); @@ -158,8 +156,7 @@ qp_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); - return scnprintf(buf, PAGE_SIZE, - "%d\n", qp_per_vf); + return sysfs_emit(buf, "%d\n", qp_per_vf); } static DEVICE_ATTR_RO(qp_per_vf); @@ -169,8 +166,8 @@ cq_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%d\n", - us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); + return sysfs_emit(buf, "%d\n", + us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); } static DEVICE_ATTR_RO(cq_per_vf); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index a86b4f7c9c6e29..28954a6b5ed3f9 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -68,21 +68,21 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { - return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); + return sysfs_emit(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); } static DEVICE_ATTR_RO(hca_type); static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", PVRDMA_REV_ID); + return sysfs_emit(buf, "%d\n", PVRDMA_REV_ID); } static DEVICE_ATTR_RO(hw_rev); static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", PVRDMA_BOARD_ID); + return sysfs_emit(buf, "%d\n", PVRDMA_BOARD_ID); } static DEVICE_ATTR_RO(board_id); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index c282b9a927128b..7652d53af2c1d9 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1042,7 +1042,7 @@ static ssize_t parent_show(struct device *device, struct rxe_dev *rxe = rdma_device_to_drv_device(device, struct rxe_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%s\n", rxe_parent_name(rxe, 1)); + return sysfs_emit(buf, "%s\n", rxe_parent_name(rxe, 1)); } static DEVICE_ATTR_RO(parent); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 8f0b598a46ec07..d5d592bdab3585 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1514,9 +1514,9 @@ static ssize_t show_mode(struct device *d, struct device_attribute *attr, struct ipoib_dev_priv *priv = ipoib_priv(dev); if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) - return sprintf(buf, "connected\n"); + return sysfs_emit(buf, "connected\n"); else - return sprintf(buf, "datagram\n"); + return sysfs_emit(buf, "datagram\n"); } static ssize_t set_mode(struct device *d, struct device_attribute *attr, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index abfab89423f41c..a6f41349132168 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2266,7 +2266,7 @@ static ssize_t show_pkey(struct device *dev, struct net_device *ndev = to_net_dev(dev); struct ipoib_dev_priv *priv = ipoib_priv(ndev); - return sprintf(buf, "0x%04x\n", priv->pkey); + return sysfs_emit(buf, "0x%04x\n", priv->pkey); } static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); @@ -2276,7 +2276,8 @@ static ssize_t show_umcast(struct device *dev, struct net_device *ndev = to_net_dev(dev); struct ipoib_dev_priv *priv = ipoib_priv(ndev); - return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); + return sysfs_emit(buf, "%d\n", + test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); } void ipoib_set_umcast(struct net_device *ndev, int umcast_val) @@ -2446,7 +2447,7 @@ static ssize_t dev_id_show(struct device *dev, "\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n", current->comm); - return sprintf(buf, "%#x\n", ndev->dev_id); + return sysfs_emit(buf, "%#x\n", ndev->dev_id); } static DEVICE_ATTR_RO(dev_id); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 4c50a87ed7cc24..5958840dbeed73 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -46,7 +46,7 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr, struct net_device *dev = to_net_dev(d); struct ipoib_dev_priv *priv = ipoib_priv(dev); - return sprintf(buf, "%s\n", priv->parent->name); + return sysfs_emit(buf, "%s\n", priv->parent->name); } static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index ac4c49cbf1538a..9222a481ed4da4 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -52,7 +52,8 @@ static ssize_t max_reconnect_attempts_show(struct device *dev, { struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev); - return sprintf(page, "%d\n", rtrs_clt_get_max_reconnect_attempts(clt)); + return sysfs_emit(page, "%d\n", + rtrs_clt_get_max_reconnect_attempts(clt)); } static ssize_t max_reconnect_attempts_store(struct device *dev, @@ -95,11 +96,13 @@ static ssize_t mpath_policy_show(struct device *dev, switch (clt->mp_policy) { case MP_POLICY_RR: - return sprintf(page, "round-robin (RR: %d)\n", clt->mp_policy); + return sysfs_emit(page, "round-robin (RR: %d)\n", + clt->mp_policy); case MP_POLICY_MIN_INFLIGHT: - return sprintf(page, "min-inflight (MI: %d)\n", clt->mp_policy); + return sysfs_emit(page, "min-inflight (MI: %d)\n", + clt->mp_policy); default: - return sprintf(page, "Unknown (%d)\n", clt->mp_policy); + return sysfs_emit(page, "Unknown (%d)\n", clt->mp_policy); } } @@ -138,9 +141,10 @@ static DEVICE_ATTR_RW(mpath_policy); static ssize_t add_path_show(struct device *dev, struct device_attribute *attr, char *page) { - return scnprintf(page, PAGE_SIZE, - "Usage: echo [@] > %s\n\n*addr ::= [ ip: | gid: ]\n", - attr->attr.name); + return sysfs_emit( + page, + "Usage: echo [@] > %s\n\n*addr ::= [ ip: | gid: ]\n", + attr->attr.name); } static ssize_t add_path_store(struct device *dev, diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index d8fcd21ab472f5..f686d1d6e91cfb 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2896,7 +2896,7 @@ static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); + return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); } static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, @@ -2904,7 +2904,7 @@ static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); + return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); } static ssize_t show_service_id(struct device *dev, @@ -2914,8 +2914,8 @@ static ssize_t show_service_id(struct device *dev, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "0x%016llx\n", - be64_to_cpu(target->ib_cm.service_id)); + return sysfs_emit(buf, "0x%016llx\n", + be64_to_cpu(target->ib_cm.service_id)); } static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, @@ -2925,7 +2925,7 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); + return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); } static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, @@ -2933,7 +2933,7 @@ static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%pI6\n", target->sgid.raw); + return sysfs_emit(buf, "%pI6\n", target->sgid.raw); } static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, @@ -2944,7 +2944,7 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); + return sysfs_emit(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); } static ssize_t show_orig_dgid(struct device *dev, @@ -2954,7 +2954,7 @@ static ssize_t show_orig_dgid(struct device *dev, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); + return sysfs_emit(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); } static ssize_t show_req_lim(struct device *dev, @@ -2968,7 +2968,7 @@ static ssize_t show_req_lim(struct device *dev, ch = &target->ch[i]; req_lim = min(req_lim, ch->req_lim); } - return sprintf(buf, "%d\n", req_lim); + return sysfs_emit(buf, "%d\n", req_lim); } static ssize_t show_zero_req_lim(struct device *dev, @@ -2976,7 +2976,7 @@ static ssize_t show_zero_req_lim(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->zero_req_lim); + return sysfs_emit(buf, "%d\n", target->zero_req_lim); } static ssize_t show_local_ib_port(struct device *dev, @@ -2984,7 +2984,7 @@ static ssize_t show_local_ib_port(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->srp_host->port); + return sysfs_emit(buf, "%d\n", target->srp_host->port); } static ssize_t show_local_ib_device(struct device *dev, @@ -2992,8 +2992,8 @@ static ssize_t show_local_ib_device(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%s\n", - dev_name(&target->srp_host->srp_dev->dev->dev)); + return sysfs_emit(buf, "%s\n", + dev_name(&target->srp_host->srp_dev->dev->dev)); } static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, @@ -3001,7 +3001,7 @@ static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->ch_count); + return sysfs_emit(buf, "%d\n", target->ch_count); } static ssize_t show_comp_vector(struct device *dev, @@ -3009,7 +3009,7 @@ static ssize_t show_comp_vector(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->comp_vector); + return sysfs_emit(buf, "%d\n", target->comp_vector); } static ssize_t show_tl_retry_count(struct device *dev, @@ -3017,7 +3017,7 @@ static ssize_t show_tl_retry_count(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->tl_retry_count); + return sysfs_emit(buf, "%d\n", target->tl_retry_count); } static ssize_t show_cmd_sg_entries(struct device *dev, @@ -3025,7 +3025,7 @@ static ssize_t show_cmd_sg_entries(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%u\n", target->cmd_sg_cnt); + return sysfs_emit(buf, "%u\n", target->cmd_sg_cnt); } static ssize_t show_allow_ext_sg(struct device *dev, @@ -3033,7 +3033,7 @@ static ssize_t show_allow_ext_sg(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false"); + return sysfs_emit(buf, "%s\n", target->allow_ext_sg ? "true" : "false"); } static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); @@ -3893,7 +3893,7 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, { struct srp_host *host = container_of(dev, struct srp_host, dev); - return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev)); + return sysfs_emit(buf, "%s\n", dev_name(&host->srp_dev->dev->dev)); } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); @@ -3903,7 +3903,7 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr, { struct srp_host *host = container_of(dev, struct srp_host, dev); - return sprintf(buf, "%d\n", host->port); + return sysfs_emit(buf, "%d\n", host->port); } static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); From 3c6bff3cf988fff5b19eee72435ac3251958229b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 7 Oct 2020 19:36:25 -0700 Subject: [PATCH 014/147] RDMA: Convert sysfs kobject * show functions to use sysfs_emit() Done with cocci script: @@ identifier k_show; identifier arg1, arg2, arg3; @@ ssize_t k_show(struct kobject * - arg1 + kobj , struct kobj_attribute * - arg2 + attr , char * - arg3 + buf ) { ... ( - arg1 + kobj | - arg2 + attr | - arg3 + buf ) ... } @@ identifier k_show; identifier kobj, attr, buf; @@ ssize_t k_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { <... return - sprintf(buf, + sysfs_emit(buf, ...); ...> } @@ identifier k_show; identifier kobj, attr, buf; @@ ssize_t k_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { <... return - snprintf(buf, PAGE_SIZE, + sysfs_emit(buf, ...); ...> } @@ identifier k_show; identifier kobj, attr, buf; @@ ssize_t k_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { <... return - scnprintf(buf, PAGE_SIZE, + sysfs_emit(buf, ...); ...> } @@ identifier k_show; identifier kobj, attr, buf; expression chr; @@ ssize_t k_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { <... return - strcpy(buf, chr); + sysfs_emit(buf, chr); ...> } @@ identifier k_show; identifier kobj, attr, buf; identifier len; @@ ssize_t k_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { <... len = - sprintf(buf, + sysfs_emit(buf, ...); ...> return len; } @@ identifier k_show; identifier kobj, attr, buf; identifier len; @@ ssize_t k_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { <... len = - snprintf(buf, PAGE_SIZE, + sysfs_emit(buf, ...); ...> return len; } @@ identifier k_show; identifier kobj, attr, buf; identifier len; @@ ssize_t k_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { <... len = - scnprintf(buf, PAGE_SIZE, + sysfs_emit(buf, ...); ...> return len; } @@ identifier k_show; identifier kobj, attr, buf; identifier len; @@ ssize_t k_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { <... - len += scnprintf(buf + len, PAGE_SIZE - len, + len += sysfs_emit_at(buf, len, ...); ...> return len; } @@ identifier k_show; identifier kobj, attr, buf; expression chr; @@ ssize_t k_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { ... - strcpy(buf, chr); - return strlen(buf); + return sysfs_emit(buf, chr); } Link: https://lore.kernel.org/r/7761c1efaebb96c432c85171d58405c25a824ccd.1602122880.git.joe@perches.com Signed-off-by: Joe Perches Reviewed-by: Jason Gunthorpe Acked-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c | 26 ++++++++------------ drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 12 +++------ 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index 9222a481ed4da4..f80898cc7b64c7 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -188,20 +188,18 @@ static ssize_t rtrs_clt_state_show(struct kobject *kobj, sess = container_of(kobj, struct rtrs_clt_sess, kobj); if (sess->state == RTRS_CLT_CONNECTED) - return sprintf(page, "connected\n"); + return sysfs_emit(page, "connected\n"); - return sprintf(page, "disconnected\n"); + return sysfs_emit(page, "disconnected\n"); } static struct kobj_attribute rtrs_clt_state_attr = __ATTR(state, 0444, rtrs_clt_state_show, NULL); static ssize_t rtrs_clt_reconnect_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); } static ssize_t rtrs_clt_reconnect_store(struct kobject *kobj, @@ -229,11 +227,9 @@ static struct kobj_attribute rtrs_clt_reconnect_attr = rtrs_clt_reconnect_store); static ssize_t rtrs_clt_disconnect_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); } static ssize_t rtrs_clt_disconnect_store(struct kobject *kobj, @@ -261,11 +257,9 @@ static struct kobj_attribute rtrs_clt_disconnect_attr = rtrs_clt_disconnect_store); static ssize_t rtrs_clt_remove_path_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); } static ssize_t rtrs_clt_remove_path_store(struct kobject *kobj, @@ -328,7 +322,7 @@ static ssize_t rtrs_clt_hca_port_show(struct kobject *kobj, sess = container_of(kobj, typeof(*sess), kobj); - return scnprintf(page, PAGE_SIZE, "%u\n", sess->hca_port); + return sysfs_emit(page, "%u\n", sess->hca_port); } static struct kobj_attribute rtrs_clt_hca_port_attr = @@ -342,7 +336,7 @@ static ssize_t rtrs_clt_hca_name_show(struct kobject *kobj, sess = container_of(kobj, struct rtrs_clt_sess, kobj); - return scnprintf(page, PAGE_SIZE, "%s\n", sess->hca_name); + return sysfs_emit(page, "%s\n", sess->hca_name); } static struct kobj_attribute rtrs_clt_hca_name_attr = diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 07fbb063555d3b..b7c1b37ed9a8f6 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -27,11 +27,9 @@ static struct kobj_type ktype = { }; static ssize_t rtrs_srv_disconnect_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); } static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj, @@ -72,8 +70,7 @@ static ssize_t rtrs_srv_hca_port_show(struct kobject *kobj, sess = container_of(kobj, typeof(*sess), kobj); usr_con = sess->s.con[0]; - return scnprintf(page, PAGE_SIZE, "%u\n", - usr_con->cm_id->port_num); + return sysfs_emit(page, "%u\n", usr_con->cm_id->port_num); } static struct kobj_attribute rtrs_srv_hca_port_attr = @@ -87,8 +84,7 @@ static ssize_t rtrs_srv_hca_name_show(struct kobject *kobj, sess = container_of(kobj, struct rtrs_srv_sess, kobj); - return scnprintf(page, PAGE_SIZE, "%s\n", - sess->s.dev->ib_dev->name); + return sysfs_emit(page, "%s\n", sess->s.dev->ib_dev->name); } static struct kobj_attribute rtrs_srv_hca_name_attr = From eeed6965071bfe686c36d414d9e48926baf3e757 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 9 Oct 2020 11:51:13 -0500 Subject: [PATCH 015/147] RDMA/rxe: Remove unused RXE_MR_TYPE_FMR This is a left over from the past. It is no longer used. Link: https://lore.kernel.org/r/20201009165112.271143-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 1 - drivers/infiniband/sw/rxe/rxe_verbs.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index d2ce852447c134..6e8c41567ba08e 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -31,7 +31,6 @@ int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length) return 0; case RXE_MEM_TYPE_MR: - case RXE_MEM_TYPE_FMR: if (iova < mem->iova || length > mem->length || iova > mem->iova + mem->length - length) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 3414b341b7091f..57967fc39c045c 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -273,7 +273,6 @@ enum rxe_mem_type { RXE_MEM_TYPE_NONE, RXE_MEM_TYPE_DMA, RXE_MEM_TYPE_MR, - RXE_MEM_TYPE_FMR, RXE_MEM_TYPE_MW, }; From b898d5c50cab1f985e77d053eb5c4d2c4a7694ae Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 12 Oct 2020 23:15:52 -0700 Subject: [PATCH 016/147] RDMA/bnxt_re: Fix entry size during SRQ create Only static WQE is supported for SRQ. So always use the max supported SGEs while calculating SRQ entry size. Fixes: 2bb3c32c5c5f ("RDMA/bnxt_re: Change wr posting logic to accommodate variable wqes") Link: https://lore.kernel.org/r/1602569752-12745-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 25a6c8fff91fe6..401bdc9e931e51 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1659,8 +1659,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, srq->qplib_srq.max_wqe = entries; srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; - srq->qplib_srq.wqe_size = - bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge); + /* 128 byte wqe size for SRQ . So use max sges */ + srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(dev_attr->max_srq_sges); srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id; From bfb972c5e1cba88c93912f271ed5ecc114e31431 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:15:39 +0100 Subject: [PATCH 017/147] IB/verbs: avoid nested container_of() Nested container_of() calls work correctly but cause a warning when building with W=2. Invoking it from an inline function like in drivers/infiniband/hw/mlx5/mlx5_ib.h means we get hundreds of warnings like: include/linux/kernel.h:852:8: warning: declaration of '__mptr' shadows a previous local [-Wshadow] 852 | void *__mptr = (void *)(ptr); \ | ^~~~~~ include/rdma/uverbs_ioctl.h:651:11: note: in expansion of macro 'container_of' 651 | (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ | ^~~~~~~~~~~~ include/rdma/uverbs_ioctl.h:651:24: note: in expansion of macro 'container_of' 651 | (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ | ^~~~~~~~~~~~ drivers/infiniband/hw/mthca/mthca_qp.c:564:35: note: in expansion of macro 'rdma_udata_to_drv_context' 564 | struct mthca_ucontext *context = rdma_udata_to_drv_context( | ^~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/kernel.h:852:8: note: shadowed declaration is here 852 | void *__mptr = (void *)(ptr); \ | ^~~~~~ include/rdma/uverbs_ioctl.h:651:11: note: in expansion of macro 'container_of' 651 | (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ | ^~~~~~~~~~~~ drivers/infiniband/hw/mthca/mthca_qp.c:564:35: note: in expansion of macro 'rdma_udata_to_drv_context' 564 | struct mthca_ucontext *context = rdma_udata_to_drv_context( | ^~~~~~~~~~~~~~~~~~~~~~~~~ In file included from : include/linux/kernel.h:852:8: warning: declaration of '__mptr' shadows a previous local [-Wshadow] 852 | void *__mptr = (void *)(ptr); \ | ^~~~~~ Rewrite the macro to use an inline function internally, which makes it more readable and reduces the amount of useless output from make W=2. Fixes: 730623f4a56f ("IB/verbs: Add helper function rdma_udata_to_drv_context") Link: https://lore.kernel.org/r/20201026161549.3709175-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- include/rdma/uverbs_ioctl.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index b00270c72740f5..bf167ef6c68891 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -647,12 +647,15 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b * 'ucontext'. * */ -#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ - (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ - driver_udata) \ - ->context, \ - drv_dev_struct, member) : \ - (drv_dev_struct *)NULL) +static inline struct uverbs_attr_bundle * +rdma_udata_to_uverbs_attr_bundle(struct ib_udata *udata) +{ + return container_of(udata, struct uverbs_attr_bundle, driver_udata); +} + +#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ + (udata ? container_of(rdma_udata_to_uverbs_attr_bundle(udata)->context, \ + drv_dev_struct, member) : (drv_dev_struct *)NULL) #define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT) From 5333499c6014224756e97fa1a1047dfa592d76d3 Mon Sep 17 00:00:00 2001 From: Jing Xiangfeng Date: Fri, 16 Oct 2020 15:58:45 +0800 Subject: [PATCH 018/147] RDMA/core: Fix error return in _ib_modify_qp() Fix to return error code PTR_ERR() from the error handling case instead of 0. Fixes: 51aab12631dd ("RDMA/core: Get xmit slave for LAG") Link: https://lore.kernel.org/r/20201016075845.129562-1-jingxiangfeng@huawei.com Signed-off-by: Jing Xiangfeng Reviewed-by: Maor Gottlieb Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index cfcfa3ae32cf21..ab1e6048685ee5 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1701,8 +1701,10 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, slave = rdma_lag_get_ah_roce_slave(qp->device, &attr->ah_attr, GFP_KERNEL); - if (IS_ERR(slave)) + if (IS_ERR(slave)) { + ret = PTR_ERR(slave); goto out_av; + } attr->xmit_slave = slave; } } From bb3ab2979fd69db23328691cb10067861df89037 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Tue, 13 Oct 2020 12:07:42 -0500 Subject: [PATCH 019/147] RDMA/rxe: Compute PSN windows correctly The code which limited the number of unacknowledged PSNs was incorrect. The PSNs are limited to 24 bits and wrap back to zero from 0x00ffffff. The test was computing a 32 bit value which wraps at 32 bits so that qp->req.psn can appear smaller than the limit when it is actually larger. Replace '>' test with psn_compare which is used for other PSN comparisons and correctly handles the 24 bit size. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20201013170741.3590-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_req.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index af3923bf0a36bf..d4917646641aad 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -634,7 +634,8 @@ int rxe_requester(void *arg) } if (unlikely(qp_type(qp) == IB_QPT_RC && - qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) { + psn_compare(qp->req.psn, (qp->comp.psn + + RXE_MAX_UNACKED_PSNS)) > 0)) { qp->req.wait_psn = 1; goto exit; } From dae7a75f1f19bffb579daf148f8d8addd2726772 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 19 Oct 2020 12:46:28 +0300 Subject: [PATCH 020/147] IB/isert: add module param to set sg_tablesize for IO cmd Currently, iser target support max IO size of 16MiB by default. For some adapters, allocating this amount of resources might reduce the total number of possible connections that can be created. For those adapters, it's preferred to reduce the max IO size to be able to create more connections. Since there is no handshake procedure for max IO size in iser protocol, set the default max IO size to 1MiB and add a module parameter for enabling the option to control it for suitable adapters. Fixes: 317000b926b0 ("IB/isert: allocate RW ctxs according to max IO size") Link: https://lore.kernel.org/r/20201019094628.17202-1-mgurtovoy@nvidia.com Reported-by: Krishnamraju Eraparaju Reviewed-by: Sagi Grimberg Signed-off-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/isert/ib_isert.c | 27 ++++++++++++++++++++++++- drivers/infiniband/ulp/isert/ib_isert.h | 6 ++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 436e17f1d0e53c..bd478947b93a5e 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -28,6 +28,18 @@ static int isert_debug_level; module_param_named(debug_level, isert_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:0)"); +static int isert_sg_tablesize_set(const char *val, + const struct kernel_param *kp); +static const struct kernel_param_ops sg_tablesize_ops = { + .set = isert_sg_tablesize_set, + .get = param_get_int, +}; + +static int isert_sg_tablesize = ISCSI_ISER_DEF_SG_TABLESIZE; +module_param_cb(sg_tablesize, &sg_tablesize_ops, &isert_sg_tablesize, 0644); +MODULE_PARM_DESC(sg_tablesize, + "Number of gather/scatter entries in a single scsi command, should >= 128 (default: 256, max: 4096)"); + static DEFINE_MUTEX(device_list_mutex); static LIST_HEAD(device_list); static struct workqueue_struct *isert_comp_wq; @@ -47,6 +59,19 @@ static void isert_send_done(struct ib_cq *cq, struct ib_wc *wc); static void isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void isert_login_send_done(struct ib_cq *cq, struct ib_wc *wc); +static int isert_sg_tablesize_set(const char *val, const struct kernel_param *kp) +{ + int n = 0, ret; + + ret = kstrtoint(val, 10, &n); + if (ret != 0 || n < ISCSI_ISER_MIN_SG_TABLESIZE || + n > ISCSI_ISER_MAX_SG_TABLESIZE) + return -EINVAL; + + return param_set_int(val, kp); +} + + static inline bool isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd) { @@ -101,7 +126,7 @@ isert_create_qp(struct isert_conn *isert_conn, attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; factor = rdma_rw_mr_factor(device->ib_device, cma_id->port_num, - ISCSI_ISER_MAX_SG_TABLESIZE); + isert_sg_tablesize); attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX * factor; attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge; attr.cap.max_recv_sge = 1; diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 7fee4a65e181ab..6c5af13db4e0d2 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -65,6 +65,12 @@ */ #define ISER_RX_SIZE (ISCSI_DEF_MAX_RECV_SEG_LEN + 1024) +/* Default I/O size is 1MB */ +#define ISCSI_ISER_DEF_SG_TABLESIZE 256 + +/* Minimum I/O size is 512KB */ +#define ISCSI_ISER_MIN_SG_TABLESIZE 128 + /* Maximum support is 16MB I/O size */ #define ISCSI_ISER_MAX_SG_TABLESIZE 4096 From aba457ca890c6a8042ba941a71129337b858d993 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Tue, 20 Oct 2020 20:04:53 +0800 Subject: [PATCH 021/147] RDMA/hns: Support owner mode doorbell The doorbell needs to store PI information into QPC, so the RoCEE should wait for the results of storing, that is, it needs two bus operations to complete a doorbell. When ROCEE is in SDI mode, multiple doorbells may be interlocked because the RoCEE can only handle bus operations serially. So a flag to mark if HIP09 is working in SDI mode is added. When the SDI flag is set, the ROCEE will ignore the PI information of the doorbell, continue to fetch wqe and verify its validity by it's owner_bit. Link: https://lore.kernel.org/r/1603195493-22741-1-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 28 ++++++++++++++++----- drivers/infiniband/hw/hns/hns_roce_qp.c | 3 +++ 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 6d2acff69f982f..07c95d837033eb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -129,9 +129,10 @@ enum { SERV_TYPE_UD, }; -enum { +enum hns_roce_qp_caps { HNS_ROCE_QP_CAP_RQ_RECORD_DB = BIT(0), HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1), + HNS_ROCE_QP_CAP_OWNER_DB = BIT(2), }; enum hns_roce_cq_flags { @@ -221,6 +222,7 @@ enum { HNS_ROCE_CAP_FLAG_FRMR = BIT(8), HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9), HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), + HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14), }; #define HNS_ROCE_DB_TYPE_COUNT 2 diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a0b679254a8e56..7741d9f9244e29 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -474,9 +474,6 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S, (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0); - roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, - owner_bit); - roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M, V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn); @@ -517,7 +514,18 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, set_extend_sge(qp, wr, &curr_idx, valid_num_sge); + /* + * The pipeline can sequentially post all valid WQEs into WQ buffer, + * including new WQEs waiting for the doorbell to update the PI again. + * Therefore, the owner bit of WQE MUST be updated after all fields + * and extSGEs have been written into DDR instead of cache. + */ + if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB) + dma_wmb(); + *sge_idx = curr_idx; + roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, + owner_bit); return 0; } @@ -591,9 +599,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, - owner_bit); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) set_atomic_seg(wr, rc_sq_wqe, valid_num_sge); @@ -601,7 +606,18 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, &curr_idx, valid_num_sge); + /* + * The pipeline can sequentially post all valid WQEs into WQ buffer, + * including new WQEs waiting for the doorbell to update the PI again. + * Therefore, the owner bit of WQE MUST be updated after all fields + * and extSGEs have been written into DDR instead of cache. + */ + if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB) + dma_wmb(); + *sge_idx = curr_idx; + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, + owner_bit); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 04a70681bea1bd..e288946c6138fa 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -725,6 +725,9 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_device *ibdev = &hr_dev->ib_dev; int ret; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SDI_MODE) + hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB; + if (udata) { if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) { ret = hns_roce_db_map_user(uctx, udata, ucmd->sdb_addr, From 2b3062e4d997f201c1ad2bbde88b7271dd9ef35f Mon Sep 17 00:00:00 2001 From: Danil Kipnis Date: Fri, 23 Oct 2020 09:43:42 +0200 Subject: [PATCH 022/147] RDMA/rtrs-clt: Remove destroy_con_cq_qp in case route resolving failed We call destroy_con_cq_qp(con) in rtrs_rdma_addr_resolved() in case route couldn't be resolved and then again in create_cm() because nothing happens. Don't call destroy_con_cq_qp from rtrs_rdma_addr_resolved, create_cm() does the clean up already. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Link: https://lore.kernel.org/r/20201023074353.21946-2-jinpu.wang@cloud.ionos.com Signed-off-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 776e89231c52f7..9980bb4a6f78e8 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1640,10 +1640,8 @@ static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con) return err; } err = rdma_resolve_route(con->c.cm_id, RTRS_CONNECT_TIMEOUT_MS); - if (err) { + if (err) rtrs_err(s, "Resolving route failed, err: %d\n", err); - destroy_con_cq_qp(con); - } return err; } From 73385fdbc43df2e9ba07d4a459d6e0e2110ad2d8 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Fri, 23 Oct 2020 09:43:43 +0200 Subject: [PATCH 023/147] RDMA/rtrs-clt: Remove outdated comment in create_con_cq_qp As run destroy_con_cq_qp many times doesn't work, remove the comments. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Link: https://lore.kernel.org/r/20201023074353.21946-3-jinpu.wang@cloud.ionos.com Suggested-by: Gioh Kim Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 9980bb4a6f78e8..fb840b152b37c3 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1520,15 +1520,6 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) int err, cq_vector; struct rtrs_msg_rkey_rsp *rsp; - /* - * This function can fail, but still destroy_con_cq_qp() should - * be called, this is because create_con_cq_qp() is called on cm - * event path, thus caller/waiter never knows: have we failed before - * create_con_cq_qp() or after. To solve this dilemma without - * creating any additional flags just allow destroy_con_cq_qp() be - * called many times. - */ - if (con->c.cid == 0) { /* * One completion for each receive and two for each send From fcf2959da6a74e71a85ab666e732fa1ed4da2c9a Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Fri, 23 Oct 2020 09:43:44 +0200 Subject: [PATCH 024/147] RDMA/rtrs-clt: Avoid run destroy_con_cq_qp/create_con_cq_qp in parallel It could happen two kworkers race with each other: CPU0 CPU1 addr_resolver kworker reconnect kworker rtrs_clt_rdma_cm_handler rtrs_rdma_addr_resolved create_con_cq_qp: s.dev_ref++ "s.dev_ref is 1" wait in create_cm fails with TIMEOUT destroy_con_cq_qp: --s.dev_ref "s.dev_ref is 0" destroy_con_cq_qp: sess->s.dev = NULL rtrs_cq_qp_create -> create_qp(con, sess->dev->ib_pd...) sess->dev is NULL, panic. To fix the problem using mutex to serialize create_con_cq_qp and destroy_con_cq_qp. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Link: https://lore.kernel.org/r/20201023074353.21946-4-jinpu.wang@cloud.ionos.com Signed-off-by: Jack Wang Reviewed-by: Gioh Kim Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 15 +++++++++++++-- drivers/infiniband/ulp/rtrs/rtrs-clt.h | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index fb840b152b37c3..4677e8ed29ae91 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1499,6 +1499,7 @@ static int create_con(struct rtrs_clt_sess *sess, unsigned int cid) con->c.cid = cid; con->c.sess = &sess->s; atomic_set(&con->io_cnt, 0); + mutex_init(&con->con_mutex); sess->s.con[cid] = &con->c; @@ -1510,6 +1511,7 @@ static void destroy_con(struct rtrs_clt_con *con) struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); sess->s.con[con->c.cid] = NULL; + mutex_destroy(&con->con_mutex); kfree(con); } @@ -1520,6 +1522,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) int err, cq_vector; struct rtrs_msg_rkey_rsp *rsp; + lockdep_assert_held(&con->con_mutex); if (con->c.cid == 0) { /* * One completion for each receive and two for each send @@ -1593,7 +1596,7 @@ static void destroy_con_cq_qp(struct rtrs_clt_con *con) * Be careful here: destroy_con_cq_qp() can be called even * create_con_cq_qp() failed, see comments there. */ - + lockdep_assert_held(&con->con_mutex); rtrs_cq_qp_destroy(&con->c); if (con->rsp_ius) { rtrs_iu_free(con->rsp_ius, DMA_FROM_DEVICE, @@ -1625,7 +1628,9 @@ static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con) struct rtrs_sess *s = con->c.sess; int err; + mutex_lock(&con->con_mutex); err = create_con_cq_qp(con); + mutex_unlock(&con->con_mutex); if (err) { rtrs_err(s, "create_con_cq_qp(), err: %d\n", err); return err; @@ -1938,8 +1943,9 @@ static int create_cm(struct rtrs_clt_con *con) errr: stop_cm(con); - /* Is safe to call destroy if cq_qp is not inited */ + mutex_lock(&con->con_mutex); destroy_con_cq_qp(con); + mutex_unlock(&con->con_mutex); destroy_cm: destroy_cm(con); @@ -2046,7 +2052,9 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) if (!sess->s.con[cid]) break; con = to_clt_con(sess->s.con[cid]); + mutex_lock(&con->con_mutex); destroy_con_cq_qp(con); + mutex_unlock(&con->con_mutex); destroy_cm(con); destroy_con(con); } @@ -2213,7 +2221,10 @@ static int init_conns(struct rtrs_clt_sess *sess) struct rtrs_clt_con *con = to_clt_con(sess->s.con[cid]); stop_cm(con); + + mutex_lock(&con->con_mutex); destroy_con_cq_qp(con); + mutex_unlock(&con->con_mutex); destroy_cm(con); destroy_con(con); } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index 167acd3c90fcce..b8dbd701b3cb29 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -72,6 +72,7 @@ struct rtrs_clt_con { struct rtrs_iu *rsp_ius; u32 queue_size; unsigned int cpu; + struct mutex con_mutex; atomic_t io_cnt; int cm_err; }; From f553e7601df9566ba7644541fc09152a3a81f793 Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Fri, 23 Oct 2020 09:43:45 +0200 Subject: [PATCH 025/147] RDMA/rtrs-clt: Missing error from rtrs_rdma_conn_established When rtrs_rdma_conn_established returns error (non-zero value), the error value is stored in con->cm_err and it cannot trigger rtrs_rdma_error_recovery. Finally the error of rtrs_rdma_con_established will be forgot. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Link: https://lore.kernel.org/r/20201023074353.21946-5-jinpu.wang@cloud.ionos.com Signed-off-by: Gioh Kim Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 4677e8ed29ae91..ffca004625bba2 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1831,8 +1831,8 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, cm_err = rtrs_rdma_route_resolved(con); break; case RDMA_CM_EVENT_ESTABLISHED: - con->cm_err = rtrs_rdma_conn_established(con, ev); - if (likely(!con->cm_err)) { + cm_err = rtrs_rdma_conn_established(con, ev); + if (likely(!cm_err)) { /* * Report success and wake up. Here we abuse state_wq, * i.e. wake up without state change, but we set cm_err. From d715ff8acbd5876549ef2b21b755ed919f40dcc1 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 23 Oct 2020 09:43:46 +0200 Subject: [PATCH 026/147] RDMA/rtrs-srv: Don't guard the whole __alloc_srv with srv_mutex The purpose of srv_mutex is to protect srv_list as in put_srv, so no need to hold it when allocate memory for srv since it could be time consuming. Otherwise if one machine has limited memory, rsrv_close_work could be blocked for a longer time due to the mutex is held by get_or_create_srv since it can't get memory in time. INFO: task kworker/1:1:27478 blocked for more than 120 seconds. Tainted: G O 4.14.171-1-storage #4.14.171-1.3~deb9 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/1:1 D 0 27478 2 0x80000000 Workqueue: rtrs_server_wq rtrs_srv_close_work [rtrs_server] Call Trace: ? __schedule+0x38c/0x7e0 schedule+0x32/0x80 schedule_preempt_disabled+0xa/0x10 __mutex_lock.isra.2+0x25e/0x4d0 ? put_srv+0x44/0x100 [rtrs_server] put_srv+0x44/0x100 [rtrs_server] rtrs_srv_close_work+0x16c/0x280 [rtrs_server] process_one_work+0x1c5/0x3c0 worker_thread+0x47/0x3e0 kthread+0xfc/0x130 ? trace_event_raw_event_workqueue_execute_start+0xa0/0xa0 ? kthread_create_on_node+0x70/0x70 ret_from_fork+0x1f/0x30 Let's move all the logics from __find_srv_and_get and __alloc_srv to get_or_create_srv, and remove the two functions. Then it should be safe for multiple processes to access the same srv since it is protected with srv_mutex. And since we don't want to allocate chunks with srv_mutex held, let's check the srv->refcount after get srv because the chunks could not be allocated yet. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Link: https://lore.kernel.org/r/20201023074353.21946-6-jinpu.wang@cloud.ionos.com Signed-off-by: Guoqing Jiang Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 86 +++++++++++--------------- 1 file changed, 37 insertions(+), 49 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index d6f93601712e49..1cb778aff3c59a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1328,17 +1328,42 @@ static void rtrs_srv_dev_release(struct device *dev) kfree(srv); } -static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) +static void free_srv(struct rtrs_srv *srv) +{ + int i; + + WARN_ON(refcount_read(&srv->refcount)); + for (i = 0; i < srv->queue_depth; i++) + mempool_free(srv->chunks[i], chunk_pool); + kfree(srv->chunks); + mutex_destroy(&srv->paths_mutex); + mutex_destroy(&srv->paths_ev_mutex); + /* last put to release the srv structure */ + put_device(&srv->dev); +} + +static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, + const uuid_t *paths_uuid) { struct rtrs_srv *srv; int i; + mutex_lock(&ctx->srv_mutex); + list_for_each_entry(srv, &ctx->srv_list, ctx_list) { + if (uuid_equal(&srv->paths_uuid, paths_uuid) && + refcount_inc_not_zero(&srv->refcount)) { + mutex_unlock(&ctx->srv_mutex); + return srv; + } + } + + /* need to allocate a new srv */ srv = kzalloc(sizeof(*srv), GFP_KERNEL); - if (!srv) + if (!srv) { + mutex_unlock(&ctx->srv_mutex); return NULL; + } - refcount_set(&srv->refcount, 1); INIT_LIST_HEAD(&srv->paths_list); mutex_init(&srv->paths_mutex); mutex_init(&srv->paths_ev_mutex); @@ -1347,6 +1372,8 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, srv->ctx = ctx; device_initialize(&srv->dev); srv->dev.release = rtrs_srv_dev_release; + list_add(&srv->ctx_list, &ctx->srv_list); + mutex_unlock(&ctx->srv_mutex); srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks), GFP_KERNEL); @@ -1358,7 +1385,7 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, if (!srv->chunks[i]) goto err_free_chunks; } - list_add(&srv->ctx_list, &ctx->srv_list); + refcount_set(&srv->refcount, 1); return srv; @@ -1369,52 +1396,9 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, err_free_srv: kfree(srv); - return NULL; } -static void free_srv(struct rtrs_srv *srv) -{ - int i; - - WARN_ON(refcount_read(&srv->refcount)); - for (i = 0; i < srv->queue_depth; i++) - mempool_free(srv->chunks[i], chunk_pool); - kfree(srv->chunks); - mutex_destroy(&srv->paths_mutex); - mutex_destroy(&srv->paths_ev_mutex); - /* last put to release the srv structure */ - put_device(&srv->dev); -} - -static inline struct rtrs_srv *__find_srv_and_get(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) -{ - struct rtrs_srv *srv; - - list_for_each_entry(srv, &ctx->srv_list, ctx_list) { - if (uuid_equal(&srv->paths_uuid, paths_uuid) && - refcount_inc_not_zero(&srv->refcount)) - return srv; - } - - return NULL; -} - -static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) -{ - struct rtrs_srv *srv; - - mutex_lock(&ctx->srv_mutex); - srv = __find_srv_and_get(ctx, paths_uuid); - if (!srv) - srv = __alloc_srv(ctx, paths_uuid); - mutex_unlock(&ctx->srv_mutex); - - return srv; -} - static void put_srv(struct rtrs_srv *srv) { if (refcount_dec_and_test(&srv->refcount)) { @@ -1813,7 +1797,11 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, } recon_cnt = le16_to_cpu(msg->recon_cnt); srv = get_or_create_srv(ctx, &msg->paths_uuid); - if (!srv) { + /* + * "refcount == 0" happens if a previous thread calls get_or_create_srv + * allocate srv, but chunks of srv are not allocated yet. + */ + if (!srv || refcount_read(&srv->refcount) == 0) { err = -ENOMEM; goto reject_w_err; } From 3c8483f5a436ce00f122378ef84aa7c6b20066f1 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 23 Oct 2020 09:43:47 +0200 Subject: [PATCH 027/147] RDMA/rtrs-srv: Fix typo It should mean region here. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Link: https://lore.kernel.org/r/20201023074353.21946-7-jinpu.wang@cloud.ionos.com Signed-off-by: Guoqing Jiang Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h index 08b0b8a6eebe6d..9543ae19996c42 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h @@ -62,7 +62,7 @@ struct rtrs_srv_op { /* * server side memory region context, when always_invalidate=Y, we need - * queue_depth of memory regrion to invalidate each memory region. + * queue_depth of memory region to invalidate each memory region. */ struct rtrs_srv_mr { struct ib_mr *mr; From 8bd372ace32ec88fe3ad1421929ae1604f2a2c2c Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Fri, 23 Oct 2020 09:43:48 +0200 Subject: [PATCH 028/147] RDMA/rtrs: Remove unnecessary argument dir of rtrs_iu_free The direction of DMA operation is already in the rtrs_iu Link: https://lore.kernel.org/r/20201023074353.21946-8-jinpu.wang@cloud.ionos.com Signed-off-by: Gioh Kim Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 14 ++++++-------- drivers/infiniband/ulp/rtrs/rtrs-pri.h | 3 +-- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 14 ++++++-------- drivers/infiniband/ulp/rtrs/rtrs.c | 9 ++++----- 4 files changed, 17 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index ffca004625bba2..4e5da834034a63 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1236,8 +1236,7 @@ static void free_sess_reqs(struct rtrs_clt_sess *sess) if (req->mr) ib_dereg_mr(req->mr); kfree(req->sge); - rtrs_iu_free(req->iu, DMA_TO_DEVICE, - sess->s.dev->ib_dev, 1); + rtrs_iu_free(req->iu, sess->s.dev->ib_dev, 1); } kfree(sess->reqs); sess->reqs = NULL; @@ -1599,8 +1598,7 @@ static void destroy_con_cq_qp(struct rtrs_clt_con *con) lockdep_assert_held(&con->con_mutex); rtrs_cq_qp_destroy(&con->c); if (con->rsp_ius) { - rtrs_iu_free(con->rsp_ius, DMA_FROM_DEVICE, - sess->s.dev->ib_dev, con->queue_size); + rtrs_iu_free(con->rsp_ius, sess->s.dev->ib_dev, con->queue_size); con->rsp_ius = NULL; con->queue_size = 0; } @@ -2245,7 +2243,7 @@ static void rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc) struct rtrs_iu *iu; iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); - rtrs_iu_free(iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); if (unlikely(wc->status != IB_WC_SUCCESS)) { rtrs_err(sess->clt, "Sess info request send failed: %s\n", @@ -2374,7 +2372,7 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) out: rtrs_clt_update_wc_stats(con); - rtrs_iu_free(iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); rtrs_clt_change_state(sess, state); } @@ -2436,9 +2434,9 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) out: if (tx_iu) - rtrs_iu_free(tx_iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1); if (rx_iu) - rtrs_iu_free(rx_iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); if (unlikely(err)) /* If we've never taken async path because of malloc problems */ rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index b8e43dc4d95abd..3f2918671dbedf 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -287,8 +287,7 @@ struct rtrs_msg_rdma_hdr { struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t t, struct ib_device *dev, enum dma_data_direction, void (*done)(struct ib_cq *cq, struct ib_wc *wc)); -void rtrs_iu_free(struct rtrs_iu *iu, enum dma_data_direction dir, - struct ib_device *dev, u32 queue_size); +void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_size); int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu); int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, struct ib_send_wr *head); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 1cb778aff3c59a..42ee3bf7dc521c 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -577,8 +577,7 @@ static void unmap_cont_bufs(struct rtrs_srv_sess *sess) struct rtrs_srv_mr *srv_mr; srv_mr = &sess->mrs[i]; - rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE, - sess->s.dev->ib_dev, 1); + rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); ib_dereg_mr(srv_mr->mr); ib_dma_unmap_sg(sess->s.dev->ib_dev, srv_mr->sgt.sgl, srv_mr->sgt.nents, DMA_BIDIRECTIONAL); @@ -682,8 +681,7 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) sgt = &srv_mr->sgt; mr = srv_mr->mr; free_iu: - rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE, - sess->s.dev->ib_dev, 1); + rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); dereg_mr: ib_dereg_mr(mr); unmap_sg: @@ -735,7 +733,7 @@ static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) struct rtrs_iu *iu; iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); - rtrs_iu_free(iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); if (unlikely(wc->status != IB_WC_SUCCESS)) { rtrs_err(s, "Sess info response send failed: %s\n", @@ -861,7 +859,7 @@ static int process_info_req(struct rtrs_srv_con *con, if (unlikely(err)) { rtrs_err(s, "rtrs_iu_post_send(), err: %d\n", err); iu_free: - rtrs_iu_free(tx_iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1); } rwr_free: kfree(rwr); @@ -906,7 +904,7 @@ static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc) goto close; out: - rtrs_iu_free(iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); return; close: close_sess(sess); @@ -929,7 +927,7 @@ static int post_recv_info_req(struct rtrs_srv_con *con) err = rtrs_iu_post_recv(&con->c, rx_iu); if (unlikely(err)) { rtrs_err(s, "rtrs_iu_post_recv(), err: %d\n", err); - rtrs_iu_free(rx_iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); return err; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index ff1093d6e4bc9a..48f648f573b6f6 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -31,6 +31,7 @@ struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t gfp_mask, return NULL; for (i = 0; i < queue_size; i++) { iu = &ius[i]; + iu->direction = dir; iu->buf = kzalloc(size, gfp_mask); if (!iu->buf) goto err; @@ -41,17 +42,15 @@ struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t gfp_mask, iu->cqe.done = done; iu->size = size; - iu->direction = dir; } return ius; err: - rtrs_iu_free(ius, dir, dma_dev, i); + rtrs_iu_free(ius, dma_dev, i); return NULL; } EXPORT_SYMBOL_GPL(rtrs_iu_alloc); -void rtrs_iu_free(struct rtrs_iu *ius, enum dma_data_direction dir, - struct ib_device *ibdev, u32 queue_size) +void rtrs_iu_free(struct rtrs_iu *ius, struct ib_device *ibdev, u32 queue_size) { struct rtrs_iu *iu; int i; @@ -61,7 +60,7 @@ void rtrs_iu_free(struct rtrs_iu *ius, enum dma_data_direction dir, for (i = 0; i < queue_size; i++) { iu = &ius[i]; - ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, dir); + ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, iu->direction); kfree(iu->buf); } kfree(ius); From 16101b60e71782b2a314a87114cdca8248b89cb3 Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Fri, 23 Oct 2020 09:43:49 +0200 Subject: [PATCH 029/147] RDMA/rtrs-clt: Remove duplicated switch-case handling for CM error events The events returning the same error value are put together. Link: https://lore.kernel.org/r/20201023074353.21946-9-jinpu.wang@cloud.ionos.com Signed-off-by: Gioh Kim Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 4e5da834034a63..30eda2f355e1c0 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1843,20 +1843,22 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_REJECTED: cm_err = rtrs_rdma_conn_rejected(con, ev); break; + case RDMA_CM_EVENT_DISCONNECTED: + /* No message for disconnecting */ + cm_err = -ECONNRESET; + break; case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: + case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_TIMEWAIT_EXIT: rtrs_wrn(s, "CM error event %d\n", ev->event); cm_err = -ECONNRESET; break; case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: + rtrs_wrn(s, "CM error event %d\n", ev->event); cm_err = -EHOSTUNREACH; break; - case RDMA_CM_EVENT_DISCONNECTED: - case RDMA_CM_EVENT_ADDR_CHANGE: - case RDMA_CM_EVENT_TIMEWAIT_EXIT: - cm_err = -ECONNRESET; - break; case RDMA_CM_EVENT_DEVICE_REMOVAL: /* * Device removal is a special case. Queue close and return 0. From c3b16b67d12f938408172ac0c47470f09c3f39ea Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Fri, 23 Oct 2020 09:43:50 +0200 Subject: [PATCH 030/147] RDMA/rtrs-clt: Remove duplicated code process_info_rsp checks that sg_cnt is zero twice. Link: https://lore.kernel.org/r/20201023074353.21946-10-jinpu.wang@cloud.ionos.com Signed-off-by: Gioh Kim Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 30eda2f355e1c0..1b2821d71b4665 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2264,8 +2264,12 @@ static int process_info_rsp(struct rtrs_clt_sess *sess, int i, sgi; sg_cnt = le16_to_cpu(msg->sg_cnt); - if (unlikely(!sg_cnt)) + if (unlikely(!sg_cnt || (sess->queue_depth % sg_cnt))) { + rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n", + sg_cnt); return -EINVAL; + } + /* * Check if IB immediate data size is enough to hold the mem_id and * the offset inside the memory chunk. @@ -2278,11 +2282,6 @@ static int process_info_rsp(struct rtrs_clt_sess *sess, MAX_IMM_PAYL_BITS, sg_cnt, sess->chunk_size); return -EINVAL; } - if (unlikely(!sg_cnt || (sess->queue_depth % sg_cnt))) { - rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n", - sg_cnt); - return -EINVAL; - } total_len = 0; for (sgi = 0, i = 0; sgi < sg_cnt && i < sess->queue_depth; sgi++) { const struct rtrs_sg_desc *desc = &msg->desc[sgi]; From ffea6ad1335b90be91e837aecbde730e1612087a Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 23 Oct 2020 09:43:51 +0200 Subject: [PATCH 031/147] RDMA/rtrs-srv: Kill rtrs_srv_change_state_get_old This function isn't needed since no caller checks the old_state of sess. Link: https://lore.kernel.org/r/20201023074353.21946-11-jinpu.wang@cloud.ionos.com Signed-off-by: Guoqing Jiang Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 42ee3bf7dc521c..c42fd470c4eb46 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -113,28 +113,18 @@ static bool __rtrs_srv_change_state(struct rtrs_srv_sess *sess, return changed; } -static bool rtrs_srv_change_state_get_old(struct rtrs_srv_sess *sess, - enum rtrs_srv_state new_state, - enum rtrs_srv_state *old_state) +static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, + enum rtrs_srv_state new_state) { bool changed; spin_lock_irq(&sess->state_lock); - *old_state = sess->state; changed = __rtrs_srv_change_state(sess, new_state); spin_unlock_irq(&sess->state_lock); return changed; } -static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, - enum rtrs_srv_state new_state) -{ - enum rtrs_srv_state old_state; - - return rtrs_srv_change_state_get_old(sess, new_state, &old_state); -} - static void free_id(struct rtrs_srv_op *id) { if (!id) @@ -471,10 +461,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, void close_sess(struct rtrs_srv_sess *sess) { - enum rtrs_srv_state old_state; - - if (rtrs_srv_change_state_get_old(sess, RTRS_SRV_CLOSING, - &old_state)) + if (rtrs_srv_change_state(sess, RTRS_SRV_CLOSING)) queue_work(rtrs_wq, &sess->close_work); WARN_ON(sess->state != RTRS_SRV_CLOSING); } From e6ab8cf50fa1c38652feba3e4921c60538236f30 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 23 Oct 2020 09:43:52 +0200 Subject: [PATCH 032/147] RDMA/rtrs: Introduce rtrs_post_send Since the three functions share the similar logic, let's introduce one common function for it. Link: https://lore.kernel.org/r/20201023074353.21946-12-jinpu.wang@cloud.ionos.com Signed-off-by: Guoqing Jiang Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs.c | 52 +++++++++++------------------- 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index 48f648f573b6f6..2e3a849e0a77c2 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -104,6 +104,22 @@ int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe) } EXPORT_SYMBOL_GPL(rtrs_post_recv_empty); +static int rtrs_post_send(struct ib_qp *qp, struct ib_send_wr *head, + struct ib_send_wr *wr) +{ + if (head) { + struct ib_send_wr *tail = head; + + while (tail->next) + tail = tail->next; + tail->next = wr; + } else { + head = wr; + } + + return ib_post_send(qp, head, NULL); +} + int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, struct ib_send_wr *head) { @@ -126,17 +142,7 @@ int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, .send_flags = IB_SEND_SIGNALED, }; - if (head) { - struct ib_send_wr *tail = head; - - while (tail->next) - tail = tail->next; - tail->next = ≀ - } else { - head = ≀ - } - - return ib_post_send(con->qp, head, NULL); + return rtrs_post_send(con->qp, head, &wr); } EXPORT_SYMBOL_GPL(rtrs_iu_post_send); @@ -168,17 +174,7 @@ int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu, if (WARN_ON(sge[i].length == 0)) return -EINVAL; - if (head) { - struct ib_send_wr *tail = head; - - while (tail->next) - tail = tail->next; - tail->next = &wr.wr; - } else { - head = &wr.wr; - } - - return ib_post_send(con->qp, head, NULL); + return rtrs_post_send(con->qp, head, &wr.wr); } EXPORT_SYMBOL_GPL(rtrs_iu_post_rdma_write_imm); @@ -195,17 +191,7 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, .ex.imm_data = cpu_to_be32(imm_data), }; - if (head) { - struct ib_send_wr *tail = head; - - while (tail->next) - tail = tail->next; - tail->next = ≀ - } else { - head = ≀ - } - - return ib_post_send(con->qp, head, NULL); + return rtrs_post_send(con->qp, head, &wr); } EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty); From 3f4e3d962dfda68e024d57cf2408cacf081cd9df Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 23 Oct 2020 09:43:53 +0200 Subject: [PATCH 033/147] RDMA/rtrs-clt: Remove 'addr' from rtrs_clt_add_path_to_arr Remove the argument since it is not used in the function. Link: https://lore.kernel.org/r/20201023074353.21946-13-jinpu.wang@cloud.ionos.com Signed-off-by: Guoqing Jiang Signed-off-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 1b2821d71b4665..9d359c8f2f81f1 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2161,8 +2161,7 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) mutex_unlock(&clt->paths_mutex); } -static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess, - struct rtrs_addr *addr) +static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess) { struct rtrs_clt *clt = sess->clt; @@ -2937,7 +2936,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, * IO will never grab it. Also it is very important to add * path before init, since init fires LINK_CONNECTED event. */ - rtrs_clt_add_path_to_arr(sess, addr); + rtrs_clt_add_path_to_arr(sess); err = init_sess(sess); if (err) From d6d91e46210f3adb7b6d4c667cb72bf847b5783a Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Sat, 24 Oct 2020 11:07:15 +0800 Subject: [PATCH 034/147] RDMA/hns: Add support for configuring GMV table HIP09 supports to store SGID/SMAC/VLAN together in a table named GMV. The driver needs to allocate memory for it and tell the information about this region to hardware. Link: https://lore.kernel.org/r/1603508836-33054-2-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 10 ++ drivers/infiniband/hw/hns/hns_roce_hem.c | 15 +++ drivers/infiniband/hw/hns/hns_roce_hem.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 115 +++++++++++++++----- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 29 ++++- drivers/infiniband/hw/hns/hns_roce_main.c | 17 +++ 6 files changed, 159 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 07c95d837033eb..1d99022f70f796 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -827,6 +827,7 @@ struct hns_roce_caps { u32 cqc_timer_bt_num; u32 mpt_bt_num; u32 sccc_bt_num; + u32 gmv_bt_num; u32 qpc_ba_pg_sz; u32 qpc_buf_pg_sz; u32 qpc_hop_num; @@ -866,6 +867,11 @@ struct hns_roce_caps { u32 eqe_ba_pg_sz; u32 eqe_buf_pg_sz; u32 eqe_hop_num; + u32 gmv_entry_num; + u32 gmv_entry_sz; + u32 gmv_ba_pg_sz; + u32 gmv_buf_pg_sz; + u32 gmv_hop_num; u32 sl_num; u32 tsq_buf_pg_sz; u32 tpq_buf_pg_sz; @@ -1001,6 +1007,10 @@ struct hns_roce_dev { struct hns_roce_eq_table eq_table; struct hns_roce_hem_table qpc_timer_table; struct hns_roce_hem_table cqc_timer_table; + /* GMV is the memory area that the driver allocates for the hardware + * to store SGID, SMAC and VLAN information. + */ + struct hns_roce_hem_table gmv_table; int cmd_mod; int loop_idc; diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 7487cf3d2c37aa..5c302aecf05030 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -75,6 +75,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) case HEM_TYPE_CQC_TIMER: hop_num = hr_dev->caps.cqc_timer_hop_num; break; + case HEM_TYPE_GMV: + hop_num = hr_dev->caps.gmv_hop_num; + break; default: return false; } @@ -183,6 +186,14 @@ static int get_hem_table_config(struct hns_roce_dev *hr_dev, mhop->ba_l0_num = hr_dev->caps.srqc_bt_num; mhop->hop_num = hr_dev->caps.srqc_hop_num; break; + case HEM_TYPE_GMV: + mhop->buf_chunk_size = 1 << (hr_dev->caps.gmv_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.gmv_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = hr_dev->caps.gmv_bt_num; + mhop->hop_num = hr_dev->caps.gmv_hop_num; + break; default: dev_err(dev, "Table %d not support multi-hop addressing!\n", type); @@ -1033,6 +1044,10 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.trrl_table); + + if (hr_dev->caps.gmv_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->gmv_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index b34c940077bb56..c6bd98228a440a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -47,6 +47,7 @@ enum { HEM_TYPE_SCCC, HEM_TYPE_QPC_TIMER, HEM_TYPE_CQC_TIMER, + HEM_TYPE_GMV, /* UNMAP HEM */ HEM_TYPE_MTT, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7741d9f9244e29..c2b35e62844048 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1589,6 +1589,10 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) PF_RES_DATA_4_PF_SCCC_BT_NUM_M, PF_RES_DATA_4_PF_SCCC_BT_NUM_S); + hr_dev->caps.gmv_bt_num = roce_get_field(req_b->gmv_idx_num, + PF_RES_DATA_5_PF_GMV_BT_NUM_M, + PF_RES_DATA_5_PF_GMV_BT_NUM_S); + return 0; } @@ -1912,6 +1916,15 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE; caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; + caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; + caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; + caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / + caps->gmv_entry_sz); + caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; + caps->gmv_ba_pg_sz = 0; + caps->gmv_buf_pg_sz = 0; + caps->gid_table_len[0] = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE / + caps->gmv_entry_sz); } } @@ -2138,6 +2151,14 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; + caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; + caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / + caps->gmv_entry_sz); + caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; + caps->gmv_ba_pg_sz = 0; + caps->gmv_buf_pg_sz = 0; + caps->gid_table_len[0] = caps->gmv_bt_num * + (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz); } calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num, @@ -2481,24 +2502,13 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev, link_tbl->table.map); } -static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) +static int get_hem_table(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_priv *priv = hr_dev->priv; - int qpc_count, cqc_count; - int ret, i; - - /* TSQ includes SQ doorbell and ack doorbell */ - ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); - if (ret) { - dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret); - return ret; - } - - ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); - if (ret) { - dev_err(hr_dev->dev, "TPQ init failed, ret = %d.\n", ret); - goto err_tpq_init_failed; - } + unsigned int qpc_count; + unsigned int cqc_count; + unsigned int gmv_count; + int ret; + int i; /* Alloc memory for QPC Timer buffer space chunk */ for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num; @@ -2522,8 +2532,23 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) } } + /* Alloc memory for GMV(GID/MAC/VLAN) table buffer space chunk */ + for (gmv_count = 0; gmv_count < hr_dev->caps.gmv_entry_num; + gmv_count++) { + ret = hns_roce_table_get(hr_dev, &hr_dev->gmv_table, gmv_count); + if (ret) { + dev_err(hr_dev->dev, + "failed to get gmv table, ret = %d.\n", ret); + goto err_gmv_failed; + } + } + return 0; +err_gmv_failed: + for (i = 0; i < gmv_count; i++) + hns_roce_table_put(hr_dev, &hr_dev->gmv_table, i); + err_cqc_timer_failed: for (i = 0; i < cqc_count; i++) hns_roce_table_put(hr_dev, &hr_dev->cqc_timer_table, i); @@ -2532,6 +2557,32 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) for (i = 0; i < qpc_count; i++) hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i); + return ret; +} + +static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + int ret; + + /* TSQ includes SQ doorbell and ack doorbell */ + ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "failed to init TSQ, ret = %d.\n", ret); + return ret; + } + + ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "failed to init TPQ, ret = %d.\n", ret); + goto err_tpq_init_failed; + } + + ret = get_hem_table(hr_dev); + if (ret) + goto err_get_hem_table_failed; + +err_get_hem_table_failed: hns_roce_free_link_table(hr_dev, &priv->tpq); err_tpq_init_failed: @@ -3598,9 +3649,25 @@ static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, u64 bt_ba, u32 hem_type, int step_idx) { struct hns_roce_cmd_mailbox *mailbox; + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_gmv_bt *gmv_bt = + (struct hns_roce_cfg_gmv_bt *)desc.data; int ret; int op; + if (hem_type == HEM_TYPE_GMV) { + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, + false); + + gmv_bt->gmv_ba_l = cpu_to_le32(bt_ba >> HNS_HW_PAGE_SHIFT); + gmv_bt->gmv_ba_h = cpu_to_le32(bt_ba >> (HNS_HW_PAGE_SHIFT + + 32)); + gmv_bt->gmv_bt_idx = cpu_to_le32(obj / + (HNS_HW_PAGE_SIZE / hr_dev->caps.gmv_entry_sz)); + + return hns_roce_cmq_send(hr_dev, &desc, 1); + } + op = get_op_for_set_hem(hr_dev, hem_type, step_idx); if (op < 0) return 0; @@ -3698,24 +3765,20 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, case HEM_TYPE_CQC: op = HNS_ROCE_CMD_DESTROY_CQC_BT0; break; - case HEM_TYPE_SCCC: - case HEM_TYPE_QPC_TIMER: - case HEM_TYPE_CQC_TIMER: - break; case HEM_TYPE_SRQC: op = HNS_ROCE_CMD_DESTROY_SRQC_BT0; break; + case HEM_TYPE_SCCC: + case HEM_TYPE_QPC_TIMER: + case HEM_TYPE_CQC_TIMER: + case HEM_TYPE_GMV: + return 0; default: dev_warn(dev, "Table %d not to be destroyed by mailbox!\n", table->type); return 0; } - if (table->type == HEM_TYPE_SCCC || - table->type == HEM_TYPE_QPC_TIMER || - table->type == HEM_TYPE_CQC_TIMER) - return 0; - op += step_idx; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 29c9dd4bcbc66e..65204e7acf5c60 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -44,6 +44,7 @@ #define HNS_ROCE_VF_SMAC_NUM 32 #define HNS_ROCE_VF_SGID_NUM 32 #define HNS_ROCE_VF_SL_NUM 8 +#define HNS_ROCE_VF_GMV_BT_NUM 256 #define HNS_ROCE_V2_MAX_QP_NUM 0x100000 #define HNS_ROCE_V2_MAX_QPC_TIMER_NUM 0x200 @@ -89,6 +90,7 @@ #define HNS_ROCE_V2_SCCC_SZ 32 #define HNS_ROCE_V3_SCCC_SZ 64 +#define HNS_ROCE_V3_GMV_ENTRY_SZ 32 #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE @@ -241,6 +243,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CLR_SCCC = 0x8509, HNS_ROCE_OPC_QUERY_SCCC = 0x850a, HNS_ROCE_OPC_RESET_SCCC = 0x850b, + HNS_ROCE_OPC_CFG_GMV_BT = 0x8510, HNS_SWITCH_PARAMETER_CFG = 0x1033, }; @@ -1334,7 +1337,7 @@ struct hns_roce_pf_res_b { __le32 sgid_idx_num; __le32 qid_idx_sl_num; __le32 sccc_bt_idx_num; - __le32 rsv; + __le32 gmv_idx_num; }; #define PF_RES_DATA_1_PF_SMAC_IDX_S 0 @@ -1361,6 +1364,12 @@ struct hns_roce_pf_res_b { #define PF_RES_DATA_4_PF_SCCC_BT_NUM_S 9 #define PF_RES_DATA_4_PF_SCCC_BT_NUM_M GENMASK(17, 9) +#define PF_RES_DATA_5_PF_GMV_BT_IDX_S 0 +#define PF_RES_DATA_5_PF_GMV_BT_IDX_M GENMASK(7, 0) + +#define PF_RES_DATA_5_PF_GMV_BT_NUM_S 8 +#define PF_RES_DATA_5_PF_GMV_BT_NUM_M GENMASK(16, 8) + struct hns_roce_pf_timer_res_a { __le32 rsv0; __le32 qpc_timer_bt_idx_num; @@ -1425,7 +1434,7 @@ struct hns_roce_vf_res_b { __le32 vf_sgid_idx_num; __le32 vf_qid_idx_sl_num; __le32 vf_sccc_idx_num; - __le32 rsv1; + __le32 vf_gmv_idx_num; }; #define VF_RES_B_DATA_0_VF_ID_S 0 @@ -1455,6 +1464,12 @@ struct hns_roce_vf_res_b { #define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S 9 #define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M GENMASK(17, 9) +#define VF_RES_B_DATA_5_VF_GMV_BT_IDX_S 0 +#define VF_RES_B_DATA_5_VF_GMV_BT_IDX_M GENMASK(7, 0) + +#define VF_RES_B_DATA_5_VF_GMV_BT_NUM_S 16 +#define VF_RES_B_DATA_5_VF_GMV_BT_NUM_M GENMASK(24, 16) + struct hns_roce_vf_switch { __le32 rocee_sel; __le32 fun_id; @@ -1577,6 +1592,16 @@ struct hns_roce_cfg_smac_tb { #define CFG_SMAC_TB_VF_SMAC_H_S 0 #define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0) +struct hns_roce_cfg_gmv_bt { + __le32 gmv_ba_l; + __le32 gmv_ba_h; + __le32 gmv_bt_idx; + __le32 rsv[3]; +}; + +#define CFG_GMV_BA_H_S 0 +#define CFG_GMV_BA_H_M GENMASK(19, 0) + #define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5 struct hns_roce_query_pf_caps_a { u8 number_ports; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 6d2c383c4d645f..92d0f29e2d92c3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -651,8 +651,25 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } } + if (hr_dev->caps.gmv_entry_sz) { + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->gmv_table, + HEM_TYPE_GMV, + hr_dev->caps.gmv_entry_sz, + hr_dev->caps.gmv_entry_num, 1); + if (ret) { + dev_err(dev, + "failed to init gmv table memory, ret = %d\n", + ret); + goto err_unmap_cqc_timer; + } + } + return 0; +err_unmap_cqc_timer: + if (hr_dev->caps.cqc_timer_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table); + err_unmap_qpc_timer: if (hr_dev->caps.qpc_timer_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table); From 32053e584e4a342be37a0932ffc1f9b13e914515 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Sat, 24 Oct 2020 11:07:16 +0800 Subject: [PATCH 035/147] RDMA/hns: Add support for filling GMV table Add a interface to fill GMV(SGID/SMAC/VLAN) table for HIP09, all of above source address information is stored as an entry in GMV table. The users just need to provide the index to the hardware when POST SEND. Link: https://lore.kernel.org/r/1603508836-33054-3-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 107 +++++++++++++++------ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 31 ++++++ drivers/infiniband/hw/hns/hns_roce_main.c | 9 +- 3 files changed, 116 insertions(+), 31 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c2b35e62844048..83842cdfe906c5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2704,14 +2704,27 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, return 0; } -static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev, - int gid_index, const union ib_gid *gid, - enum hns_roce_sgid_type sgid_type) +static void copy_gid(void *dest, const union ib_gid *gid) +{ +#define GID_SIZE 4 + const union ib_gid *src = gid; + __le32 (*p)[GID_SIZE] = dest; + int i; + + if (!gid) + src = &zgid; + + for (i = 0; i < GID_SIZE; i++) + (*p)[i] = cpu_to_le32(*(u32 *)&src->raw[i * sizeof(u32)]); +} + +static int config_sgid_table(struct hns_roce_dev *hr_dev, + int gid_index, const union ib_gid *gid, + enum hns_roce_sgid_type sgid_type) { struct hns_roce_cmq_desc desc; struct hns_roce_cfg_sgid_tb *sgid_tb = (struct hns_roce_cfg_sgid_tb *)desc.data; - u32 *p; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false); @@ -2720,19 +2733,54 @@ static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev, roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M, CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type); - p = (u32 *)&gid->raw[0]; - sgid_tb->vf_sgid_l = cpu_to_le32(*p); + copy_gid(&sgid_tb->vf_sgid_l, gid); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} - p = (u32 *)&gid->raw[4]; - sgid_tb->vf_sgid_ml = cpu_to_le32(*p); +static int config_gmv_table(struct hns_roce_dev *hr_dev, + int gid_index, const union ib_gid *gid, + enum hns_roce_sgid_type sgid_type, + const struct ib_gid_attr *attr) +{ + struct hns_roce_cmq_desc desc[2]; + struct hns_roce_cfg_gmv_tb_a *tb_a = + (struct hns_roce_cfg_gmv_tb_a *)desc[0].data; + struct hns_roce_cfg_gmv_tb_b *tb_b = + (struct hns_roce_cfg_gmv_tb_b *)desc[1].data; - p = (u32 *)&gid->raw[8]; - sgid_tb->vf_sgid_mh = cpu_to_le32(*p); + u16 vlan_id = VLAN_CFI_MASK; + u8 mac[ETH_ALEN] = {}; + int ret; - p = (u32 *)&gid->raw[0xc]; - sgid_tb->vf_sgid_h = cpu_to_le32(*p); + if (gid) { + ret = rdma_read_gid_l2_fields(attr, &vlan_id, mac); + if (ret) + return ret; + } - return hns_roce_cmq_send(hr_dev, &desc, 1); + hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_CFG_GMV_TBL, false); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_CFG_GMV_TBL, false); + + copy_gid(&tb_a->vf_sgid_l, gid); + + roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_SGID_TYPE_M, + CFG_GMV_TB_VF_SGID_TYPE_S, sgid_type); + roce_set_bit(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_EN_S, + vlan_id < VLAN_CFI_MASK); + roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_ID_M, + CFG_GMV_TB_VF_VLAN_ID_S, vlan_id); + + tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac); + roce_set_field(tb_b->vf_smac_h, CFG_GMV_TB_SMAC_H_M, + CFG_GMV_TB_SMAC_H_S, *(u16 *)&mac[4]); + + roce_set_field(tb_b->table_idx_rsv, CFG_GMV_TB_SGID_IDX_M, + CFG_GMV_TB_SGID_IDX_S, gid_index); + + return hns_roce_cmq_send(hr_dev, desc, 2); } static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, @@ -2742,23 +2790,24 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1; int ret; - if (!gid || !attr) - return -EINVAL; - - if (attr->gid_type == IB_GID_TYPE_ROCE) - sgid_type = GID_TYPE_FLAG_ROCE_V1; - - if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { - if (ipv6_addr_v4mapped((void *)gid)) - sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4; - else - sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; + if (gid) { + if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + if (ipv6_addr_v4mapped((void *)gid)) + sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4; + else + sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; + } else if (attr->gid_type == IB_GID_TYPE_ROCE) { + sgid_type = GID_TYPE_FLAG_ROCE_V1; + } } - ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type); + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + ret = config_gmv_table(hr_dev, gid_index, gid, sgid_type, attr); + else + ret = config_sgid_table(hr_dev, gid_index, gid, sgid_type); + if (ret) - ibdev_err(&hr_dev->ib_dev, - "failed to configure sgid table, ret = %d!\n", + ibdev_err(&hr_dev->ib_dev, "failed to set gid, ret = %d!\n", ret); return ret; @@ -4499,7 +4548,9 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, IB_GID_TYPE_ROCE_UDP_ENCAP); } - if (vlan_id < VLAN_N_VID) { + /* Only HIP08 needs to set the vlan_en bits in QPC */ + if (vlan_id < VLAN_N_VID && + hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); roce_set_bit(qpc_mask->byte_76_srqn_op_en, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 65204e7acf5c60..1409d05a0fc107 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -243,6 +243,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CLR_SCCC = 0x8509, HNS_ROCE_OPC_QUERY_SCCC = 0x850a, HNS_ROCE_OPC_RESET_SCCC = 0x850b, + HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f, HNS_ROCE_OPC_CFG_GMV_BT = 0x8510, HNS_SWITCH_PARAMETER_CFG = 0x1033, }; @@ -1602,6 +1603,36 @@ struct hns_roce_cfg_gmv_bt { #define CFG_GMV_BA_H_S 0 #define CFG_GMV_BA_H_M GENMASK(19, 0) +struct hns_roce_cfg_gmv_tb_a { + __le32 vf_sgid_l; + __le32 vf_sgid_ml; + __le32 vf_sgid_mh; + __le32 vf_sgid_h; + __le32 vf_sgid_type_vlan; + __le32 resv; +}; + +#define CFG_GMV_TB_SGID_IDX_S 0 +#define CFG_GMV_TB_SGID_IDX_M GENMASK(7, 0) + +#define CFG_GMV_TB_VF_SGID_TYPE_S 0 +#define CFG_GMV_TB_VF_SGID_TYPE_M GENMASK(1, 0) + +#define CFG_GMV_TB_VF_VLAN_EN_S 2 + +#define CFG_GMV_TB_VF_VLAN_ID_S 16 +#define CFG_GMV_TB_VF_VLAN_ID_M GENMASK(27, 16) + +struct hns_roce_cfg_gmv_tb_b { + __le32 vf_smac_l; + __le32 vf_smac_h; + __le32 table_idx_rsv; + __le32 resv[3]; +}; + +#define CFG_GMV_TB_SMAC_H_S 0 +#define CFG_GMV_TB_SMAC_H_M GENMASK(15, 0) + #define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5 struct hns_roce_query_pf_caps_a { u8 number_ports; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 92d0f29e2d92c3..97fdc55e992a92 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -61,7 +62,10 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) { u8 phy_port; - u32 i = 0; + u32 i; + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return 0; if (!memcmp(hr_dev->dev_addr[port], addr, ETH_ALEN)) return 0; @@ -90,14 +94,13 @@ static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context) static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context) { struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); - struct ib_gid_attr zattr = {}; u8 port = attr->port_num - 1; int ret; if (port >= hr_dev->caps.num_ports) return -EINVAL; - ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &zgid, &zattr); + ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, NULL, NULL); return ret; } From 45808361d4491217de11cdf0661d657081f8f422 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 7 Oct 2020 19:36:26 -0700 Subject: [PATCH 036/147] RDMA: Manual changes for sysfs_emit and neatening Make changes to use sysfs_emit in the RDMA code as cocci scripts can not be written to handle _all_ the possible variants of various sprintf family uses in sysfs show functions. While there, make the code more legible and update its style to be more like the typical kernel styles. Miscellanea: o Use intermediate pointers for dereferences o Add and use string lookup functions o return early when any intermediate call fails so normal return is at the bottom of the function o mlx4/mcg.c:sysfs_show_group: use scnprintf to format intermediate strings Link: https://lore.kernel.org/r/f5c9e4c9d8dafca1b7b70bd597ee7f8f219c31c8.1602122880.git.joe@perches.com Signed-off-by: Joe Perches Acked-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sysfs.c | 65 +++++++++------- drivers/infiniband/hw/hfi1/sysfs.c | 38 ++++----- drivers/infiniband/hw/mlx4/main.c | 2 + drivers/infiniband/hw/mlx4/mcg.c | 82 +++++++++++--------- drivers/infiniband/hw/mlx4/sysfs.c | 44 +++++------ drivers/infiniband/hw/mthca/mthca_provider.c | 29 ++++--- drivers/infiniband/hw/qib/qib_sysfs.c | 48 ++++++------ drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 56 ++++++------- drivers/infiniband/ulp/srp/ib_srp.c | 4 + 9 files changed, 186 insertions(+), 182 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 4e335fc4d016ee..4dd803f23aaa80 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1224,29 +1224,34 @@ static int add_port(struct ib_core_device *coredev, int port_num) return ret; } -static ssize_t node_type_show(struct device *device, - struct device_attribute *attr, char *buf) +static const char *node_type_string(int node_type) { - struct ib_device *dev = rdma_device_to_ibdev(device); - - switch (dev->node_type) { + switch (node_type) { case RDMA_NODE_IB_CA: - return sysfs_emit(buf, "%d: CA\n", dev->node_type); + return "CA"; + case RDMA_NODE_IB_SWITCH: + return "switch"; + case RDMA_NODE_IB_ROUTER: + return "router"; case RDMA_NODE_RNIC: - return sysfs_emit(buf, "%d: RNIC\n", dev->node_type); + return "RNIC"; case RDMA_NODE_USNIC: - return sysfs_emit(buf, "%d: usNIC\n", dev->node_type); + return "usNIC"; case RDMA_NODE_USNIC_UDP: - return sysfs_emit(buf, "%d: usNIC UDP\n", dev->node_type); + return "usNIC UDP"; case RDMA_NODE_UNSPECIFIED: - return sysfs_emit(buf, "%d: unspecified\n", dev->node_type); - case RDMA_NODE_IB_SWITCH: - return sysfs_emit(buf, "%d: switch\n", dev->node_type); - case RDMA_NODE_IB_ROUTER: - return sysfs_emit(buf, "%d: router\n", dev->node_type); - default: - return sysfs_emit(buf, "%d: \n", dev->node_type); + return "unspecified"; } + return ""; +} + +static ssize_t node_type_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct ib_device *dev = rdma_device_to_ibdev(device); + + return sysfs_emit(buf, "%d: %s\n", dev->node_type, + node_type_string(dev->node_type)); } static DEVICE_ATTR_RO(node_type); @@ -1254,13 +1259,13 @@ static ssize_t sys_image_guid_show(struct device *device, struct device_attribute *dev_attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); + __be16 *guid = (__be16 *)&dev->attrs.sys_image_guid; - return sysfs_emit( - buf, "%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *)&dev->attrs.sys_image_guid)[0]), - be16_to_cpu(((__be16 *)&dev->attrs.sys_image_guid)[1]), - be16_to_cpu(((__be16 *)&dev->attrs.sys_image_guid)[2]), - be16_to_cpu(((__be16 *)&dev->attrs.sys_image_guid)[3])); + return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n", + be16_to_cpu(guid[0]), + be16_to_cpu(guid[1]), + be16_to_cpu(guid[2]), + be16_to_cpu(guid[3])); } static DEVICE_ATTR_RO(sys_image_guid); @@ -1268,12 +1273,13 @@ static ssize_t node_guid_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); + __be16 *node_guid = (__be16 *)&dev->node_guid; return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *)&dev->node_guid)[0]), - be16_to_cpu(((__be16 *)&dev->node_guid)[1]), - be16_to_cpu(((__be16 *)&dev->node_guid)[2]), - be16_to_cpu(((__be16 *)&dev->node_guid)[3])); + be16_to_cpu(node_guid[0]), + be16_to_cpu(node_guid[1]), + be16_to_cpu(node_guid[2]), + be16_to_cpu(node_guid[3])); } static DEVICE_ATTR_RO(node_guid); @@ -1309,10 +1315,11 @@ static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); + char version[IB_FW_VERSION_NAME_MAX] = {}; + + ib_get_device_fw_str(dev, version); - ib_get_device_fw_str(dev, buf); - strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX); - return strlen(buf); + return sysfs_emit(buf, "%s\n", version); } static DEVICE_ATTR_RO(fw_ver); diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index e2a88f3cea7fcb..9a695e1571c041 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -510,13 +510,11 @@ static ssize_t board_id_show(struct device *device, struct hfi1_ibdev *dev = rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); - int ret; if (!dd->boardname) - ret = -EINVAL; - else - ret = sysfs_emit(buf, "%s\n", dd->boardname); - return ret; + return -EINVAL; + + return sysfs_emit(buf, "%s\n", dd->boardname); } static DEVICE_ATTR_RO(board_id); @@ -570,6 +568,7 @@ static ssize_t serial_show(struct device *device, rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); + /* dd->serial is already newline terminated in chip.c */ return sysfs_emit(buf, "%s", dd->serial); } static DEVICE_ATTR_RO(serial); @@ -598,9 +597,8 @@ static DEVICE_ATTR_WO(chip_reset); * Convert the reported temperature from an integer (reported in * units of 0.25C) to a floating point number. */ -#define temp2str(temp, buf, size, idx) \ - scnprintf((buf) + (idx), (size) - (idx), "%u.%02u ", \ - ((temp) >> 2), ((temp) & 0x3) * 25) +#define temp_d(t) ((t) >> 2) +#define temp_f(t) (((t)&0x3) * 25u) /* * Dump tempsense values, in decimal, to ease shell-scripts. @@ -615,19 +613,17 @@ static ssize_t tempsense_show(struct device *device, int ret; ret = hfi1_tempsense_rd(dd, &temp); - if (!ret) { - int idx = 0; - - idx += temp2str(temp.curr, buf, PAGE_SIZE, idx); - idx += temp2str(temp.lo_lim, buf, PAGE_SIZE, idx); - idx += temp2str(temp.hi_lim, buf, PAGE_SIZE, idx); - idx += temp2str(temp.crit_lim, buf, PAGE_SIZE, idx); - idx += scnprintf(buf + idx, PAGE_SIZE - idx, - "%u %u %u\n", temp.triggers & 0x1, - temp.triggers & 0x2, temp.triggers & 0x4); - ret = idx; - } - return ret; + if (ret) + return ret; + + return sysfs_emit(buf, "%u.%02u %u.%02u %u.%02u %u.%02u %u %u %u\n", + temp_d(temp.curr), temp_f(temp.curr), + temp_d(temp.lo_lim), temp_f(temp.lo_lim), + temp_d(temp.hi_lim), temp_f(temp.hi_lim), + temp_d(temp.crit_lim), temp_f(temp.crit_lim), + temp.triggers & 0x1, + temp.triggers & 0x2, + temp.triggers & 0x4); } static DEVICE_ATTR_RO(tempsense); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2ac932834f9891..f0864f40ea1ac0 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2024,6 +2024,7 @@ static ssize_t hca_type_show(struct device *device, { struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); + return sysfs_emit(buf, "MT%d\n", dev->dev->persist->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -2033,6 +2034,7 @@ static ssize_t hw_rev_show(struct device *device, { struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); + return sysfs_emit(buf, "%x\n", dev->dev->rev_id); } static DEVICE_ATTR_RO(hw_rev); diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 5e4ec9786081cf..33f525b744f286 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -988,53 +988,63 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, } static ssize_t sysfs_show_group(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { struct mcast_group *group = container_of(attr, struct mcast_group, dentry); struct mcast_req *req = NULL; - char pending_str[40]; char state_str[40]; - ssize_t len = 0; - int f; + char pending_str[40]; + int len; + int i; + u32 hoplimit; if (group->state == MCAST_IDLE) - sprintf(state_str, "%s", get_state_string(group->state)); + scnprintf(state_str, sizeof(state_str), "%s", + get_state_string(group->state)); else - sprintf(state_str, "%s(TID=0x%llx)", - get_state_string(group->state), - be64_to_cpu(group->last_req_tid)); + scnprintf(state_str, sizeof(state_str), "%s(TID=0x%llx)", + get_state_string(group->state), + be64_to_cpu(group->last_req_tid)); + if (list_empty(&group->pending_list)) { - sprintf(pending_str, "No"); + scnprintf(pending_str, sizeof(pending_str), "No"); } else { - req = list_first_entry(&group->pending_list, struct mcast_req, group_list); - sprintf(pending_str, "Yes(TID=0x%llx)", - be64_to_cpu(req->sa_mad.mad_hdr.tid)); + req = list_first_entry(&group->pending_list, struct mcast_req, + group_list); + scnprintf(pending_str, sizeof(pending_str), "Yes(TID=0x%llx)", + be64_to_cpu(req->sa_mad.mad_hdr.tid)); } - len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ", - group->rec.scope_join_state & 0xf, - group->members[2], group->members[1], group->members[0], - atomic_read(&group->refcount), - pending_str, - state_str); - for (f = 0; f < MAX_VFS; ++f) - if (group->func[f].state == MCAST_MEMBER) - len += sprintf(buf + len, "%d[%1x] ", - f, group->func[f].join_state); - - len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x " - "%4x %4x %2x %2x)\n", - be16_to_cpu(group->rec.pkey), - be32_to_cpu(group->rec.qkey), - (group->rec.mtusel_mtu & 0xc0) >> 6, - group->rec.mtusel_mtu & 0x3f, - group->rec.tclass, - (group->rec.ratesel_rate & 0xc0) >> 6, - group->rec.ratesel_rate & 0x3f, - (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28, - (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8, - be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff, - group->rec.proxy_join); + + len = sysfs_emit(buf, "%1d [%02d,%02d,%02d] %4d %4s %5s ", + group->rec.scope_join_state & 0xf, + group->members[2], + group->members[1], + group->members[0], + atomic_read(&group->refcount), + pending_str, + state_str); + + for (i = 0; i < MAX_VFS; i++) { + if (group->func[i].state == MCAST_MEMBER) + len += sysfs_emit_at(buf, len, "%d[%1x] ", i, + group->func[i].join_state); + } + + hoplimit = be32_to_cpu(group->rec.sl_flowlabel_hoplimit); + len += sysfs_emit_at(buf, len, + "\t\t(%4hx %4x %2x %2x %2x %2x %2x %4x %4x %2x %2x)\n", + be16_to_cpu(group->rec.pkey), + be32_to_cpu(group->rec.qkey), + (group->rec.mtusel_mtu & 0xc0) >> 6, + (group->rec.mtusel_mtu & 0x3f), + group->rec.tclass, + (group->rec.ratesel_rate & 0xc0) >> 6, + (group->rec.ratesel_rate & 0x3f), + (hoplimit & 0xf0000000) >> 28, + (hoplimit & 0x0fffff00) >> 8, + (hoplimit & 0x000000ff), + group->rec.proxy_join); return len; } diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 22ae363088ed65..0279b780c4461e 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -117,22 +117,24 @@ static ssize_t show_port_gid(struct device *dev, struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; struct mlx4_ib_dev *mdev = port->dev; union ib_gid gid; - ssize_t ret; + int ret; + __be16 *raw; ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num, mlx4_ib_iov_dentry->entry_num, &gid, 1); if (ret) return ret; - ret = sysfs_emit(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *)gid.raw)[0]), - be16_to_cpu(((__be16 *)gid.raw)[1]), - be16_to_cpu(((__be16 *)gid.raw)[2]), - be16_to_cpu(((__be16 *)gid.raw)[3]), - be16_to_cpu(((__be16 *)gid.raw)[4]), - be16_to_cpu(((__be16 *)gid.raw)[5]), - be16_to_cpu(((__be16 *)gid.raw)[6]), - be16_to_cpu(((__be16 *)gid.raw)[7])); - return ret; + + raw = (__be16 *)gid.raw; + return sysfs_emit(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + be16_to_cpu(raw[0]), + be16_to_cpu(raw[1]), + be16_to_cpu(raw[2]), + be16_to_cpu(raw[3]), + be16_to_cpu(raw[4]), + be16_to_cpu(raw[5]), + be16_to_cpu(raw[6]), + be16_to_cpu(raw[7])); } static ssize_t show_phys_port_pkey(struct device *dev, @@ -542,14 +544,10 @@ static ssize_t sysfs_show_smi_enabled(struct device *dev, { struct mlx4_port *p = container_of(attr, struct mlx4_port, smi_enabled); - ssize_t len = 0; - if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num)) - len = sysfs_emit(buf, "%d\n", 1); - else - len = sysfs_emit(buf, "%d\n", 0); - - return len; + return sysfs_emit(buf, "%d\n", + !!mlx4_vf_smi_enabled(p->dev->dev, p->slave, + p->port_num)); } static ssize_t sysfs_show_enable_smi_admin(struct device *dev, @@ -558,14 +556,10 @@ static ssize_t sysfs_show_enable_smi_admin(struct device *dev, { struct mlx4_port *p = container_of(attr, struct mlx4_port, enable_smi_admin); - ssize_t len = 0; - - if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num)) - len = sysfs_emit(buf, "%d\n", 1); - else - len = sysfs_emit(buf, "%d\n", 0); - return len; + return sysfs_emit(buf, "%d\n", + !!mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, + p->port_num)); } static ssize_t sysfs_store_enable_smi_admin(struct device *dev, diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 12179a41456dd7..1a3dd07f993b3b 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -965,25 +965,30 @@ static ssize_t hw_rev_show(struct device *device, } static DEVICE_ATTR_RO(hw_rev); -static ssize_t hca_type_show(struct device *device, - struct device_attribute *attr, char *buf) +static const char *hca_type_string(int hca_type) { - struct mthca_dev *dev = - rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); - - switch (dev->pdev->device) { + switch (hca_type) { case PCI_DEVICE_ID_MELLANOX_TAVOR: - return sysfs_emit(buf, "MT23108\n"); + return "MT23108"; case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT: - return sysfs_emit(buf, "MT25208 (MT23108 compat mode)\n"); + return "MT25208 (MT23108 compat mode)"; case PCI_DEVICE_ID_MELLANOX_ARBEL: - return sysfs_emit(buf, "MT25208\n"); + return "MT25208"; case PCI_DEVICE_ID_MELLANOX_SINAI: case PCI_DEVICE_ID_MELLANOX_SINAI_OLD: - return sysfs_emit(buf, "MT25204\n"); - default: - return sysfs_emit(buf, "unknown\n"); + return "MT25204"; } + + return "unknown"; +} + +static ssize_t hca_type_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mthca_dev *dev = + rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); + + return sysfs_emit(buf, "%s\n", hca_type_string(dev->pdev->device)); } static DEVICE_ATTR_RO(hca_type); diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 818282bc7c5bcb..3761f21cef97b3 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -575,13 +575,10 @@ static ssize_t hca_type_show(struct device *device, struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); - int ret; if (!dd->boardname) - ret = -EINVAL; - else - ret = sysfs_emit(buf, "%s\n", dd->boardname); - return ret; + return -EINVAL; + return sysfs_emit(buf, "%s\n", dd->boardname); } static DEVICE_ATTR_RO(hca_type); static DEVICE_ATTR(board_id, 0444, hca_type_show, NULL); @@ -647,17 +644,16 @@ static ssize_t nfreectxts_show(struct device *device, } static DEVICE_ATTR_RO(nfreectxts); -static ssize_t serial_show(struct device *device, - struct device_attribute *attr, char *buf) +static ssize_t serial_show(struct device *device, struct device_attribute *attr, + char *buf) { struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); + const u8 *end = memchr(dd->serial, 0, ARRAY_SIZE(dd->serial)); + int size = end ? end - dd->serial : ARRAY_SIZE(dd->serial); - buf[sizeof(dd->serial)] = '\0'; - memcpy(buf, dd->serial, sizeof(dd->serial)); - strcat(buf, "\n"); - return strlen(buf); + return sysfs_emit(buf, ".%*s\n", size, dd->serial); } static DEVICE_ATTR_RO(serial); @@ -690,26 +686,26 @@ static ssize_t tempsense_show(struct device *device, struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); - int ret; - int idx; + int i; u8 regvals[8]; - ret = -ENXIO; - for (idx = 0; idx < 8; ++idx) { - if (idx == 6) + for (i = 0; i < 8; i++) { + int ret; + + if (i == 6) continue; - ret = dd->f_tempsense_rd(dd, idx); + ret = dd->f_tempsense_rd(dd, i); if (ret < 0) - break; - regvals[idx] = ret; + return ret; /* return error on bad read */ + regvals[i] = ret; } - if (idx == 8) - ret = sysfs_emit(buf, "%d %d %02X %02X %d %d\n", - *(signed char *)(regvals), - *(signed char *)(regvals + 1), regvals[2], - regvals[3], *(signed char *)(regvals + 5), - *(signed char *)(regvals + 7)); - return ret; + return sysfs_emit(buf, "%d %d %02X %02X %d %d\n", + (signed char)regvals[0], + (signed char)regvals[1], + regvals[2], + regvals[3], + (signed char)regvals[5], + (signed char)regvals[7]); } static DEVICE_ATTR_RO(tempsense); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index 0af72fca00ff45..250948c382f9a6 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -57,7 +57,7 @@ static ssize_t board_id_show(struct device *device, subsystem_device_id = us_ibdev->pdev->subsystem_device; mutex_unlock(&us_ibdev->usdev_lock); - return sysfs_emit(buf, "%hu\n", subsystem_device_id); + return sysfs_emit(buf, "%u\n", subsystem_device_id); } static DEVICE_ATTR_RO(board_id); @@ -69,19 +69,13 @@ config_show(struct device *device, struct device_attribute *attr, char *buf) { struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - char *ptr; - unsigned left; - unsigned n; enum usnic_vnic_res_type res_type; - - /* Buffer space limit is 1 page */ - ptr = buf; - left = PAGE_SIZE; + int len; mutex_lock(&us_ibdev->usdev_lock); if (kref_read(&us_ibdev->vf_cnt) > 0) { char *busname; - + char *sep = ""; /* * bus name seems to come with annoying prefix. * Remove it if it is predictable @@ -90,39 +84,35 @@ config_show(struct device *device, struct device_attribute *attr, char *buf) if (strncmp(busname, "PCI Bus ", 8) == 0) busname += 8; - n = scnprintf(ptr, left, - "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:", - dev_name(&us_ibdev->ib_dev.dev), - busname, - PCI_SLOT(us_ibdev->pdev->devfn), - PCI_FUNC(us_ibdev->pdev->devfn), - netdev_name(us_ibdev->netdev), - us_ibdev->ufdev->mac, - kref_read(&us_ibdev->vf_cnt)); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit(buf, "%s: %s:%d.%d, %s, %pM, %u VFs\n", + dev_name(&us_ibdev->ib_dev.dev), + busname, + PCI_SLOT(us_ibdev->pdev->devfn), + PCI_FUNC(us_ibdev->pdev->devfn), + netdev_name(us_ibdev->netdev), + us_ibdev->ufdev->mac, + kref_read(&us_ibdev->vf_cnt)); + len += sysfs_emit_at(buf, len, " Per VF:"); for (res_type = USNIC_VNIC_RES_TYPE_EOL; - res_type < USNIC_VNIC_RES_TYPE_MAX; - res_type++) { + res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) { if (us_ibdev->vf_res_cnt[res_type] == 0) continue; - n = scnprintf(ptr, left, " %d %s%s", - us_ibdev->vf_res_cnt[res_type], - usnic_vnic_res_type_to_str(res_type), - (res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ? - "," : ""); - UPDATE_PTR_LEFT(n, ptr, left); + len += sysfs_emit_at(buf, len, "%s %d %s", + sep, + us_ibdev->vf_res_cnt[res_type], + usnic_vnic_res_type_to_str(res_type)); + sep = ","; } - n = scnprintf(ptr, left, "\n"); - UPDATE_PTR_LEFT(n, ptr, left); + len += sysfs_emit_at(buf, len, "\n"); } else { - n = scnprintf(ptr, left, "%s: no VFs\n", - dev_name(&us_ibdev->ib_dev.dev)); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit(buf, "%s: no VFs\n", + dev_name(&us_ibdev->ib_dev.dev)); } + mutex_unlock(&us_ibdev->usdev_lock); - return ptr - buf; + return len; } static DEVICE_ATTR_RO(config); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index f686d1d6e91cfb..ee22ea44a40a15 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2925,6 +2925,7 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, if (target->using_rdma_cm) return -ENOENT; + return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); } @@ -2944,6 +2945,7 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, if (target->using_rdma_cm) return -ENOENT; + return sysfs_emit(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); } @@ -2954,6 +2956,7 @@ static ssize_t show_orig_dgid(struct device *dev, if (target->using_rdma_cm) return -ENOENT; + return sysfs_emit(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); } @@ -2968,6 +2971,7 @@ static ssize_t show_req_lim(struct device *dev, ch = &target->ch[i]; req_lim = min(req_lim, ch->req_lim); } + return sysfs_emit(buf, "%d\n", req_lim); } From e28bf1f03b01b135dc0052b3a195c2860e10f216 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 7 Oct 2020 19:36:27 -0700 Subject: [PATCH 037/147] RDMA: Convert various random sprintf sysfs _show uses to sysfs_emit Manual changes for sysfs_emit as cocci scripts can't easily convert them. Link: https://lore.kernel.org/r/ecde7791467cddb570c6f6d2c908ffbab9145cac.1602122880.git.joe@perches.com Signed-off-by: Joe Perches Reviewed-by: Jason Gunthorpe Acked-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 4 +- drivers/infiniband/core/cma_configfs.c | 4 +- drivers/infiniband/core/sysfs.c | 98 +++++++++++--------- drivers/infiniband/core/user_mad.c | 2 +- drivers/infiniband/hw/hfi1/sysfs.c | 10 +- drivers/infiniband/hw/mlx4/sysfs.c | 18 ++-- drivers/infiniband/hw/qib/qib_sysfs.c | 30 +++--- drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 31 +++---- drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c | 18 ++-- drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 9 +- drivers/infiniband/ulp/srp/ib_srp.c | 4 +- drivers/infiniband/ulp/srpt/ib_srpt.c | 14 +-- 12 files changed, 118 insertions(+), 124 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 5740d1ba356870..0201364974594f 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4271,8 +4271,8 @@ static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, group = container_of(obj, struct cm_counter_group, obj); cm_attr = container_of(attr, struct cm_counter_attribute, attr); - return sprintf(buf, "%ld\n", - atomic_long_read(&group->counter[cm_attr->index])); + return sysfs_emit(buf, "%ld\n", + atomic_long_read(&group->counter[cm_attr->index])); } static const struct sysfs_ops cm_counter_ops = { diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 7ec4af2ed87abb..7f70e5a7de1054 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -115,7 +115,7 @@ static ssize_t default_roce_mode_show(struct config_item *item, if (gid_type < 0) return gid_type; - return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type)); + return sysfs_emit(buf, "%s\n", ib_cache_gid_type_str(gid_type)); } static ssize_t default_roce_mode_store(struct config_item *item, @@ -157,7 +157,7 @@ static ssize_t default_roce_tos_show(struct config_item *item, char *buf) tos = cma_get_default_roce_tos(cma_dev, group->port_num); cma_configfs_params_put(cma_dev); - return sprintf(buf, "%u\n", tos); + return sysfs_emit(buf, "%u\n", tos); } static ssize_t default_roce_tos_store(struct config_item *item, diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 4dd803f23aaa80..88460d76a84afd 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -165,9 +165,11 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "%d: %s\n", attr.state, - attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ? - state_name[attr.state] : "UNKNOWN"); + return sysfs_emit(buf, "%d: %s\n", attr.state, + attr.state >= 0 && + attr.state < ARRAY_SIZE(state_name) ? + state_name[attr.state] : + "UNKNOWN"); } static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused, @@ -180,7 +182,7 @@ static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "0x%x\n", attr.lid); + return sysfs_emit(buf, "0x%x\n", attr.lid); } static ssize_t lid_mask_count_show(struct ib_port *p, @@ -194,7 +196,7 @@ static ssize_t lid_mask_count_show(struct ib_port *p, if (ret) return ret; - return sprintf(buf, "%d\n", attr.lmc); + return sysfs_emit(buf, "%d\n", attr.lmc); } static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused, @@ -207,7 +209,7 @@ static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "0x%x\n", attr.sm_lid); + return sysfs_emit(buf, "0x%x\n", attr.sm_lid); } static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused, @@ -220,7 +222,7 @@ static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "%d\n", attr.sm_sl); + return sysfs_emit(buf, "%d\n", attr.sm_sl); } static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused, @@ -233,7 +235,7 @@ static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "0x%08x\n", attr.port_cap_flags); + return sysfs_emit(buf, "0x%08x\n", attr.port_cap_flags); } static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, @@ -284,9 +286,9 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, if (rate < 0) return -EINVAL; - return sprintf(buf, "%d%s Gb/sec (%dX%s)\n", - rate / 10, rate % 10 ? ".5" : "", - ib_width_enum_to_int(attr.active_width), speed); + return sysfs_emit(buf, "%d%s Gb/sec (%dX%s)\n", rate / 10, + rate % 10 ? ".5" : "", + ib_width_enum_to_int(attr.active_width), speed); } static const char *phys_state_to_str(enum ib_port_phys_state phys_state) @@ -318,21 +320,28 @@ static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "%d: %s\n", attr.phys_state, - phys_state_to_str(attr.phys_state)); + return sysfs_emit(buf, "%d: %s\n", attr.phys_state, + phys_state_to_str(attr.phys_state)); } static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused, char *buf) { + const char *output; + switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) { case IB_LINK_LAYER_INFINIBAND: - return sprintf(buf, "%s\n", "InfiniBand"); + output = "InfiniBand"; + break; case IB_LINK_LAYER_ETHERNET: - return sprintf(buf, "%s\n", "Ethernet"); + output = "Ethernet"; + break; default: - return sprintf(buf, "%s\n", "Unknown"); + output = "Unknown"; + break; } + + return sysfs_emit(buf, "%s\n", output); } static PORT_ATTR_RO(state); @@ -358,27 +367,28 @@ static struct attribute *port_default_attrs[] = { NULL }; -static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) +static ssize_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) { struct net_device *ndev; - size_t ret = -EINVAL; + int ret = -EINVAL; rcu_read_lock(); ndev = rcu_dereference(gid_attr->ndev); if (ndev) - ret = sprintf(buf, "%s\n", ndev->name); + ret = sysfs_emit(buf, "%s\n", ndev->name); rcu_read_unlock(); return ret; } -static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) +static ssize_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) { - return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type)); + return sysfs_emit(buf, "%s\n", + ib_cache_gid_type_str(gid_attr->gid_type)); } static ssize_t _show_port_gid_attr( struct ib_port *p, struct port_attribute *attr, char *buf, - size_t (*print)(const struct ib_gid_attr *gid_attr, char *buf)) + ssize_t (*print)(const struct ib_gid_attr *gid_attr, char *buf)) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); @@ -401,7 +411,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); const struct ib_gid_attr *gid_attr; - ssize_t ret; + int len; gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index); if (IS_ERR(gid_attr)) { @@ -416,12 +426,12 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, * space throwing such error on fail to read gid, return zero * GID as before. This maintains backward compatibility. */ - return sprintf(buf, "%pI6\n", zgid.raw); + return sysfs_emit(buf, "%pI6\n", zgid.raw); } - ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw); + len = sysfs_emit(buf, "%pI6\n", gid_attr->gid.raw); rdma_put_gid_attr(gid_attr); - return ret; + return len; } static ssize_t show_port_gid_attr_ndev(struct ib_port *p, @@ -443,13 +453,13 @@ static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr, struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); u16 pkey; - ssize_t ret; + int ret; ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey); if (ret) return ret; - return sprintf(buf, "0x%04x\n", pkey); + return sysfs_emit(buf, "0x%04x\n", pkey); } #define PORT_PMA_ATTR(_name, _counter, _width, _offset) \ @@ -521,8 +531,9 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, container_of(attr, struct port_table_attribute, attr); int offset = tab_attr->index & 0xffff; int width = (tab_attr->index >> 16) & 0xff; - ssize_t ret; + int ret; u8 data[8]; + int len; ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data, 40 + offset / 8, sizeof(data)); @@ -531,30 +542,27 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, switch (width) { case 4: - ret = sprintf(buf, "%u\n", (*data >> - (4 - (offset % 8))) & 0xf); + len = sysfs_emit(buf, "%u\n", + (*data >> (4 - (offset % 8))) & 0xf); break; case 8: - ret = sprintf(buf, "%u\n", *data); + len = sysfs_emit(buf, "%u\n", *data); break; case 16: - ret = sprintf(buf, "%u\n", - be16_to_cpup((__be16 *)data)); + len = sysfs_emit(buf, "%u\n", be16_to_cpup((__be16 *)data)); break; case 32: - ret = sprintf(buf, "%u\n", - be32_to_cpup((__be32 *)data)); + len = sysfs_emit(buf, "%u\n", be32_to_cpup((__be32 *)data)); break; case 64: - ret = sprintf(buf, "%llu\n", - be64_to_cpup((__be64 *)data)); + len = sysfs_emit(buf, "%llu\n", be64_to_cpup((__be64 *)data)); break; - default: - ret = 0; + len = 0; + break; } - return ret; + return len; } static PORT_PMA_ATTR(symbol_error , 0, 16, 32); @@ -815,12 +823,12 @@ static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats, return 0; } -static ssize_t print_hw_stat(struct ib_device *dev, int port_num, - struct rdma_hw_stats *stats, int index, char *buf) +static int print_hw_stat(struct ib_device *dev, int port_num, + struct rdma_hw_stats *stats, int index, char *buf) { u64 v = rdma_counter_get_hwstat_value(dev, port_num, index); - return sprintf(buf, "%llu\n", stats->value[index] + v); + return sysfs_emit(buf, "%llu\n", stats->value[index] + v); } static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr, @@ -877,7 +885,7 @@ static ssize_t show_stats_lifespan(struct kobject *kobj, msecs = jiffies_to_msecs(stats->lifespan); mutex_unlock(&stats->lock); - return sprintf(buf, "%d\n", msecs); + return sysfs_emit(buf, "%d\n", msecs); } static ssize_t set_stats_lifespan(struct kobject *kobj, diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 7e759f5b2a75c1..19104a6756915b 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1222,7 +1222,7 @@ static char *umad_devnode(struct device *dev, umode_t *mode) static ssize_t abi_version_show(struct class *class, struct class_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); + return sysfs_emit(buf, "%d\n", IB_USER_MAD_ABI_VERSION); } static CLASS_ATTR_RO(abi_version); diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 9a695e1571c041..5650130e68d439 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -151,7 +151,7 @@ struct hfi1_port_attr { static ssize_t cc_prescan_show(struct hfi1_pportdata *ppd, char *buf) { - return sprintf(buf, "%s\n", ppd->cc_prescan ? "on" : "off"); + return sysfs_emit(buf, "%s\n", ppd->cc_prescan ? "on" : "off"); } static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf, @@ -296,7 +296,7 @@ static ssize_t sc2vl_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct hfi1_pportdata, sc2vl_kobj); struct hfi1_devdata *dd = ppd->dd; - return sprintf(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc)); + return sysfs_emit(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc)); } static const struct sysfs_ops hfi1_sc2vl_ops = { @@ -401,7 +401,7 @@ static ssize_t sl2sc_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct hfi1_pportdata, sl2sc_kobj); struct hfi1_ibport *ibp = &ppd->ibport_data; - return sprintf(buf, "%u\n", ibp->sl_to_sc[sattr->sl]); + return sysfs_emit(buf, "%u\n", ibp->sl_to_sc[sattr->sl]); } static const struct sysfs_ops hfi1_sl2sc_ops = { @@ -475,7 +475,7 @@ static ssize_t vl2mtu_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct hfi1_pportdata, vl2mtu_kobj); struct hfi1_devdata *dd = ppd->dd; - return sprintf(buf, "%u\n", dd->vld[vlattr->vl].mtu); + return sysfs_emit(buf, "%u\n", dd->vld[vlattr->vl].mtu); } static const struct sysfs_ops hfi1_vl2mtu_ops = { @@ -813,7 +813,7 @@ static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf) if (vl < 0) return vl; - return snprintf(buf, PAGE_SIZE, "%d\n", vl); + return sysfs_emit(buf, "%d\n", vl); } static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO, diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 0279b780c4461e..1b5891130aaba1 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -443,16 +443,12 @@ static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr, { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); - ssize_t ret = -ENODEV; - - if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >= - (p->dev->dev->caps.pkey_table_len[p->port_num])) - ret = sprintf(buf, "none\n"); - else - ret = sprintf(buf, "%d\n", - p->dev->pkeys.virt2phys_pkey[p->slave] - [p->port_num - 1][tab_attr->index]); - return ret; + struct pkey_mgt *m = &p->dev->pkeys; + u8 key = m->virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index]; + + if (key >= p->dev->dev->caps.pkey_table_len[p->port_num]) + return sysfs_emit(buf, "none\n"); + return sysfs_emit(buf, "%d\n", key); } static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr, @@ -490,7 +486,7 @@ static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr, static ssize_t show_port_gid_idx(struct mlx4_port *p, struct port_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", p->slave); + return sysfs_emit(buf, "%d\n", p->slave); } static struct attribute ** diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 3761f21cef97b3..62c179fc764ba4 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -43,11 +43,8 @@ static ssize_t show_hrtbt_enb(struct qib_pportdata *ppd, char *buf) { struct qib_devdata *dd = ppd->dd; - int ret; - ret = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT); - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; + return sysfs_emit(buf, "%d\n", dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT)); } static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, @@ -106,14 +103,10 @@ static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf, static ssize_t show_status(struct qib_pportdata *ppd, char *buf) { - ssize_t ret; - if (!ppd->statusp) - ret = -EINVAL; - else - ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n", - (unsigned long long) *(ppd->statusp)); - return ret; + return -EINVAL; + + return sysfs_emit(buf, "0x%llx\n", (unsigned long long)*(ppd->statusp)); } /* @@ -392,7 +385,7 @@ static ssize_t sl2vl_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct qib_pportdata, sl2vl_kobj); struct qib_ibport *qibp = &ppd->ibport_data; - return sprintf(buf, "%u\n", qibp->sl_to_vl[sattr->sl]); + return sysfs_emit(buf, "%u\n", qibp->sl_to_vl[sattr->sl]); } static const struct sysfs_ops qib_sl2vl_ops = { @@ -501,17 +494,18 @@ static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, diagc_kobj); struct qib_ibport *qibp = &ppd->ibport_data; + u64 val; if (!strncmp(dattr->attr.name, "rc_acks", 7)) - return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_acks)); + val = READ_PER_CPU_CNTR(rc_acks); else if (!strncmp(dattr->attr.name, "rc_qacks", 8)) - return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_qacks)); + val = READ_PER_CPU_CNTR(rc_qacks); else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15)) - return sprintf(buf, "%llu\n", - READ_PER_CPU_CNTR(rc_delayed_comp)); + val = READ_PER_CPU_CNTR(rc_delayed_comp); else - return sprintf(buf, "%u\n", - *(u32 *)((char *)qibp + dattr->counter)); + val = *(u32 *)((char *)qibp + dattr->counter); + + return sysfs_emit(buf, "%llu\n", val); } static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr, diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index 250948c382f9a6..e59615a4c9d98e 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -204,43 +204,36 @@ struct qpn_attribute qpn_attr_##NAME = __ATTR_RO(NAME) static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf) { - return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx); + return sysfs_emit(buf, "0x%p\n", qp_grp->ctx); } static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf) { - int i, j, n; - int left; - char *ptr; + int i, j; struct usnic_vnic_res_chunk *res_chunk; struct usnic_vnic_res *vnic_res; + int len; - left = PAGE_SIZE; - ptr = buf; - - n = scnprintf(ptr, left, - "QPN: %d State: (%s) PID: %u VF Idx: %hu ", - qp_grp->ibqp.qp_num, - usnic_ib_qp_grp_state_to_string(qp_grp->state), - qp_grp->owner_pid, - usnic_vnic_get_index(qp_grp->vf->vnic)); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit(buf, "QPN: %d State: (%s) PID: %u VF Idx: %hu ", + qp_grp->ibqp.qp_num, + usnic_ib_qp_grp_state_to_string(qp_grp->state), + qp_grp->owner_pid, + usnic_vnic_get_index(qp_grp->vf->vnic)); for (i = 0; qp_grp->res_chunk_list[i]; i++) { res_chunk = qp_grp->res_chunk_list[i]; for (j = 0; j < res_chunk->cnt; j++) { vnic_res = res_chunk->res[j]; - n = scnprintf(ptr, left, "%s[%d] ", + len += sysfs_emit_at( + buf, len, "%s[%d] ", usnic_vnic_res_type_to_str(vnic_res->type), vnic_res->vnic_idx); - UPDATE_PTR_LEFT(n, ptr, left); } } - n = scnprintf(ptr, left, "\n"); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit_at(buf, len, "\n"); - return ptr - buf; + return len; } static QPN_ATTR_RO(context); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index f80898cc7b64c7..ba00f0de14caa6 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -347,12 +347,13 @@ static ssize_t rtrs_clt_src_addr_show(struct kobject *kobj, char *page) { struct rtrs_clt_sess *sess; - int cnt; + int len; sess = container_of(kobj, struct rtrs_clt_sess, kobj); - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, - page, PAGE_SIZE); - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); + len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page, + PAGE_SIZE); + len += sysfs_emit_at(page, len, "\n"); + return len; } static struct kobj_attribute rtrs_clt_src_addr_attr = @@ -363,12 +364,13 @@ static ssize_t rtrs_clt_dst_addr_show(struct kobject *kobj, char *page) { struct rtrs_clt_sess *sess; - int cnt; + int len; sess = container_of(kobj, struct rtrs_clt_sess, kobj); - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, - page, PAGE_SIZE); - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); + len = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, page, + PAGE_SIZE); + len += sysfs_emit_at(page, len, "\n"); + return len; } static struct kobj_attribute rtrs_clt_dst_addr_attr = diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index b7c1b37ed9a8f6..d2edff3b8f0df0 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -111,12 +111,13 @@ static ssize_t rtrs_srv_dst_addr_show(struct kobject *kobj, char *page) { struct rtrs_srv_sess *sess; - int cnt; + int len; sess = container_of(kobj, struct rtrs_srv_sess, kobj); - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, - page, PAGE_SIZE); - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); + len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page, + PAGE_SIZE); + len += sysfs_emit_at(page, len, "\n"); + return len; } static struct kobj_attribute rtrs_srv_dst_addr_attr = diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index ee22ea44a40a15..5492b66a815321 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -169,9 +169,9 @@ static int srp_tmo_get(char *buffer, const struct kernel_param *kp) int tmo = *(int *)kp->arg; if (tmo >= 0) - return sprintf(buffer, "%d\n", tmo); + return sysfs_emit(buffer, "%d\n", tmo); else - return sprintf(buffer, "off\n"); + return sysfs_emit(buffer, "off\n"); } static int srp_tmo_set(const char *val, const struct kernel_param *kp) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 0065eb17ae36b4..6017d525084a0c 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3445,7 +3445,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rdma_size_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size); + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_max_rdma_size); } static ssize_t srpt_tpg_attrib_srp_max_rdma_size_store(struct config_item *item, @@ -3482,7 +3482,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rsp_size_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size); + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_max_rsp_size); } static ssize_t srpt_tpg_attrib_srp_max_rsp_size_store(struct config_item *item, @@ -3519,7 +3519,7 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size); + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_sq_size); } static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item, @@ -3556,7 +3556,7 @@ static ssize_t srpt_tpg_attrib_use_srq_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%d\n", sport->port_attrib.use_srq); + return sysfs_emit(page, "%d\n", sport->port_attrib.use_srq); } static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item, @@ -3646,7 +3646,7 @@ static struct rdma_cm_id *srpt_create_rdma_id(struct sockaddr *listen_addr) static ssize_t srpt_rdma_cm_port_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", rdma_cm_port); + return sysfs_emit(page, "%d\n", rdma_cm_port); } static ssize_t srpt_rdma_cm_port_store(struct config_item *item, @@ -3702,7 +3702,7 @@ static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page) struct se_portal_group *se_tpg = to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return snprintf(page, PAGE_SIZE, "%d\n", sport->enabled); + return sysfs_emit(page, "%d\n", sport->enabled); } static ssize_t srpt_tpg_enable_store(struct config_item *item, @@ -3809,7 +3809,7 @@ static void srpt_drop_tport(struct se_wwn *wwn) static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf) { - return scnprintf(buf, PAGE_SIZE, "\n"); + return sysfs_emit(buf, "\n"); } CONFIGFS_ATTR_RO(srpt_wwn_, version); From b4d031cdae1301a8e5e9dba2a862ef028717cb17 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Oct 2020 15:19:30 +0200 Subject: [PATCH 038/147] RDMA/mlx5: Remove mlx5_ib_mr->order The is only ever set to non-zero if the MR is from the cache, and if it is cached then the order is in cached_ent->order. Make it clearer that use_umr_mtt_update() only returns true for cached MRs and remove the redundant data. Link: https://lore.kernel.org/r/20201026131936.1335664-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - drivers/infiniband/hw/mlx5/mr.c | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b1f2b34e59552d..93310dda01b630 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -601,7 +601,6 @@ struct mlx5_ib_mr { struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; - unsigned int order; struct mlx5_cache_ent *cache_ent; int npages; struct mlx5_ib_dev *dev; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b261797b258fd7..50bbd07b97476f 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -126,7 +126,9 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start, u64 length) { - return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= + if (!mr->cache_ent) + return false; + return ((u64)1 << mr->cache_ent->order) * MLX5_ADAPTER_PAGE_SIZE >= length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } @@ -172,7 +174,6 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return NULL; - mr->order = ent->order; mr->cache_ent = ent; mr->dev = ent->dev; From fc3325701a6353594083f08e297d4c1965c601aa Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Oct 2020 15:19:31 +0200 Subject: [PATCH 039/147] RDMA/mlx5: Fix corruption of reg_pages in mlx5_ib_rereg_user_mr() reg_pages should always contain mr->npage since when the mr is finally de-reg'd it is always subtracted out. If there were any error exits then mlx5_ib_rereg_user_mr() would leave the reg_pages adjusted and this will cause it to be double subtracted eventually. The manipulation of reg_pages is inherently connected to the umem, so lift it out of set_mr_fields() and only adjust it around creating/destroying a umem. reg_pages is only used for diagnostics in sysfs. Fixes: 7d0cc6edcc70 ("IB/mlx5: Add MR cache for large UMR regions") Link: https://lore.kernel.org/r/20201026131936.1335664-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 50bbd07b97476f..f3a28119d14587 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1248,10 +1248,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, } static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - int npages, u64 length, int access_flags) + u64 length, int access_flags) { - mr->npages = npages; - atomic_add(npages, &dev->mdev->priv.reg_pages); mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; mr->ibmr.length = length; @@ -1291,8 +1289,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, kfree(in); - mr->umem = NULL; - set_mr_fields(dev, mr, 0, length, acc); + set_mr_fields(dev, mr, length, acc); return &mr->ibmr; @@ -1420,7 +1417,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); mr->umem = umem; - set_mr_fields(dev, mr, npages, length, access_flags); + mr->npages = npages; + atomic_add(mr->npages, &dev->mdev->priv.reg_pages); + set_mr_fields(dev, mr, length, access_flags); if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) { /* @@ -1532,8 +1531,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); - atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); - if (!mr->umem) return -EINVAL; @@ -1554,12 +1551,17 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, * used. */ flags |= IB_MR_REREG_TRANS; + atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); + mr->npages = 0; ib_umem_release(mr->umem); mr->umem = NULL; + err = mr_umem_get(dev, addr, len, access_flags, &mr->umem, &npages, &page_shift, &ncont, &order); if (err) goto err; + mr->npages = ncont; + atomic_add(mr->npages, &dev->mdev->priv.reg_pages); } if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags, @@ -1610,7 +1612,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - set_mr_fields(dev, mr, npages, len, access_flags); + set_mr_fields(dev, mr, len, access_flags); return 0; From 1c3d247eee746016f268a3e7f6b4a11cfa205e8e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Oct 2020 15:19:32 +0200 Subject: [PATCH 040/147] RDMA/mlx5: Remove mlx5_ib_mr->npages This is the same value as ib_umem_num_pages(mr->umem), use that instead. Link: https://lore.kernel.org/r/20201026131936.1335664-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - drivers/infiniband/hw/mlx5/mr.c | 23 ++++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 93310dda01b630..0eaf992a4e1540 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -602,7 +602,6 @@ struct mlx5_ib_mr { struct mlx5_shared_mr_info *smr_info; struct list_head list; struct mlx5_cache_ent *cache_ent; - int npages; struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index f3a28119d14587..b6d9419e05a449 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1417,8 +1417,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); mr->umem = umem; - mr->npages = npages; - atomic_add(mr->npages, &dev->mdev->priv.reg_pages); + atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); set_mr_fields(dev, mr, length, access_flags); if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) { @@ -1551,8 +1550,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, * used. */ flags |= IB_MR_REREG_TRANS; - atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); - mr->npages = 0; + atomic_sub(ib_umem_num_pages(mr->umem), + &dev->mdev->priv.reg_pages); ib_umem_release(mr->umem); mr->umem = NULL; @@ -1560,8 +1559,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, &npages, &page_shift, &ncont, &order); if (err) goto err; - mr->npages = ncont; - atomic_add(mr->npages, &dev->mdev->priv.reg_pages); + atomic_add(ib_umem_num_pages(mr->umem), + &dev->mdev->priv.reg_pages); } if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags, @@ -1694,7 +1693,6 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int npages = mr->npages; struct ib_umem *umem = mr->umem; /* Stop all DMA */ @@ -1703,14 +1701,17 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) else clean_mr(dev, mr); + if (umem) { + if (!is_odp_mr(mr)) + atomic_sub(ib_umem_num_pages(umem), + &dev->mdev->priv.reg_pages); + ib_umem_release(umem); + } + if (mr->cache_ent) mlx5_mr_cache_free(dev, mr); else kfree(mr); - - ib_umem_release(umem); - atomic_sub(npages, &dev->mdev->priv.reg_pages); - } int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) From f0093fb1a7cbff4bbfa47c1499a9e76f75359dbe Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Oct 2020 15:19:33 +0200 Subject: [PATCH 041/147] RDMA/mlx5: Move mlx5_ib_cont_pages() to the creation of the mlx5_ib_mr For the user MR path, instead of calling this after getting the umem, call it as part of creating the struct mlx5_ib_mr and distill its output to a single page_shift stored inside the mr. This avoids passing around the tuple of its output. Based on the umem and page_shift, the output arguments can be computed using: count == ib_umem_num_pages(mr->umem) shift == mr->page_shift ncont == ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift) order == order_base_2(ncont) And since mr->page_shift == umem_odp->page_shift then ncont == ib_umem_num_dma_blocks() == ib_umem_odp_num_pages() for ODP umems. Link: https://lore.kernel.org/r/20201026131936.1335664-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mem.c | 11 +++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/mr.c | 135 +++++++++++---------------- 3 files changed, 69 insertions(+), 78 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 13de3d2edd34e3..e63af1b05c0b00 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -57,6 +57,17 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, struct scatterlist *sg; int entry; + if (umem->is_odp) { + struct ib_umem_odp *odp = to_ib_umem_odp(umem); + + *shift = odp->page_shift; + *ncont = ib_umem_odp_num_pages(odp); + *count = *ncont << (*shift - PAGE_SHIFT); + if (order) + *order = ilog2(roundup_pow_of_two(*count)); + return; + } + addr = addr >> PAGE_SHIFT; tmp = (unsigned long)addr; m = find_first_bit(&tmp, BITS_PER_LONG); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 0eaf992a4e1540..9c1d206cb90065 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -597,6 +597,7 @@ struct mlx5_ib_mr { int max_descs; int desc_size; int access_mode; + unsigned int page_shift; struct mlx5_core_mkey mmkey; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b6d9419e05a449..3a373e1eef2b03 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -868,14 +868,11 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev) return MLX5_MAX_UMR_SHIFT; } -static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length, - int access_flags, struct ib_umem **umem, int *npages, - int *page_shift, int *ncont, int *order) +static struct ib_umem *mr_umem_get(struct mlx5_ib_dev *dev, u64 start, + u64 length, int access_flags) { struct ib_umem *u; - *umem = NULL; - if (access_flags & IB_ACCESS_ON_DEMAND) { struct ib_umem_odp *odp; @@ -884,39 +881,17 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length, if (IS_ERR(odp)) { mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(odp)); - return PTR_ERR(odp); - } - - u = &odp->umem; - - *page_shift = odp->page_shift; - *ncont = ib_umem_odp_num_pages(odp); - *npages = *ncont << (*page_shift - PAGE_SHIFT); - if (order) - *order = ilog2(roundup_pow_of_two(*ncont)); - } else { - u = ib_umem_get(&dev->ib_dev, start, length, access_flags); - if (IS_ERR(u)) { - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); - return PTR_ERR(u); + return ERR_CAST(odp); } - - mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, - page_shift, ncont, order); + return &odp->umem; } - if (!*npages) { - mlx5_ib_warn(dev, "avoid zero region\n"); - ib_umem_release(u); - return -EINVAL; + u = ib_umem_get(&dev->ib_dev, start, length, access_flags); + if (IS_ERR(u)) { + mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); + return u; } - - *umem = u; - - mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", - *npages, *ncont, *order, *page_shift); - - return 0; + return u; } static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) @@ -975,15 +950,21 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, return &cache->ent[order]; } -static struct mlx5_ib_mr * -alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, - u64 len, int npages, int page_shift, unsigned int order, - int access_flags) +static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd, + struct ib_umem *umem, u64 iova, + int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order); + struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; + int npages; + int page_shift; + int ncont; + int order; + mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &npages, + &page_shift, &ncont, &order); + ent = mr_cache_ent_from_order(dev, order); if (!ent) return ERR_PTR(-E2BIG); @@ -1002,9 +983,10 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, mr->umem = umem; mr->access_flags = access_flags; mr->desc_size = sizeof(struct mlx5_mtt); - mr->mmkey.iova = virt_addr; - mr->mmkey.size = len; + mr->mmkey.iova = iova; + mr->mmkey.size = umem->length; mr->mmkey.pd = to_mpd(pd)->pdn; + mr->page_shift = page_shift; return mr; } @@ -1163,14 +1145,15 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, * Else, the given ibmr will be used. */ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, - u64 virt_addr, u64 length, - struct ib_umem *umem, int npages, - int page_shift, int access_flags, - bool populate) + struct ib_umem *umem, u64 iova, + int access_flags, bool populate) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr; + int page_shift; __be64 *pas; + int npages; + int ncont; void *mkc; int inlen; u32 *in; @@ -1181,6 +1164,10 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, if (!mr) return ERR_PTR(-ENOMEM); + mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &npages, + &page_shift, &ncont, NULL); + + mr->page_shift = page_shift; mr->ibmr.pd = pd; mr->access_flags = access_flags; @@ -1207,20 +1194,20 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr, + set_mkc_access_pd_addr_fields(mkc, access_flags, iova, populate ? pd : dev->umrc.pd); MLX5_SET(mkc, mkc, free, !populate); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET64(mkc, mkc, len, length); + MLX5_SET64(mkc, mkc, len, umem->length); MLX5_SET(mkc, mkc, bsf_octword_size, 0); MLX5_SET(mkc, mkc, translations_octword_size, - get_octo_len(virt_addr, length, page_shift)); + get_octo_len(iova, umem->length, page_shift)); MLX5_SET(mkc, mkc, log_page_size, page_shift); if (populate) { MLX5_SET(create_mkey_in, in, translations_octword_actual_size, - get_octo_len(virt_addr, length, page_shift)); + get_octo_len(iova, umem->length, page_shift)); } err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); @@ -1358,10 +1345,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct mlx5_ib_mr *mr = NULL; bool xlt_with_umr; struct ib_umem *umem; - int page_shift; - int npages; - int ncont; - int order; int err; if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) @@ -1389,23 +1372,20 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return &mr->ibmr; } - err = mr_umem_get(dev, start, length, access_flags, &umem, - &npages, &page_shift, &ncont, &order); - - if (err < 0) - return ERR_PTR(err); + umem = mr_umem_get(dev, start, length, access_flags); + if (IS_ERR(umem)) + return ERR_CAST(umem); if (xlt_with_umr) { - mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, - page_shift, order, access_flags); + mr = alloc_mr_from_cache(pd, umem, virt_addr, access_flags); if (IS_ERR(mr)) mr = NULL; } if (!mr) { mutex_lock(&dev->slow_path_mutex); - mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, - page_shift, access_flags, !xlt_with_umr); + mr = reg_create(NULL, pd, umem, virt_addr, access_flags, + !xlt_with_umr); mutex_unlock(&dev->slow_path_mutex); } @@ -1428,8 +1408,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, */ int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; - err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, - update_xlt_flags); + err = mlx5_ib_update_xlt( + mr, 0, + ib_umem_num_dma_blocks(umem, 1UL << mr->page_shift), + mr->page_shift, update_xlt_flags); if (err) { dereg_mr(dev, mr); return ERR_PTR(err); @@ -1519,11 +1501,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, int access_flags = flags & IB_MR_REREG_ACCESS ? new_access_flags : mr->access_flags; - int page_shift = 0; int upd_flags = 0; - int npages = 0; - int ncont = 0; - int order = 0; u64 addr, len; int err; @@ -1553,12 +1531,12 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, atomic_sub(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); ib_umem_release(mr->umem); - mr->umem = NULL; - - err = mr_umem_get(dev, addr, len, access_flags, &mr->umem, - &npages, &page_shift, &ncont, &order); - if (err) + mr->umem = mr_umem_get(dev, addr, len, access_flags); + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + mr->umem = NULL; goto err; + } atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); } @@ -1577,9 +1555,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (err) goto err; - mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, - page_shift, access_flags, true); - + mr = reg_create(ib_mr, pd, mr->umem, addr, access_flags, true); if (IS_ERR(mr)) { err = PTR_ERR(mr); mr = to_mmr(ib_mr); @@ -1601,8 +1577,11 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, upd_flags |= MLX5_IB_UPD_XLT_PD; if (flags & IB_MR_REREG_ACCESS) upd_flags |= MLX5_IB_UPD_XLT_ACCESS; - err = mlx5_ib_update_xlt(mr, 0, npages, page_shift, - upd_flags); + err = mlx5_ib_update_xlt( + mr, 0, + ib_umem_num_dma_blocks(mr->umem, + 1UL << mr->page_shift), + mr->page_shift, upd_flags); } else { err = rereg_umr(pd, mr, access_flags, flags); } From 95741ee3f0f1f437720626131d866ac8dc66ab14 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Oct 2020 15:19:34 +0200 Subject: [PATCH 042/147] RDMA/mlx5: Remove order from mlx5_ib_cont_pages() Only alloc_mr_from_cache() needs order and can trivially compute it, so lift it to the one call site and remove the NULL arguments. Link: https://lore.kernel.org/r/20201026131936.1335664-6-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cq.c | 5 ++--- drivers/infiniband/hw/mlx5/devx.c | 2 +- drivers/infiniband/hw/mlx5/mem.c | 13 +------------ drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mlx5/mr.c | 8 ++++---- drivers/infiniband/hw/mlx5/qp.c | 4 ++-- drivers/infiniband/hw/mlx5/srq.c | 2 +- 7 files changed, 12 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index fb62f1d04afa70..f993b8f55231c0 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -747,7 +747,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, goto err_umem; mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift, - &ncont, NULL); + &ncont); mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); @@ -1155,8 +1155,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, return err; } - mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift, - npas, NULL); + mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift, npas); cq->resize_umem = umem; *cqe_size = ucmd.cqe_size; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 9e3d8b82649809..ebd6e6c4127e62 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2083,7 +2083,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, mlx5_ib_cont_pages(obj->umem, obj->umem->address, MLX5_MKEY_PAGE_SHIFT_MASK, &npages, - &obj->page_shift, &obj->ncont, NULL); + &obj->page_shift, &obj->ncont); if (!npages) { ib_umem_release(obj->umem); diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index e63af1b05c0b00..2faee44cca999e 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -42,12 +42,11 @@ * @count: number of PAGE_SIZE pages covered by umem * @shift: page shift for the compound pages found in the region * @ncont: number of compund pages - * @order: log2 of the number of compound pages */ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, unsigned long max_page_shift, int *count, int *shift, - int *ncont, int *order) + int *ncont) { unsigned long tmp; unsigned long m; @@ -63,8 +62,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, *shift = odp->page_shift; *ncont = ib_umem_odp_num_pages(odp); *count = *ncont << (*shift - PAGE_SHIFT); - if (order) - *order = ilog2(roundup_pow_of_two(*count)); return; } @@ -95,17 +92,9 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, if (i) { m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); - - if (order) - *order = ilog2(roundup_pow_of_two(i) >> m); - *ncont = DIV_ROUND_UP(i, (1 << m)); } else { m = 0; - - if (order) - *order = 0; - *ncont = 0; } *shift = PAGE_SHIFT + m; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 9c1d206cb90065..15f95900e83cf3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1232,7 +1232,7 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, unsigned long max_page_shift, int *count, int *shift, - int *ncont, int *order); + int *ncont); void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, size_t offset, size_t num_pages, __be64 *pas, int access_flags); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3a373e1eef2b03..4b05143dedda34 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -960,11 +960,11 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd, int npages; int page_shift; int ncont; - int order; mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &npages, - &page_shift, &ncont, &order); - ent = mr_cache_ent_from_order(dev, order); + &page_shift, &ncont); + ent = mr_cache_ent_from_order(dev, order_base_2(ib_umem_num_dma_blocks( + umem, 1UL << page_shift))); if (!ent) return ERR_PTR(-E2BIG); @@ -1165,7 +1165,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, return ERR_PTR(-ENOMEM); mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &npages, - &page_shift, &ncont, NULL); + &page_shift, &ncont); mr->page_shift = page_shift; mr->ibmr.pd = pd; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 251421dd6f1d5e..1a1999b721eaa9 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -791,7 +791,7 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, return PTR_ERR(*umem); } - mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL); + mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont); err = mlx5_ib_get_buf_offset(addr, *page_shift, offset); if (err) { @@ -850,7 +850,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, } mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift, - &ncont, NULL); + &ncont); err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, &rwq->rq_page_offset); if (err) { diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 12d485872e7716..95fc16aa0d31a1 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -88,7 +88,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, } mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages, - &page_shift, &ncont, NULL); + &page_shift, &ncont); err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); if (err) { From 7db0eea916dcc050811f7a498b4e268d764c6d24 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Oct 2020 15:19:35 +0200 Subject: [PATCH 043/147] RDMA/mlx5: Remove ncont from mlx5_ib_cont_pages() This is the same as ib_umem_num_dma_blocks(umem, 1UL << page_shift), have the callers compute it directly. Link: https://lore.kernel.org/r/20201026131936.1335664-7-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cq.c | 30 +++++++++++++++------------- drivers/infiniband/hw/mlx5/devx.c | 12 ++++++----- drivers/infiniband/hw/mlx5/mem.c | 15 ++++---------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +-- drivers/infiniband/hw/mlx5/mr.c | 6 ++---- drivers/infiniband/hw/mlx5/qp.c | 26 ++++++++++++------------ drivers/infiniband/hw/mlx5/srq.c | 6 +++--- 7 files changed, 46 insertions(+), 52 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index f993b8f55231c0..e2d28081bd2a9f 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -746,8 +746,9 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, if (err) goto err_umem; - mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift, - &ncont); + mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, + &page_shift); + ncont = ib_umem_num_dma_blocks(cq->buf.umem, 1UL << page_shift); mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); @@ -1128,7 +1129,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) } static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, - int entries, struct ib_udata *udata, int *npas, + int entries, struct ib_udata *udata, int *page_shift, int *cqe_size) { struct mlx5_ib_resize_cq ucmd; @@ -1155,7 +1156,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, return err; } - mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift, npas); + mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift); cq->resize_umem = umem; *cqe_size = ucmd.cqe_size; @@ -1276,22 +1277,23 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) mutex_lock(&cq->resize_mutex); if (udata) { - err = resize_user(dev, cq, entries, udata, &npas, &page_shift, + err = resize_user(dev, cq, entries, udata, &page_shift, &cqe_size); + if (err) + goto ex; + npas = ib_umem_num_dma_blocks(cq->resize_umem, 1UL << page_shift); } else { + struct mlx5_frag_buf *frag_buf; + cqe_size = 64; err = resize_kernel(dev, cq, entries, cqe_size); - if (!err) { - struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf; - - npas = frag_buf->npages; - page_shift = frag_buf->page_shift; - } + if (err) + goto ex; + frag_buf = &cq->resize_buf->frag_buf; + npas = frag_buf->npages; + page_shift = frag_buf->page_shift; } - if (err) - goto ex; - inlen = MLX5_ST_SZ_BYTES(modify_cq_in) + MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index ebd6e6c4127e62..7d99b5b298da8a 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -95,7 +95,6 @@ struct devx_umem { struct ib_umem *umem; u32 page_offset; int page_shift; - int ncont; u32 dinlen; u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; }; @@ -2083,7 +2082,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, mlx5_ib_cont_pages(obj->umem, obj->umem->address, MLX5_MKEY_PAGE_SHIFT_MASK, &npages, - &obj->page_shift, &obj->ncont); + &obj->page_shift); if (!npages) { ib_umem_release(obj->umem); @@ -2100,8 +2099,10 @@ static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs, struct devx_umem *obj, struct devx_umem_reg_cmd *cmd) { - cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) + - (MLX5_ST_SZ_BYTES(mtt) * obj->ncont); + cmd->inlen = + MLX5_ST_SZ_BYTES(create_umem_in) + + (MLX5_ST_SZ_BYTES(mtt) * + ib_umem_num_dma_blocks(obj->umem, 1UL << obj->page_shift)); cmd->in = uverbs_zalloc(attrs, cmd->inlen); return PTR_ERR_OR_ZERO(cmd->in); } @@ -2117,7 +2118,8 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM); - MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); + MLX5_SET64(umem, umem, num_of_mtt, + ib_umem_num_dma_blocks(obj->umem, 1UL << obj->page_shift)); MLX5_SET(umem, umem, log_page_size, obj->page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET(umem, umem, page_offset, obj->page_offset); diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 2faee44cca999e..6d87f8c13ce016 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -41,12 +41,9 @@ * @max_page_shift: high limit for page_shift - 0 means no limit * @count: number of PAGE_SIZE pages covered by umem * @shift: page shift for the compound pages found in the region - * @ncont: number of compund pages */ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, - int *count, int *shift, - int *ncont) + unsigned long max_page_shift, int *count, int *shift) { unsigned long tmp; unsigned long m; @@ -60,8 +57,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, struct ib_umem_odp *odp = to_ib_umem_odp(umem); *shift = odp->page_shift; - *ncont = ib_umem_odp_num_pages(odp); - *count = *ncont << (*shift - PAGE_SHIFT); + *count = ib_umem_num_pages(umem); return; } @@ -90,13 +86,10 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, i += len; } - if (i) { + if (i) m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); - *ncont = DIV_ROUND_UP(i, (1 << m)); - } else { + else m = 0; - *ncont = 0; - } *shift = PAGE_SHIFT + m; *count = i; } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 15f95900e83cf3..a36b0b8138a045 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1231,8 +1231,7 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, unsigned long max_page_shift, - int *count, int *shift, - int *ncont); + int *count, int *shift); void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, size_t offset, size_t num_pages, __be64 *pas, int access_flags); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4b05143dedda34..305520bb08632f 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -959,10 +959,9 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd, struct mlx5_ib_mr *mr; int npages; int page_shift; - int ncont; mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &npages, - &page_shift, &ncont); + &page_shift); ent = mr_cache_ent_from_order(dev, order_base_2(ib_umem_num_dma_blocks( umem, 1UL << page_shift))); if (!ent) @@ -1153,7 +1152,6 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, int page_shift; __be64 *pas; int npages; - int ncont; void *mkc; int inlen; u32 *in; @@ -1165,7 +1163,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, return ERR_PTR(-ENOMEM); mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &npages, - &page_shift, &ncont); + &page_shift); mr->page_shift = page_shift; mr->ibmr.pd = pd; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 1a1999b721eaa9..b313e990362e64 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -781,7 +781,7 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, unsigned long addr, size_t size, struct ib_umem **umem, int *npages, int *page_shift, - int *ncont, u32 *offset) + u32 *offset) { int err; @@ -791,7 +791,7 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, return PTR_ERR(*umem); } - mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont); + mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift); err = mlx5_ib_get_buf_offset(addr, *page_shift, offset); if (err) { @@ -799,8 +799,8 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, goto err_umem; } - mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n", - addr, size, *npages, *page_shift, *ncont, *offset); + mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, offset %d\n", + addr, size, *npages, *page_shift, *offset); return 0; @@ -836,7 +836,6 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, int page_shift = 0; int npages; u32 offset = 0; - int ncont = 0; int err; if (!ucmd->buf_addr) @@ -849,8 +848,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } - mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift, - &ncont); + mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift); err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, &rwq->rq_page_offset); if (err) { @@ -858,14 +856,14 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err_umem; } - rwq->rq_num_pas = ncont; + rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, 1UL << page_shift); rwq->page_shift = page_shift; rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE); mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n", (unsigned long long)ucmd->buf_addr, rwq->buf_size, - npages, page_shift, ncont, offset); + npages, page_shift, rwq->rq_num_pas, offset); err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db); if (err) { @@ -952,9 +950,10 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, ubuffer->buf_addr = ucmd->buf_addr; err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size, &ubuffer->umem, - &npages, &page_shift, &ncont, &offset); + &npages, &page_shift, &offset); if (err) goto err_bfreg; + ncont = ib_umem_num_dma_blocks(ubuffer->umem, 1UL << page_shift); } else { ubuffer->umem = NULL; } @@ -1211,16 +1210,17 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, int err; int page_shift = 0; int npages; - int ncont = 0; u32 offset = 0; err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size, - &sq->ubuffer.umem, &npages, &page_shift, &ncont, + &sq->ubuffer.umem, &npages, &page_shift, &offset); if (err) return err; - inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont; + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * ib_umem_num_dma_blocks(sq->ubuffer.umem, + 1UL << page_shift); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 95fc16aa0d31a1..24076c6078cf24 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -53,7 +53,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, int err; int npages; int page_shift; - int ncont; u32 offset; u32 uidx = MLX5_IB_DEFAULT_UIDX; @@ -88,7 +87,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, } mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages, - &page_shift, &ncont); + &page_shift); err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); if (err) { @@ -96,7 +95,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, goto err_umem; } - in->pas = kvcalloc(ncont, sizeof(*in->pas), GFP_KERNEL); + in->pas = kvcalloc(ib_umem_num_dma_blocks(srq->umem, 1UL << page_shift), + sizeof(*in->pas), GFP_KERNEL); if (!in->pas) { err = -ENOMEM; goto err_umem; From f8fb3110635c91a375399450dc5a4a79a83b0414 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Oct 2020 15:19:36 +0200 Subject: [PATCH 044/147] RDMA/mlx5: Remove npages from mlx5_ib_cont_pages() Most callers don't need this, and the few that do can get it as ib_umem_num_pages(umem). Link: https://lore.kernel.org/r/20201026131936.1335664-8-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cq.c | 14 +++++++------- drivers/infiniband/hw/mlx5/devx.c | 10 +--------- drivers/infiniband/hw/mlx5/mem.c | 5 +---- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mlx5/mr.c | 10 +++------- drivers/infiniband/hw/mlx5/qp.c | 27 +++++++++++++-------------- drivers/infiniband/hw/mlx5/srq.c | 4 +--- 7 files changed, 27 insertions(+), 45 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index e2d28081bd2a9f..2088e4a3c32d5f 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -710,7 +710,6 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, size_t ucmdlen; int page_shift; __be64 *pas; - int npages; int ncont; void *cqc; int err; @@ -746,11 +745,13 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, if (err) goto err_umem; - mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, - &page_shift); + mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &page_shift); ncont = ib_umem_num_dma_blocks(cq->buf.umem, 1UL << page_shift); - mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", - ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); + mlx5_ib_dbg( + dev, + "addr 0x%llx, size %u, npages %zu, page_shift %d, ncont %d\n", + ucmd.buf_addr, entries * ucmd.cqe_size, + ib_umem_num_pages(cq->buf.umem), page_shift, ncont); *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont; @@ -1135,7 +1136,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, struct mlx5_ib_resize_cq ucmd; struct ib_umem *umem; int err; - int npages; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) @@ -1156,7 +1156,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, return err; } - mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift); + mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, page_shift); cq->resize_umem = umem; *cqe_size = ucmd.cqe_size; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 7d99b5b298da8a..ae889266acf1c2 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2056,7 +2056,6 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, u64 addr; size_t size; u32 access; - int npages; int err; u32 page_mask; @@ -2081,14 +2080,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, return PTR_ERR(obj->umem); mlx5_ib_cont_pages(obj->umem, obj->umem->address, - MLX5_MKEY_PAGE_SHIFT_MASK, &npages, - &obj->page_shift); - - if (!npages) { - ib_umem_release(obj->umem); - return -EINVAL; - } - + MLX5_MKEY_PAGE_SHIFT_MASK, &obj->page_shift); page_mask = (1 << obj->page_shift) - 1; obj->page_offset = obj->umem->address & page_mask; diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 6d87f8c13ce016..7ae96b37bd6e65 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -39,11 +39,10 @@ /* @umem: umem object to scan * @addr: ib virtual address requested by the user * @max_page_shift: high limit for page_shift - 0 means no limit - * @count: number of PAGE_SIZE pages covered by umem * @shift: page shift for the compound pages found in the region */ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, int *count, int *shift) + unsigned long max_page_shift, int *shift) { unsigned long tmp; unsigned long m; @@ -57,7 +56,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, struct ib_umem_odp *odp = to_ib_umem_odp(umem); *shift = odp->page_shift; - *count = ib_umem_num_pages(umem); return; } @@ -91,7 +89,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, else m = 0; *shift = PAGE_SHIFT + m; - *count = i; } /* diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a36b0b8138a045..3e2c471d77bd6b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1231,7 +1231,7 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, unsigned long max_page_shift, - int *count, int *shift); + int *shift); void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, size_t offset, size_t num_pages, __be64 *pas, int access_flags); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 305520bb08632f..49484a9e64cd4a 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -957,11 +957,9 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd, struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; - int npages; int page_shift; - mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &npages, - &page_shift); + mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &page_shift); ent = mr_cache_ent_from_order(dev, order_base_2(ib_umem_num_dma_blocks( umem, 1UL << page_shift))); if (!ent) @@ -1151,7 +1149,6 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, struct mlx5_ib_mr *mr; int page_shift; __be64 *pas; - int npages; void *mkc; int inlen; u32 *in; @@ -1162,8 +1159,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, if (!mr) return ERR_PTR(-ENOMEM); - mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &npages, - &page_shift); + mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &page_shift); mr->page_shift = page_shift; mr->ibmr.pd = pd; @@ -1171,7 +1167,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, inlen = MLX5_ST_SZ_BYTES(create_mkey_in); if (populate) - inlen += sizeof(*pas) * roundup(npages, 2); + inlen += sizeof(*pas) * roundup(ib_umem_num_pages(umem), 2); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index b313e990362e64..46cfe50383b2a9 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -780,7 +780,7 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, unsigned long addr, size_t size, - struct ib_umem **umem, int *npages, int *page_shift, + struct ib_umem **umem, int *page_shift, u32 *offset) { int err; @@ -791,7 +791,7 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, return PTR_ERR(*umem); } - mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift); + mlx5_ib_cont_pages(*umem, addr, 0, page_shift); err = mlx5_ib_get_buf_offset(addr, *page_shift, offset); if (err) { @@ -799,8 +799,8 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, goto err_umem; } - mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, offset %d\n", - addr, size, *npages, *page_shift, *offset); + mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %zu, page_shift %d, offset %d\n", + addr, size, ib_umem_num_pages(*umem), *page_shift, *offset); return 0; @@ -834,7 +834,6 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); int page_shift = 0; - int npages; u32 offset = 0; int err; @@ -848,7 +847,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } - mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift); + mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &page_shift); err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, &rwq->rq_page_offset); if (err) { @@ -861,9 +860,12 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE); - mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n", - (unsigned long long)ucmd->buf_addr, rwq->buf_size, - npages, page_shift, rwq->rq_num_pas, offset); + mlx5_ib_dbg( + dev, + "addr 0x%llx, size %zd, npages %zu, page_shift %d, ncont %d, offset %d\n", + (unsigned long long)ucmd->buf_addr, rwq->buf_size, + ib_umem_num_pages(rwq->umem), page_shift, rwq->rq_num_pas, + offset); err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db); if (err) { @@ -896,7 +898,6 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer; int page_shift = 0; int uar_index = 0; - int npages; u32 offset = 0; int bfregn; int ncont = 0; @@ -950,7 +951,7 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, ubuffer->buf_addr = ucmd->buf_addr; err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size, &ubuffer->umem, - &npages, &page_shift, &offset); + &page_shift, &offset); if (err) goto err_bfreg; ncont = ib_umem_num_dma_blocks(ubuffer->umem, 1UL << page_shift); @@ -1209,12 +1210,10 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, int inlen; int err; int page_shift = 0; - int npages; u32 offset = 0; err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size, - &sq->ubuffer.umem, &npages, &page_shift, - &offset); + &sq->ubuffer.umem, &page_shift, &offset); if (err) return err; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 24076c6078cf24..dec9269514b915 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -51,7 +51,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, udata, struct mlx5_ib_ucontext, ibucontext); size_t ucmdlen; int err; - int npages; int page_shift; u32 offset; u32 uidx = MLX5_IB_DEFAULT_UIDX; @@ -86,8 +85,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, return err; } - mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages, - &page_shift); + mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &page_shift); err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); if (err) { From aab8d3966d2c2aa6c59fbacdb17d82156653754f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Oct 2020 15:23:10 +0200 Subject: [PATCH 045/147] RDMA/mlx5: Change mlx5_ib_populate_pas() to use rdma_for_each_block() This routine converts the umem SGL into a list of fixed pages for DMA, which is exactly what rdma_umem_for_each_dma_block() is for, use the common code directly. Link: https://lore.kernel.org/r/20201026132314.1336717-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cq.c | 6 +++--- drivers/infiniband/hw/mlx5/devx.c | 4 ++-- drivers/infiniband/hw/mlx5/mem.c | 19 ++++++++++++++----- drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 ++-- drivers/infiniband/hw/mlx5/mr.c | 7 +++++-- drivers/infiniband/hw/mlx5/qp.c | 6 +++--- drivers/infiniband/hw/mlx5/srq.c | 2 +- 7 files changed, 30 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 2088e4a3c32d5f..9ab93d730769f3 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -762,7 +762,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, } pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); - mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0); + mlx5_ib_populate_pas(cq->buf.umem, 1UL << page_shift, pas, 0); cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); MLX5_SET(cqc, cqc, log_page_size, @@ -1305,8 +1305,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas); if (udata) - mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, - pas, 0); + mlx5_ib_populate_pas(cq->resize_umem, 1UL << page_shift, pas, + 0); else mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas); diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index ae889266acf1c2..611ce21157dec7 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2115,9 +2115,9 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, MLX5_SET(umem, umem, log_page_size, obj->page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET(umem, umem, page_offset, obj->page_offset); - mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt, + mlx5_ib_populate_pas(obj->umem, 1UL << obj->page_shift, mtt, (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) | - MLX5_IB_MTT_READ); + MLX5_IB_MTT_READ); } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 7ae96b37bd6e65..779c4a040d8b70 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -155,13 +155,22 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, } } -void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, __be64 *pas, int access_flags) +/* + * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be + * filled in the pas array. + */ +void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, + u64 access_flags) { - return __mlx5_ib_populate_pas(dev, umem, page_shift, 0, - ib_umem_num_dma_blocks(umem, PAGE_SIZE), - pas, access_flags); + struct ib_block_iter biter; + + rdma_umem_for_each_dma_block (umem, &biter, page_size) { + *pas = cpu_to_be64(rdma_block_iter_dma_address(&biter) | + access_flags); + pas++; + } } + int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) { u64 page_size; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 3e2c471d77bd6b..b043a178e95b10 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1235,8 +1235,8 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, size_t offset, size_t num_pages, __be64 *pas, int access_flags); -void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, __be64 *pas, int access_flags); +void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, + u64 access_flags); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 49484a9e64cd4a..1cf57a921dacb5 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1167,7 +1167,10 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, inlen = MLX5_ST_SZ_BYTES(create_mkey_in); if (populate) - inlen += sizeof(*pas) * roundup(ib_umem_num_pages(umem), 2); + inlen += + sizeof(*pas) * + roundup(ib_umem_num_dma_blocks(umem, 1UL << page_shift), + 2); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; @@ -1179,7 +1182,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, err = -EINVAL; goto err_2; } - mlx5_ib_populate_pas(dev, umem, page_shift, pas, + mlx5_ib_populate_pas(umem, 1ULL << page_shift, pas, pg_cap ? MLX5_IB_MTT_PRESENT : 0); } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 46cfe50383b2a9..cfabcd4d46b691 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -971,7 +971,7 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(create_qp_in, *in, uid, uid); pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); if (ubuffer->umem) - mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); + mlx5_ib_populate_pas(ubuffer->umem, 1UL << page_shift, pas, 0); qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); @@ -1251,7 +1251,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, MLX5_SET(wq, wq, page_offset, offset); pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0); + mlx5_ib_populate_pas(sq->ubuffer.umem, 1UL << page_shift, pas, 0); err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp); @@ -4885,7 +4885,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, MLX5_SET(rqc, rqc, delay_drop_en, 1); } rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); + mlx5_ib_populate_pas(rwq->umem, 1UL << rwq->page_shift, rq_pas0, 0); err = mlx5_core_create_rq_tracked(dev, in, inlen, &rwq->core_qp); if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) { err = set_delay_drop(dev); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index dec9269514b915..cb5ad045da9abc 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -100,7 +100,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, goto err_umem; } - mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0); + mlx5_ib_populate_pas(srq->umem, 1UL << page_shift, in->pas, 0); err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db); if (err) { From f22c30aa6d27597a6da665db1a5f099e4903ecc7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Oct 2020 15:23:11 +0200 Subject: [PATCH 046/147] RDMA/mlx5: Move xlt_emergency_page_mutex into mr.c This is the only user, so remove the wrappers. Link: https://lore.kernel.org/r/20201026132314.1336717-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 20 -------------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +-- drivers/infiniband/hw/mlx5/mr.c | 18 ++++++++++++++++++ 3 files changed, 19 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 92a419425a102f..cc42c0ef4a79f6 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -75,12 +75,6 @@ static LIST_HEAD(mlx5_ib_dev_list); */ static DEFINE_MUTEX(mlx5_ib_multiport_mutex); -/* We can't use an array for xlt_emergency_page because dma_map_single - * doesn't work on kernel modules memory - */ -static unsigned long xlt_emergency_page; -static struct mutex xlt_emergency_page_mutex; - struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi) { struct mlx5_ib_dev *dev; @@ -4804,17 +4798,6 @@ static struct mlx5_interface mlx5_ib_interface = { .protocol = MLX5_INTERFACE_PROTOCOL_IB, }; -unsigned long mlx5_ib_get_xlt_emergency_page(void) -{ - mutex_lock(&xlt_emergency_page_mutex); - return xlt_emergency_page; -} - -void mlx5_ib_put_xlt_emergency_page(void) -{ - mutex_unlock(&xlt_emergency_page_mutex); -} - static int __init mlx5_ib_init(void) { int err; @@ -4823,8 +4806,6 @@ static int __init mlx5_ib_init(void) if (!xlt_emergency_page) return -ENOMEM; - mutex_init(&xlt_emergency_page_mutex); - mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0); if (!mlx5_ib_event_wq) { free_page(xlt_emergency_page); @@ -4842,7 +4823,6 @@ static void __exit mlx5_ib_cleanup(void) { mlx5_unregister_interface(&mlx5_ib_interface); destroy_workqueue(mlx5_ib_event_wq); - mutex_destroy(&xlt_emergency_page_mutex); free_page(xlt_emergency_page); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b043a178e95b10..8f728b98f9a6ab 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1454,8 +1454,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev, return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages; } -unsigned long mlx5_ib_get_xlt_emergency_page(void); -void mlx5_ib_put_xlt_emergency_page(void); +extern unsigned long xlt_emergency_page; int bfregn_to_uar_index(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, u32 bfregn, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1cf57a921dacb5..fd14ced1159360 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -41,6 +41,13 @@ #include #include "mlx5_ib.h" +/* + * We can't use an array for xlt_emergency_page because dma_map_single doesn't + * work on kernel modules memory + */ +unsigned long xlt_emergency_page; +static DEFINE_MUTEX(xlt_emergency_page_mutex); + enum { MAX_PENDING_REG_MR = 8, }; @@ -992,6 +999,17 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd, MLX5_UMR_MTT_ALIGNMENT) #define MLX5_SPARE_UMR_CHUNK 0x10000 +static unsigned long mlx5_ib_get_xlt_emergency_page(void) +{ + mutex_lock(&xlt_emergency_page_mutex); + return xlt_emergency_page; +} + +static void mlx5_ib_put_xlt_emergency_page(void) +{ + mutex_unlock(&xlt_emergency_page_mutex); +} + int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags) { From 8010d74b9965b33182651767e9788ed84cf8e5f9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Oct 2020 15:23:12 +0200 Subject: [PATCH 047/147] RDMA/mlx5: Split the WR setup out of mlx5_ib_update_xlt() The memory allocation is quite complicated, and makes this function hard to understand. Refactor things so that a function call sets up the WR, SG, DMA mapping and buffer, further splitting that into buffer and DMA/wr. This also slightly changes the buffer allocation logic to try an order 0 page allocation (with OOM warnings on) before going to the emergency page. Link: https://lore.kernel.org/r/20201026132314.1336717-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 6 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mlx5/mr.c | 189 ++++++++++++++++++--------- 3 files changed, 128 insertions(+), 69 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index cc42c0ef4a79f6..36b15a063e688a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4802,13 +4802,13 @@ static int __init mlx5_ib_init(void) { int err; - xlt_emergency_page = __get_free_page(GFP_KERNEL); + xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL); if (!xlt_emergency_page) return -ENOMEM; mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0); if (!mlx5_ib_event_wq) { - free_page(xlt_emergency_page); + free_page((unsigned long)xlt_emergency_page); return -ENOMEM; } @@ -4823,7 +4823,7 @@ static void __exit mlx5_ib_cleanup(void) { mlx5_unregister_interface(&mlx5_ib_interface); destroy_workqueue(mlx5_ib_event_wq); - free_page(xlt_emergency_page); + free_page((unsigned long)xlt_emergency_page); } module_init(mlx5_ib_init); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 8f728b98f9a6ab..d92afbd26aa564 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1454,7 +1454,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev, return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages; } -extern unsigned long xlt_emergency_page; +extern void *xlt_emergency_page; int bfregn_to_uar_index(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, u32 bfregn, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index fd14ced1159360..0fa3899dee7e08 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -45,7 +45,7 @@ * We can't use an array for xlt_emergency_page because dma_map_single doesn't * work on kernel modules memory */ -unsigned long xlt_emergency_page; +void *xlt_emergency_page; static DEFINE_MUTEX(xlt_emergency_page_mutex); enum { @@ -999,15 +999,121 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd, MLX5_UMR_MTT_ALIGNMENT) #define MLX5_SPARE_UMR_CHUNK 0x10000 -static unsigned long mlx5_ib_get_xlt_emergency_page(void) +/* + * Allocate a temporary buffer to hold the per-page information to transfer to + * HW. For efficiency this should be as large as it can be, but buffer + * allocation failure is not allowed, so try smaller sizes. + */ +static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) { + const size_t xlt_chunk_align = + MLX5_UMR_MTT_ALIGNMENT / sizeof(ent_size); + size_t size; + void *res = NULL; + + static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); + + /* + * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the + * allocation can't trigger any kind of reclaim. + */ + might_sleep(); + + gfp_mask |= __GFP_ZERO; + + /* + * If the system already has a suitable high order page then just use + * that, but don't try hard to create one. This max is about 1M, so a + * free x86 huge page will satisfy it. + */ + size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), + MLX5_MAX_UMR_CHUNK); + *nents = size / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + + if (size > MLX5_SPARE_UMR_CHUNK) { + size = MLX5_SPARE_UMR_CHUNK; + *nents = get_order(size) / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + } + + *nents = PAGE_SIZE / ent_size; + res = (void *)__get_free_page(gfp_mask); + if (res) + return res; + mutex_lock(&xlt_emergency_page_mutex); + memset(xlt_emergency_page, 0, PAGE_SIZE); return xlt_emergency_page; } -static void mlx5_ib_put_xlt_emergency_page(void) +static void mlx5_ib_free_xlt(void *xlt, size_t length) +{ + if (xlt == xlt_emergency_page) { + mutex_unlock(&xlt_emergency_page_mutex); + return; + } + + free_pages((unsigned long)xlt, get_order(length)); +} + +/* + * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for + * submission. + */ +static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, + struct mlx5_umr_wr *wr, struct ib_sge *sg, + size_t nents, size_t ent_size, + unsigned int flags) +{ + struct mlx5_ib_dev *dev = mr->dev; + struct device *ddev = dev->ib_dev.dev.parent; + dma_addr_t dma; + void *xlt; + + xlt = mlx5_ib_alloc_xlt(&nents, ent_size, + flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : + GFP_KERNEL); + sg->length = nents * ent_size; + dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, dma)) { + mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); + mlx5_ib_free_xlt(xlt, sg->length); + return NULL; + } + sg->addr = dma; + sg->lkey = dev->umrc.pd->local_dma_lkey; + + memset(wr, 0, sizeof(*wr)); + wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; + if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) + wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; + wr->wr.sg_list = sg; + wr->wr.num_sge = 1; + wr->wr.opcode = MLX5_IB_WR_UMR; + wr->pd = mr->ibmr.pd; + wr->mkey = mr->mmkey.key; + wr->length = mr->mmkey.size; + wr->virt_addr = mr->mmkey.iova; + wr->access_flags = mr->access_flags; + wr->page_shift = mr->page_shift; + wr->xlt_size = sg->length; + return xlt; +} + +static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, + struct ib_sge *sg) { - mutex_unlock(&xlt_emergency_page_mutex); + struct device *ddev = dev->ib_dev.dev.parent; + + dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); + mlx5_ib_free_xlt(xlt, sg->length); } int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, @@ -1015,9 +1121,7 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, { struct mlx5_ib_dev *dev = mr->dev; struct device *ddev = dev->ib_dev.dev.parent; - int size; void *xlt; - dma_addr_t dma; struct mlx5_umr_wr wr; struct ib_sge sg; int err = 0; @@ -1028,10 +1132,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, const int page_mask = page_align - 1; size_t pages_mapped = 0; size_t pages_to_map = 0; - size_t pages_iter = 0; + size_t pages_iter; size_t size_to_map = 0; - gfp_t gfp; - bool use_emergency_page = false; + size_t orig_sg_length; if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && !umr_can_use_indirect_mkey(dev)) @@ -1044,37 +1147,13 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, npages += idx & page_mask; idx &= ~page_mask; } - - gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL; - gfp |= __GFP_ZERO | __GFP_NOWARN; - pages_to_map = ALIGN(npages, page_align); - size = desc_size * pages_to_map; - size = min_t(int, size, MLX5_MAX_UMR_CHUNK); - - xlt = (void *)__get_free_pages(gfp, get_order(size)); - if (!xlt && size > MLX5_SPARE_UMR_CHUNK) { - mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation od %d bytes\n", - size, get_order(size), MLX5_SPARE_UMR_CHUNK); - size = MLX5_SPARE_UMR_CHUNK; - xlt = (void *)__get_free_pages(gfp, get_order(size)); - } - - if (!xlt) { - mlx5_ib_warn(dev, "Using XLT emergency buffer\n"); - xlt = (void *)mlx5_ib_get_xlt_emergency_page(); - size = PAGE_SIZE; - memset(xlt, 0, size); - use_emergency_page = true; - } - pages_iter = size / desc_size; - dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE); - if (dma_mapping_error(ddev, dma)) { - mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); - err = -ENOMEM; - goto free_xlt; - } + xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags); + if (!xlt) + return -ENOMEM; + pages_iter = sg.length / desc_size; + orig_sg_length = sg.length; if (mr->umem->is_odp) { if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { @@ -1085,22 +1164,6 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, } } - sg.addr = dma; - sg.lkey = dev->umrc.pd->local_dma_lkey; - - memset(&wr, 0, sizeof(wr)); - wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; - if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) - wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; - wr.wr.sg_list = &sg; - wr.wr.num_sge = 1; - wr.wr.opcode = MLX5_IB_WR_UMR; - - wr.pd = mr->ibmr.pd; - wr.mkey = mr->mmkey.key; - wr.length = mr->mmkey.size; - wr.virt_addr = mr->mmkey.iova; - wr.access_flags = mr->access_flags; wr.page_shift = page_shift; for (pages_mapped = 0; @@ -1108,7 +1171,8 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, pages_mapped += pages_iter, idx += pages_iter) { npages = min_t(int, pages_iter, pages_to_map - pages_mapped); size_to_map = npages * desc_size; - dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE); + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); if (mr->umem->is_odp) { mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); } else { @@ -1118,9 +1182,10 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, /* Clear padding after the pages * brought from the umem. */ - memset(xlt + size_to_map, 0, size - size_to_map); + memset(xlt + size_to_map, 0, sg.length - size_to_map); } - dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); @@ -1144,14 +1209,8 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, err = mlx5_ib_post_send_wait(dev, &wr); } - dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); - -free_xlt: - if (use_emergency_page) - mlx5_ib_put_xlt_emergency_page(); - else - free_pages((unsigned long)xlt, get_order(size)); - + sg.length = orig_sg_length; + mlx5_ib_unmap_free_xlt(dev, xlt, &sg); return err; } From f1eaac37da20823816af274d69a9eed7444e9822 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Oct 2020 15:23:13 +0200 Subject: [PATCH 048/147] RDMA/mlx5: Split mlx5_ib_update_xlt() into ODP and non-ODP cases Mixing these together is just a mess, make a dedicated version, mlx5_ib_update_mr_pas(), which directly loads the whole MTT for a non-ODP MR. The split out version can trivially use a simple loop with rdma_for_each_block() which allows using the core code to compute the MR pages and avoids seeking in the SGL list after each chunk as the __mlx5_ib_populate_pas() call required. Significantly speeds loading large MTTs. Link: https://lore.kernel.org/r/20201026132314.1336717-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mem.c | 64 ------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 - drivers/infiniband/hw/mlx5/mr.c | 132 ++++++++++++++++++--------- 3 files changed, 90 insertions(+), 109 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 779c4a040d8b70..92e7621ec858b1 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -91,70 +91,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, *shift = PAGE_SHIFT + m; } -/* - * Populate the given array with bus addresses from the umem. - * - * dev - mlx5_ib device - * umem - umem to use to fill the pages - * page_shift - determines the page size used in the resulting array - * offset - offset into the umem to start from, - * only implemented for ODP umems - * num_pages - total number of pages to fill - * pas - bus addresses array to fill - * access_flags - access flags to set on all present pages. - use enum mlx5_ib_mtt_access_flags for this. - */ -void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, size_t offset, size_t num_pages, - __be64 *pas, int access_flags) -{ - int shift = page_shift - PAGE_SHIFT; - int mask = (1 << shift) - 1; - int i, k, idx; - u64 cur = 0; - u64 base; - int len; - struct scatterlist *sg; - int entry; - - i = 0; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - base = sg_dma_address(sg); - - /* Skip elements below offset */ - if (i + len < offset << shift) { - i += len; - continue; - } - - /* Skip pages below offset */ - if (i < offset << shift) { - k = (offset << shift) - i; - i = offset << shift; - } else { - k = 0; - } - - for (; k < len; k++) { - if (!(i & mask)) { - cur = base + (k << PAGE_SHIFT); - cur |= access_flags; - idx = (i >> shift) - offset; - - pas[idx] = cpu_to_be64(cur); - mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", - i >> shift, be64_to_cpu(pas[idx])); - } - i++; - - /* Stop after num_pages reached */ - if (i >> shift >= offset + num_pages) - return; - } - } -} - /* * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be * filled in the pas array. diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d92afbd26aa564..aadd43425a587c 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1232,9 +1232,6 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, unsigned long max_page_shift, int *shift); -void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, size_t offset, size_t num_pages, - __be64 *pas, int access_flags); void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, u64 access_flags); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 0fa3899dee7e08..3fa3809c266057 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1116,6 +1116,21 @@ static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, mlx5_ib_free_xlt(xlt, sg->length); } +static unsigned int xlt_wr_final_send_flags(unsigned int flags) +{ + unsigned int res = 0; + + if (flags & MLX5_IB_UPD_XLT_ENABLE) + res |= MLX5_IB_SEND_UMR_ENABLE_MR | + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | + MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS) + res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; + if (flags & MLX5_IB_UPD_XLT_ADDR) + res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + return res; +} + int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags) { @@ -1140,6 +1155,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, !umr_can_use_indirect_mkey(dev)) return -EPERM; + if (WARN_ON(!mr->umem->is_odp)) + return -EINVAL; + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, * so we need to align the offset and length accordingly */ @@ -1155,13 +1173,11 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, pages_iter = sg.length / desc_size; orig_sg_length = sg.length; - if (mr->umem->is_odp) { - if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { - struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - size_t max_pages = ib_umem_odp_num_pages(odp) - idx; + if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + size_t max_pages = ib_umem_odp_num_pages(odp) - idx; - pages_to_map = min_t(size_t, pages_to_map, max_pages); - } + pages_to_map = min_t(size_t, pages_to_map, max_pages); } wr.page_shift = page_shift; @@ -1173,36 +1189,14 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, size_to_map = npages * desc_size; dma_sync_single_for_cpu(ddev, sg.addr, sg.length, DMA_TO_DEVICE); - if (mr->umem->is_odp) { - mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); - } else { - __mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx, - npages, xlt, - MLX5_IB_MTT_PRESENT); - /* Clear padding after the pages - * brought from the umem. - */ - memset(xlt + size_to_map, 0, sg.length - size_to_map); - } + mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); - if (pages_mapped + pages_iter >= pages_to_map) { - if (flags & MLX5_IB_UPD_XLT_ENABLE) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_ENABLE_MR | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - if (flags & MLX5_IB_UPD_XLT_PD || - flags & MLX5_IB_UPD_XLT_ACCESS) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - if (flags & MLX5_IB_UPD_XLT_ADDR) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - } + if (pages_mapped + pages_iter >= pages_to_map) + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); wr.offset = idx * desc_size; wr.xlt_size = sg.length; @@ -1214,6 +1208,69 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, return err; } +/* + * Send the DMA list to the HW for a normal MR using UMR. + */ +static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) +{ + struct mlx5_ib_dev *dev = mr->dev; + struct device *ddev = dev->ib_dev.dev.parent; + struct ib_block_iter biter; + struct mlx5_mtt *cur_mtt; + struct mlx5_umr_wr wr; + size_t orig_sg_length; + struct mlx5_mtt *mtt; + size_t final_size; + struct ib_sge sg; + int err = 0; + + if (WARN_ON(mr->umem->is_odp)) + return -EINVAL; + + mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, + ib_umem_num_dma_blocks(mr->umem, + 1 << mr->page_shift), + sizeof(*mtt), flags); + if (!mtt) + return -ENOMEM; + orig_sg_length = sg.length; + + cur_mtt = mtt; + rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap, + BIT(mr->page_shift)) { + if (cur_mtt == (void *)mtt + sg.length) { + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + err = mlx5_ib_post_send_wait(dev, &wr); + if (err) + goto err; + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + wr.offset += sg.length; + cur_mtt = mtt; + } + + cur_mtt->ptag = + cpu_to_be64(rdma_block_iter_dma_address(&biter) | + MLX5_IB_MTT_PRESENT); + cur_mtt++; + } + + final_size = (void *)cur_mtt - (void *)mtt; + sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); + memset(cur_mtt, 0, sg.length - final_size); + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); + wr.xlt_size = sg.length; + + dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); + err = mlx5_ib_post_send_wait(dev, &wr); + +err: + sg.length = orig_sg_length; + mlx5_ib_unmap_free_xlt(dev, mtt, &sg); + return err; +} + /* * If ibmr is NULL it will be allocated by reg_create. * Else, the given ibmr will be used. @@ -1480,12 +1537,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, * configured properly but left disabled. It is safe to go ahead * and configure it again via UMR while enabling it. */ - int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; - - err = mlx5_ib_update_xlt( - mr, 0, - ib_umem_num_dma_blocks(umem, 1UL << mr->page_shift), - mr->page_shift, update_xlt_flags); + err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); if (err) { dereg_mr(dev, mr); return ERR_PTR(err); @@ -1651,11 +1703,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, upd_flags |= MLX5_IB_UPD_XLT_PD; if (flags & IB_MR_REREG_ACCESS) upd_flags |= MLX5_IB_UPD_XLT_ACCESS; - err = mlx5_ib_update_xlt( - mr, 0, - ib_umem_num_dma_blocks(mr->umem, - 1UL << mr->page_shift), - mr->page_shift, upd_flags); + err = mlx5_ib_update_mr_pas(mr, upd_flags); } else { err = rereg_umr(pd, mr, access_flags, flags); } From d5c7916fe4613e9128b0f877f7e2dd0c85f5d2d2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Oct 2020 15:23:14 +0200 Subject: [PATCH 049/147] RDMA/mlx5: Use ib_umem_find_best_pgsz() for mkc's Now that all the PAS arrays or UMR XLT's for mkcs are filled using rdma_for_each_block() we can use the common ib_umem_find_best_pgsz() algorithm. Link: https://lore.kernel.org/r/20201026132314.1336717-6-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 9 +++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 27 ++++++++++++++++++++ drivers/infiniband/hw/mlx5/mr.c | 37 +++++++++++++++------------- 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index e9fecbdf391bcc..f1fc7e39c782fb 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -84,6 +84,15 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, dma_addr_t mask; int i; + if (umem->is_odp) { + unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift); + + /* ODP must always be self consistent. */ + if (!(pgsz_bitmap & page_size)) + return 0; + return page_size; + } + /* rdma_for_each_block() has a bug if the page size is smaller than the * page size used to build the umem. For now prevent smaller page sizes * from being returned. diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index aadd43425a587c..bb44080170be8c 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -42,6 +42,33 @@ #define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size) +static __always_inline unsigned long +__mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits, + unsigned int pgsz_shift) +{ + unsigned int largest_pg_shift = + min_t(unsigned long, (1ULL << log_pgsz_bits) - 1 + pgsz_shift, + BITS_PER_LONG - 1); + + /* + * Despite a command allowing it, the device does not support lower than + * 4k page size. + */ + pgsz_shift = max_t(unsigned int, MLX5_ADAPTER_PAGE_SHIFT, pgsz_shift); + return GENMASK(largest_pg_shift, pgsz_shift); +} + +/* + * For mkc users, instead of a page_offset the command has a start_iova which + * specifies both the page_offset and the on-the-wire IOVA + */ +#define mlx5_umem_find_best_pgsz(umem, typ, log_pgsz_fld, pgsz_shift, iova) \ + ib_umem_find_best_pgsz(umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), \ + pgsz_shift), \ + iova) + enum { MLX5_IB_MMAP_OFFSET_START = 9, MLX5_IB_MMAP_OFFSET_END = 255, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3fa3809c266057..b091d84ba4359a 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -964,11 +964,13 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd, struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; - int page_shift; + unsigned int page_size; - mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &page_shift); - ent = mr_cache_ent_from_order(dev, order_base_2(ib_umem_num_dma_blocks( - umem, 1UL << page_shift))); + page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova); + if (WARN_ON(!page_size)) + return ERR_PTR(-EINVAL); + ent = mr_cache_ent_from_order( + dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size))); if (!ent) return ERR_PTR(-E2BIG); @@ -990,7 +992,7 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd, mr->mmkey.iova = iova; mr->mmkey.size = umem->length; mr->mmkey.pd = to_mpd(pd)->pdn; - mr->page_shift = page_shift; + mr->page_shift = order_base_2(page_size); return mr; } @@ -1280,8 +1282,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, int access_flags, bool populate) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + unsigned int page_size; struct mlx5_ib_mr *mr; - int page_shift; __be64 *pas; void *mkc; int inlen; @@ -1289,22 +1291,23 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, int err; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); + page_size = + mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova); + if (WARN_ON(!page_size)) + return ERR_PTR(-EINVAL); + mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); - mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &page_shift); - - mr->page_shift = page_shift; mr->ibmr.pd = pd; mr->access_flags = access_flags; + mr->page_shift = order_base_2(page_size); inlen = MLX5_ST_SZ_BYTES(create_mkey_in); if (populate) - inlen += - sizeof(*pas) * - roundup(ib_umem_num_dma_blocks(umem, 1UL << page_shift), - 2); + inlen += sizeof(*pas) * + roundup(ib_umem_num_dma_blocks(umem, page_size), 2); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; @@ -1316,7 +1319,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, err = -EINVAL; goto err_2; } - mlx5_ib_populate_pas(umem, 1ULL << page_shift, pas, + mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas, pg_cap ? MLX5_IB_MTT_PRESENT : 0); } @@ -1334,11 +1337,11 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, MLX5_SET64(mkc, mkc, len, umem->length); MLX5_SET(mkc, mkc, bsf_octword_size, 0); MLX5_SET(mkc, mkc, translations_octword_size, - get_octo_len(iova, umem->length, page_shift)); - MLX5_SET(mkc, mkc, log_page_size, page_shift); + get_octo_len(iova, umem->length, mr->page_shift)); + MLX5_SET(mkc, mkc, log_page_size, mr->page_shift); if (populate) { MLX5_SET(create_mkey_in, in, translations_octword_actual_size, - get_octo_len(iova, umem->length, page_shift)); + get_octo_len(iova, umem->length, mr->page_shift)); } err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); From 856c2998999958761b6a52208b4edb4d352c4037 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Wed, 28 Oct 2020 20:25:09 +0800 Subject: [PATCH 050/147] RDMA/siw: Fix typo of EAGAIN not -EAGAIN in siw_cm_work_handler() The rv cannot be 'EAGAIN' in the previous path, we should use '-EAGAIN' to check it. For example: Call trace: ->siw_cm_work_handler ->siw_proc_mpareq ->siw_recv_mpa_rr Link: https://lore.kernel.org/r/20201028122509.47074-1-zhangqilong3@huawei.com Signed-off-by: Zhang Qilong Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 66764f7ef072ad..1f9e15b715048c 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1047,7 +1047,7 @@ static void siw_cm_work_handler(struct work_struct *w) cep->state); } } - if (rv && rv != EAGAIN) + if (rv && rv != -EAGAIN) release_cep = 1; break; From 5c4193669b6f20b990f99cb8e15cdea80f865ac1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 30 Oct 2020 11:03:05 -0300 Subject: [PATCH 051/147] RDMA/rxe,siw: Restore uverbs_cmd_mask IB_USER_VERBS_CMD_POST_SEND These two drivers open code the call to POST_SEND and do not use the rdma-core wrapper to do it, thus their usages was missed during the audit. Both drivers use this as a doorbell to signal the kernel to start DMA. Fixes: 628c02bf38aa ("RDMA: Remove uverbs cmds from drivers that don't use them") Link: https://lore.kernel.org/r/0-v1-4608c5610afa+fb-uverbs_cmd_post_send_fix_jgg@nvidia.com Reported-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 3 ++- drivers/infiniband/sw/siw/siw_main.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 7652d53af2c1d9..209c7b3fab97a2 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1142,7 +1142,8 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) dma_set_max_seg_size(&dev->dev, UINT_MAX); dma_set_coherent_mask(&dev->dev, dma_get_required_mask(&dev->dev)); - dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ); + dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) | + BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ); ib_set_device_ops(dev, &rxe_dev_ops); err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1); diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index e49faefdee923d..9cf596429dbf7d 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -347,6 +347,8 @@ static struct siw_device *siw_device_create(struct net_device *netdev) addr); } + base_dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND); + base_dev->node_type = RDMA_NODE_RNIC; memcpy(base_dev->node_desc, SIW_NODE_DESC_COMMON, sizeof(SIW_NODE_DESC_COMMON)); From 235b6ac306951fd4e8c31861d79ca68a643cab8a Mon Sep 17 00:00:00 2001 From: Meir Lichtinger Date: Mon, 26 Oct 2020 15:29:04 +0200 Subject: [PATCH 052/147] RDMA/ipoib: Add 50Gb and 100Gb link speeds to ethtool The IBTA specification has new speeds - HDR and NDR, supporting signaling rate of 50Gb and 100Gb respectively. ethtool support of ipoib driver translates IB speed to signaling rate. Added translation of HDR and NDR IB types to rates of 50Gb and 100Gb ethernet speed. Link: https://lore.kernel.org/r/20201026132904.1338526-1-leon@kernel.org Signed-off-by: Meir Lichtinger Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 67a21fdf5367f8..823f6831e7eac1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -166,6 +166,10 @@ static inline int ib_speed_enum_to_int(int speed) return SPEED_14000; case IB_SPEED_EDR: return SPEED_25000; + case IB_SPEED_HDR: + return SPEED_50000; + case IB_SPEED_NDR: + return SPEED_100000; } return SPEED_UNKNOWN; From c7adf7717301558e8852949d8e3dc3748d1a4a97 Mon Sep 17 00:00:00 2001 From: Meir Lichtinger Date: Mon, 26 Oct 2020 15:37:37 +0200 Subject: [PATCH 053/147] IB/core: Add support for NDR link speed Add new IBTA speed NDR, supporting signaling rate of 100Gb. Link: https://lore.kernel.org/r/20201026133738.1340432-2-leon@kernel.org Signed-off-by: Meir Lichtinger Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 88460d76a84afd..b8abb30f80df5d 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -275,6 +275,10 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, speed = " HDR"; rate = 500; break; + case IB_SPEED_NDR: + speed = " NDR"; + rate = 1000; + break; case IB_SPEED_SDR: default: /* default to SDR for invalid rates */ speed = " SDR"; From f946e45f59ef01ff54ffb3b1eba3a8e7915e7326 Mon Sep 17 00:00:00 2001 From: Meir Lichtinger Date: Mon, 26 Oct 2020 15:37:38 +0200 Subject: [PATCH 054/147] IB/mlx5: Add support for NDR link speed The IBTA specification has new speed - NDR. That speed supports signaling rate of 100Gb. mlx5 IB driver translates link modes reported by ConnectX device to IB speed and width. Added translation of new 100Gb, 200Gb and 400Gb link modes to NDR IB type and width of x1, x2 or x4 respectively. Link: https://lore.kernel.org/r/20201026133738.1340432-3-leon@kernel.org Signed-off-by: Meir Lichtinger Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 36b15a063e688a..1efcb0129efe7f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -419,10 +419,22 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed, *active_width = IB_WIDTH_2X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_100GAUI_1_100GBASE_CR_KR): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_NDR; + break; case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4): *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_200GAUI_2_200GBASE_CR2_KR2): + *active_width = IB_WIDTH_2X; + *active_speed = IB_SPEED_NDR; + break; + case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4): + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_NDR; + break; default: return -EINVAL; } From b2d2440430c0fdd5e0cad3efd6d1c9e3d3d02e5b Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Mon, 2 Nov 2020 22:13:42 +0800 Subject: [PATCH 055/147] RDMA/rxe: Remove VLAN code leftovers from RXE Since the commit fd49ddaf7e26 ("RDMA/rxe: prevent rxe creation on top of vlan interface") does not permit rxe on top of vlan device, all the stuff related with vlan should be removed. Fixes: fd49ddaf7e26 ("RDMA/rxe: prevent rxe creation on top of vlan interface") Link: https://lore.kernel.org/r/1604326422-18625-1-git-send-email-yanjunz@nvidia.com Signed-off-by: Zhu Yanjun Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 9 --------- drivers/infiniband/sw/rxe/rxe_resp.c | 5 ----- 2 files changed, 14 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 575e1a4ec82121..d61f06d4e8bdbe 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -26,9 +25,6 @@ struct device *rxe_dma_device(struct rxe_dev *rxe) ndev = rxe->ndev; - if (is_vlan_dev(ndev)) - ndev = vlan_dev_real_dev(ndev); - return ndev->dev.parent; } @@ -166,14 +162,9 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct udphdr *udph; struct net_device *ndev = skb->dev; - struct net_device *rdev = ndev; struct rxe_dev *rxe = rxe_get_dev_from_net(ndev); struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); - if (!rxe && is_vlan_dev(rdev)) { - rdev = vlan_dev_real_dev(ndev); - rxe = rxe_get_dev_from_net(rdev); - } if (!rxe) goto drop; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index c7e3b6a4af38f9..5a098083a9d225 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -872,11 +872,6 @@ static enum resp_states do_complete(struct rxe_qp *qp, else wc->network_hdr_type = RDMA_NETWORK_IPV6; - if (is_vlan_dev(skb->dev)) { - wc->wc_flags |= IB_WC_WITH_VLAN; - wc->vlan_id = vlan_dev_vlan_id(skb->dev); - } - if (pkt->mask & RXE_IMMDT_MASK) { wc->wc_flags |= IB_WC_WITH_IMM; wc->ex.imm_data = immdt_imm(pkt); From f7a95c902b020c7fe6781f0814187c2e2266a689 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Tue, 3 Nov 2020 19:57:54 +0800 Subject: [PATCH 056/147] IB/isert: Do not excplicitly check == false for bool It is not the kernel style, warning reported by coccicheck: ./ib_isert.c:1104:12-24: WARNING: Comparison to bool Link: https://lore.kernel.org/r/1604404674-32998-1-git-send-email-zou_wei@huawei.com Reported-by: Hulk Robot Signed-off-by: Zou Wei Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/isert/ib_isert.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index bd478947b93a5e..e47cd0291a7e24 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1101,7 +1101,7 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn, sequence_cmd: rc = iscsit_sequence_cmd(conn, cmd, buf, hdr->cmdsn); - if (!rc && dump_payload == false && unsol_data) + if (!rc && !dump_payload && unsol_data) iscsit_set_unsolicited_dataout(cmd); else if (dump_payload && imm_data) target_put_sess_cmd(&cmd->se_cmd); From efa968ee20248ebf8da8542f21d5d2811e86392f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 4 Nov 2020 16:45:55 +0200 Subject: [PATCH 057/147] RDMA/core: Postpone uobject cleanup on failure till FD close Remove the ib_is_destroyable_retryable() concept. The idea here was to allow the drivers to forcibly clean the HW object even if they otherwise didn't want to (eg because of usecnt). This was an attempt to clean up in a world where drivers were not allowed to fail HW object destruction. Now that we are going back to allowing HW objects to fail destroy this doesn't make sense. Instead if a uobject's HW object can't be destroyed it is left on the uobject list and it is up to uverbs_destroy_ufile_hw() to clean it. Multiple passes over the uobject list allow hidden dependencies to be resolved. If that fails the HW driver is broken, throw a WARN_ON and leak the HW object memory. All the other tricky failure paths (eg on creation error unwind) have already been updated to this new model. Link: https://lore.kernel.org/r/20201104144556.3809085-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 51 +++++++------------ drivers/infiniband/core/uverbs_cmd.c | 5 +- drivers/infiniband/core/uverbs_std_types.c | 15 +++--- .../core/uverbs_std_types_counters.c | 5 +- drivers/infiniband/core/uverbs_std_types_cq.c | 4 +- drivers/infiniband/core/uverbs_std_types_dm.c | 6 +-- .../core/uverbs_std_types_flow_action.c | 6 +-- drivers/infiniband/core/uverbs_std_types_qp.c | 4 +- .../infiniband/core/uverbs_std_types_srq.c | 4 +- drivers/infiniband/core/uverbs_std_types_wq.c | 4 +- drivers/infiniband/hw/mlx5/devx.c | 4 +- drivers/infiniband/hw/mlx5/fs.c | 6 +-- include/rdma/ib_verbs.h | 44 +--------------- 13 files changed, 45 insertions(+), 113 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index ffe11b03724cf5..61fa0a3f8e5eed 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -137,15 +137,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, } else if (uobj->object) { ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason, attrs); - if (ret) { - if (ib_is_destroy_retryable(ret, reason, uobj)) - return ret; - - /* Nothing to be done, dangle the memory and move on */ - WARN(true, - "ib_uverbs: failed to remove uobject id %d, driver err=%d", - uobj->id, ret); - } + if (ret) + /* Nothing to be done, wait till ucontext will clean it */ + return ret; uobj->object = NULL; } @@ -543,12 +537,7 @@ static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj, struct uverbs_obj_idr_type, type); int ret = idr_type->destroy_object(uobj, why, attrs); - /* - * We can only fail gracefully if the user requested to destroy the - * object or when a retry may be called upon an error. - * In the rest of the cases, just remove whatever you can. - */ - if (ib_is_destroy_retryable(ret, why, uobj)) + if (ret) return ret; if (why == RDMA_REMOVE_ABORT) @@ -581,12 +570,8 @@ static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, { const struct uverbs_obj_fd_type *fd_type = container_of( uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); - int ret = fd_type->destroy_object(uobj, why); - if (ib_is_destroy_retryable(ret, why, uobj)) - return ret; - - return 0; + return fd_type->destroy_object(uobj, why); } static void remove_handle_fd_uobject(struct ib_uobject *uobj) @@ -863,11 +848,18 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, * racing with a lookup_get. */ WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE)); + if (reason == RDMA_REMOVE_DRIVER_FAILURE) + obj->object = NULL; if (!uverbs_destroy_uobject(obj, reason, &attrs)) ret = 0; else atomic_set(&obj->usecnt, 0); } + + if (reason == RDMA_REMOVE_DRIVER_FAILURE) { + WARN_ON(!list_empty(&ufile->uobjects)); + return 0; + } return ret; } @@ -889,21 +881,12 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, if (!ufile->ucontext) goto done; - ufile->ucontext->cleanup_retryable = true; - while (!list_empty(&ufile->uobjects)) - if (__uverbs_cleanup_ufile(ufile, reason)) { - /* - * No entry was cleaned-up successfully during this - * iteration. It is a driver bug to fail destruction. - */ - WARN_ON(!list_empty(&ufile->uobjects)); - break; - } - - ufile->ucontext->cleanup_retryable = false; - if (!list_empty(&ufile->uobjects)) - __uverbs_cleanup_ufile(ufile, reason); + while (!list_empty(&ufile->uobjects) && + !__uverbs_cleanup_ufile(ufile, reason)) { + } + if (WARN_ON(!list_empty(&ufile->uobjects))) + __uverbs_cleanup_ufile(ufile, RDMA_REMOVE_DRIVER_FAILURE); ufile_destroy_ucontext(ufile, reason); done: diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 72b209ca77c734..92c9d2a548f31a 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -681,8 +681,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, return 0; ret = ib_dealloc_xrcd_user(xrcd, &attrs->driver_udata); - - if (ib_is_destroy_retryable(ret, why, uobject)) { + if (ret) { atomic_inc(&xrcd->usecnt); return ret; } @@ -690,7 +689,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, if (inode) xrcd_table_delete(dev, inode); - return ret; + return 0; } static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 0658101fca004a..585042ead93928 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -88,7 +88,7 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, return -EBUSY; ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; for (i = 0; i < table_size; i++) @@ -96,7 +96,7 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, kfree(rwq_ind_tbl); kfree(ind_tbl); - return ret; + return 0; } static int uverbs_free_xrcd(struct ib_uobject *uobject, @@ -108,9 +108,8 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject, container_of(uobject, struct ib_uxrcd_object, uobject); int ret; - ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&uxrcd->refcnt)) + return -EBUSY; mutex_lock(&attrs->ufile->device->xrcd_tree_mutex); ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, attrs); @@ -124,11 +123,9 @@ static int uverbs_free_pd(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct ib_pd *pd = uobject->object; - int ret; - ret = ib_destroy_usecnt(&pd->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&pd->usecnt)) + return -EBUSY; return ib_dealloc_pd_user(pd, &attrs->driver_udata); } diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index b3c6c066b60100..999da9c7986687 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -42,9 +42,8 @@ static int uverbs_free_counters(struct ib_uobject *uobject, struct ib_counters *counters = uobject->object; int ret; - ret = ib_destroy_usecnt(&counters->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&counters->usecnt)) + return -EBUSY; ret = counters->device->ops.destroy_counters(counters); if (ret) diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 8dabd05988b237..370ad7c83f8804 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -46,7 +46,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject, int ret; ret = ib_destroy_cq_user(cq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; ib_uverbs_release_ucq( @@ -55,7 +55,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject, ev_queue) : NULL, ucq); - return ret; + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index d5a1de33c2c9ad..98c522cf86d6f2 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -39,11 +39,9 @@ static int uverbs_free_dm(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct ib_dm *dm = uobject->object; - int ret; - ret = ib_destroy_usecnt(&dm->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&dm->usecnt)) + return -EBUSY; return dm->device->ops.dealloc_dm(dm, attrs); } diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index 459cf165b231e6..d42ed7ff223e3d 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -39,11 +39,9 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct ib_flow_action *action = uobject->object; - int ret; - ret = ib_destroy_usecnt(&action->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&action->usecnt)) + return -EBUSY; return action->device->ops.destroy_flow_action(action); } diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c index 3bf8dcdfe7eb64..294cd29d5cede7 100644 --- a/drivers/infiniband/core/uverbs_std_types_qp.c +++ b/drivers/infiniband/core/uverbs_std_types_qp.c @@ -32,14 +32,14 @@ static int uverbs_free_qp(struct ib_uobject *uobject, } ret = ib_destroy_qp_user(qp, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; if (uqp->uxrcd) atomic_dec(&uqp->uxrcd->refcnt); ib_uverbs_release_uevent(&uqp->uevent); - return ret; + return 0; } static int check_creation_flags(enum ib_qp_type qp_type, diff --git a/drivers/infiniband/core/uverbs_std_types_srq.c b/drivers/infiniband/core/uverbs_std_types_srq.c index c0ecbba26bf49f..e5513f828bdc12 100644 --- a/drivers/infiniband/core/uverbs_std_types_srq.c +++ b/drivers/infiniband/core/uverbs_std_types_srq.c @@ -18,7 +18,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, int ret; ret = ib_destroy_srq_user(srq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; if (srq_type == IB_SRQT_XRC) { @@ -30,7 +30,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, } ib_uverbs_release_uevent(uevent); - return ret; + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_CREATE)( diff --git a/drivers/infiniband/core/uverbs_std_types_wq.c b/drivers/infiniband/core/uverbs_std_types_wq.c index f2e6a625724a4b..7ded8339346f31 100644 --- a/drivers/infiniband/core/uverbs_std_types_wq.c +++ b/drivers/infiniband/core/uverbs_std_types_wq.c @@ -17,11 +17,11 @@ static int uverbs_free_wq(struct ib_uobject *uobject, int ret; ret = ib_destroy_wq_user(wq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; ib_uverbs_release_uevent(&uwq->uevent); - return ret; + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_WQ_CREATE)( diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 611ce21157dec7..c315dbe0b6bd09 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1310,7 +1310,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, else ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; devx_event_table = &dev->devx_event_table; @@ -2181,7 +2181,7 @@ static int devx_umem_cleanup(struct ib_uobject *uobject, int err; err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (ib_is_destroy_retryable(err, why, uobject)) + if (err) return err; ib_umem_release(obj->umem); diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 492cfe063bcade..25da0b05b4e2f0 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -2035,11 +2035,9 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct mlx5_ib_flow_matcher *obj = uobject->object; - int ret; - ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&obj->usecnt)) + return -EBUSY; kfree(obj); return 0; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9420827d242188..7e330f4a6d33ff 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1471,6 +1471,8 @@ enum rdma_remove_reason { RDMA_REMOVE_DRIVER_REMOVE, /* uobj is being cleaned-up before being committed */ RDMA_REMOVE_ABORT, + /* The driver failed to destroy the uobject and is being disconnected */ + RDMA_REMOVE_DRIVER_FAILURE, }; struct ib_rdmacg_object { @@ -1483,8 +1485,6 @@ struct ib_ucontext { struct ib_device *device; struct ib_uverbs_file *ufile; - bool cleanup_retryable; - struct ib_rdmacg_object cg_obj; /* * Implementation details of the RDMA core, don't use in drivers: @@ -2903,46 +2903,6 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata, return ib_is_buffer_cleared(udata->inbuf + offset, len); } -/** - * ib_is_destroy_retryable - Check whether the uobject destruction - * is retryable. - * @ret: The initial destruction return code - * @why: remove reason - * @uobj: The uobject that is destroyed - * - * This function is a helper function that IB layer and low-level drivers - * can use to consider whether the destruction of the given uobject is - * retry-able. - * It checks the original return code, if it wasn't success the destruction - * is retryable according to the ucontext state (i.e. cleanup_retryable) and - * the remove reason. (i.e. why). - * Must be called with the object locked for destroy. - */ -static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why, - struct ib_uobject *uobj) -{ - return ret && (why == RDMA_REMOVE_DESTROY || - uobj->context->cleanup_retryable); -} - -/** - * ib_destroy_usecnt - Called during destruction to check the usecnt - * @usecnt: The usecnt atomic - * @why: remove reason - * @uobj: The uobject that is destroyed - * - * Non-zero usecnts will block destruction unless destruction was triggered by - * a ucontext cleanup. - */ -static inline int ib_destroy_usecnt(atomic_t *usecnt, - enum rdma_remove_reason why, - struct ib_uobject *uobj) -{ - if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj)) - return -EBUSY; - return 0; -} - /** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for From c5633a72a1b8a2740bdb1495eab010f1124fd5ee Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 4 Nov 2020 16:45:56 +0200 Subject: [PATCH 058/147] RDMA/core: Make FD destroy callback void All FD object destroy implementations return 0, so declare this callback void. Link: https://lore.kernel.org/r/20201104144556.3809085-3-leon@kernel.org Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 3 ++- drivers/infiniband/core/uverbs_std_types.c | 3 +-- drivers/infiniband/core/uverbs_std_types_async_fd.c | 5 ++--- drivers/infiniband/hw/mlx5/devx.c | 10 ++++------ include/rdma/uverbs_types.h | 4 ++-- 5 files changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 61fa0a3f8e5eed..c44079b9158e14 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -571,7 +571,8 @@ static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, const struct uverbs_obj_fd_type *fd_type = container_of( uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); - return fd_type->destroy_object(uobj, why); + fd_type->destroy_object(uobj, why); + return 0; } static void remove_handle_fd_uobject(struct ib_uobject *uobj) diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 585042ead93928..13776a66e2e43a 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -154,7 +154,7 @@ void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue) spin_unlock_irq(&event_queue->lock); } -static int +static void uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj, enum rdma_remove_reason why) { @@ -163,7 +163,6 @@ uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj, uobj); ib_uverbs_free_event_queue(&file->ev_queue); - return 0; } int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) diff --git a/drivers/infiniband/core/uverbs_std_types_async_fd.c b/drivers/infiniband/core/uverbs_std_types_async_fd.c index 61899eaf1f91ff..cc24cfdf7aee66 100644 --- a/drivers/infiniband/core/uverbs_std_types_async_fd.c +++ b/drivers/infiniband/core/uverbs_std_types_async_fd.c @@ -19,8 +19,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_ASYNC_EVENT_ALLOC)( return 0; } -static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct ib_uverbs_async_event_file *event_file = container_of(uobj, struct ib_uverbs_async_event_file, uobj); @@ -30,7 +30,6 @@ static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, if (why == RDMA_REMOVE_DRIVER_REMOVE) ib_uverbs_async_handler(event_file, 0, IB_EVENT_DEVICE_FATAL, NULL, NULL); - return 0; } int uverbs_async_event_release(struct inode *inode, struct file *filp) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index c315dbe0b6bd09..f6839472b6fb5e 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2594,8 +2594,8 @@ static const struct file_operations devx_async_event_fops = { .llseek = no_llseek, }; -static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_cmd_event_file *comp_ev_file = container_of(uobj, struct devx_async_cmd_event_file, @@ -2617,11 +2617,10 @@ static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, kvfree(entry); } spin_unlock_irq(&comp_ev_file->ev_queue.lock); - return 0; }; -static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void devx_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_event_file *ev_file = container_of(uobj, struct devx_async_event_file, @@ -2665,7 +2664,6 @@ static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, mutex_unlock(&dev->devx_event_table.event_xa_lock); put_device(&dev->ib_dev.dev); - return 0; }; DECLARE_UVERBS_NAMED_METHOD( diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 06db27e35f405a..a27e9fb4903f3d 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -138,8 +138,8 @@ struct uverbs_obj_fd_type { * because the driver is removed or the FD is closed. */ struct uverbs_obj_type type; - int (*destroy_object)(struct ib_uobject *uobj, - enum rdma_remove_reason why); + void (*destroy_object)(struct ib_uobject *uobj, + enum rdma_remove_reason why); const struct file_operations *fops; const char *name; int flags; From fbb7dc5db6dee553b5a07c27e86364a5223e244c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 22:12:30 +0100 Subject: [PATCH 059/147] RDMa/mthca: Work around -Wenum-conversion warning gcc points out a suspicious mixing of enum types in a function that converts from MTHCA_OPCODE_* values to IB_WC_* values: drivers/infiniband/hw/mthca/mthca_cq.c: In function 'mthca_poll_one': drivers/infiniband/hw/mthca/mthca_cq.c:607:21: warning: implicit conversion from 'enum ' to 'enum ib_wc_opcode' [-Wenum-conversion] 607 | entry->opcode = MTHCA_OPCODE_INVALID; Nothing seems to ever check for MTHCA_OPCODE_INVALID again, no idea if this is meaningful, but it seems harmless as it deals with an invalid input. Remove MTHCA_OPCODE_INVALID and set the ib_wc_opcode to 0xFF, which is still bogus, but at least doesn't make compiler warnings. Fixes: 2a4443a69934 ("[PATCH] IB/mthca: fill in opcode field for send completions") Link: https://lore.kernel.org/r/20201026211311.3887003-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mthca/mthca_cq.c | 2 +- drivers/infiniband/hw/mthca/mthca_dev.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index c3cfea243af8c1..36416f937b696c 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -604,7 +604,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev, entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; break; default: - entry->opcode = MTHCA_OPCODE_INVALID; + entry->opcode = 0xFF; break; } } else { diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 9dbbf4d16796a4..a445160de3e16c 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -105,7 +105,6 @@ enum { MTHCA_OPCODE_ATOMIC_CS = 0x11, MTHCA_OPCODE_ATOMIC_FA = 0x12, MTHCA_OPCODE_BIND_MW = 0x18, - MTHCA_OPCODE_INVALID = 0xff }; enum { From b116c702791a9834e6485f67ca6267d9fdf59b87 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:33 +0100 Subject: [PATCH 060/147] RDMA/umem: Use ib_dma_max_seg_size instead of dma_get_max_seg_size RDMA ULPs must not call DMA mapping APIs directly but instead use the ib_dma_* wrappers. Fixes: 0c16d9635e3a ("RDMA/umem: Move to allocate SG table from pages") Link: https://lore.kernel.org/r/20201106181941.1878556-3-hch@lst.de Reported-by: Jason Gunthorpe Signed-off-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index f1fc7e39c782fb..7ca4112e3e8f7f 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -229,10 +229,10 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, cur_base += ret * PAGE_SIZE; npages -= ret; - sg = __sg_alloc_table_from_pages( - &umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT, - dma_get_max_seg_size(device->dma_device), sg, npages, - GFP_KERNEL); + sg = __sg_alloc_table_from_pages(&umem->sg_head, page_list, ret, + 0, ret << PAGE_SHIFT, + ib_dma_max_seg_size(device), sg, npages, + GFP_KERNEL); umem->sg_nents = umem->sg_head.nents; if (IS_ERR(sg)) { unpin_user_pages_dirty_lock(page_list, ret, 0); From 2af29468e3b3793b49f6c4385d2cabcea43fe076 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:37 +0100 Subject: [PATCH 061/147] RDMA/core: Remove ib_dma_{alloc,free}_coherent These two functions are entirely unused. Link: https://lore.kernel.org/r/20201106181941.1878556-7-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7e330f4a6d33ff..a0f9b1c133e4af 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4061,35 +4061,6 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, dma_sync_single_for_device(dev->dma_device, addr, size, dir); } -/** - * ib_dma_alloc_coherent - Allocate memory and map it for DMA - * @dev: The device for which the DMA address is requested - * @size: The size of the region to allocate in bytes - * @dma_handle: A pointer for returning the DMA address of the region - * @flag: memory allocator flags - */ -static inline void *ib_dma_alloc_coherent(struct ib_device *dev, - size_t size, - dma_addr_t *dma_handle, - gfp_t flag) -{ - return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag); -} - -/** - * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent() - * @dev: The device for which the DMA addresses were allocated - * @size: The size of the region - * @cpu_addr: the address returned by ib_dma_alloc_coherent() - * @dma_handle: the DMA address returned by ib_dma_alloc_coherent() - */ -static inline void ib_dma_free_coherent(struct ib_device *dev, - size_t size, void *cpu_addr, - dma_addr_t dma_handle) -{ - dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); -} - /* ib_reg_user_mr - register a memory region for virtual addresses from kernel * space. This function should be called when 'current' is the owning MM. */ From 8ecfca68dc4cbee1272a0161e3f2fb9387dc6930 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:34 +0100 Subject: [PATCH 062/147] RDMA: Lift ibdev_to_node from rds to common code Lift the ibdev_to_node from rds to common code and document it. Link: https://lore.kernel.org/r/20201106181941.1878556-4-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 13 +++++++++++++ net/rds/ib.h | 7 ------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a0f9b1c133e4af..3feb42ef82dc2f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4549,6 +4549,19 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) return coredev->owner; } +/** + * ibdev_to_node - return the NUMA node for a given ib_device + * @dev: device to get the NUMA node for. + */ +static inline int ibdev_to_node(struct ib_device *ibdev) +{ + struct device *parent = ibdev->dev.parent; + + if (!parent) + return NUMA_NO_NODE; + return dev_to_node(parent); +} + /** * rdma_device_to_drv_device - Helper macro to reach back to driver's * ib_device holder structure from device pointer. diff --git a/net/rds/ib.h b/net/rds/ib.h index 8dfff43cf07f46..c23a11d9ad3628 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -264,13 +264,6 @@ struct rds_ib_device { int *vector_load; }; -static inline int ibdev_to_node(struct ib_device *ibdev) -{ - struct device *parent; - - parent = ibdev->dev.parent; - return parent ? dev_to_node(parent) : NUMA_NO_NODE; -} #define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev) /* bits for i_ack_flags */ From 22dd4c707673129ed17e803b4bf68a567b2731db Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:35 +0100 Subject: [PATCH 063/147] nvme-rdma: Use ibdev_to_node instead of dereferencing ->dma_device ->dma_device is a private implementation detail of the RDMA core. Use the ibdev_to_node helper to get the NUMA node for a ib_device instead of poking into ->dma_device. Link: https://lore.kernel.org/r/20201106181941.1878556-5-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index aad829a2b50d0f..9a6957b4b70098 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -854,7 +854,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, return error; ctrl->device = ctrl->queues[0].device; - ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device); + ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev); /* T10-PI support */ if (ctrl->device->dev->attrs.device_cap_flags & From 7af80c02c7b3cf7ac580a33f15d155730574769f Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Thu, 12 Nov 2020 19:29:42 +0800 Subject: [PATCH 064/147] RDMA/hns: Fix double free of the pointer to TSQ/TPQ A return statement is omitted after getting HEM table, then the newly allocated pointer will be freed directly, which will cause a calltrace when the driver was removed. Fixes: d6d91e46210f ("RDMA/hns: Add support for configuring GMV table") Link: https://lore.kernel.org/r/1605180582-46504-1-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 83842cdfe906c5..4d697e408807c1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2582,6 +2582,8 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) if (ret) goto err_get_hem_table_failed; + return 0; + err_get_hem_table_failed: hns_roce_free_link_table(hr_dev, &priv->tpq); From 8bc205eff3056a9ed3850937cdd2192f05e6d3cb Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 4 Nov 2020 16:40:05 +0200 Subject: [PATCH 065/147] RDMA/counter: Combine allocation and bind logic RDMA counters are allocated and bounded to QP immediately after that. Only after this two step process they are really usable. By combining the logic, we are ensuring that once counter is returned to the caller, it will have everything set. Link: https://lore.kernel.org/r/20201104144008.3808124-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 136 +++++++++++++---------------- 1 file changed, 62 insertions(+), 74 deletions(-) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index e4ff0d3328b6c8..2c67ba6a272579 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -64,8 +64,40 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, return ret; } -static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, - enum rdma_nl_counter_mode mode) +static void auto_mode_init_counter(struct rdma_counter *counter, + const struct ib_qp *qp, + enum rdma_nl_counter_mask new_mask) +{ + struct auto_mode_param *param = &counter->mode.param; + + counter->mode.mode = RDMA_COUNTER_MODE_AUTO; + counter->mode.mask = new_mask; + + if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) + param->qp_type = qp->qp_type; +} + +static int __rdma_counter_bind_qp(struct rdma_counter *counter, + struct ib_qp *qp) +{ + int ret; + + if (qp->counter) + return -EINVAL; + + if (!qp->device->ops.counter_bind_qp) + return -EOPNOTSUPP; + + mutex_lock(&counter->lock); + ret = qp->device->ops.counter_bind_qp(counter, qp); + mutex_unlock(&counter->lock); + + return ret; +} + +static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u8 port, + struct ib_qp *qp, + enum rdma_nl_counter_mode mode) { struct rdma_port_counter *port_counter; struct rdma_counter *counter; @@ -88,11 +120,22 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, port_counter = &dev->port_data[port].port_counter; mutex_lock(&port_counter->lock); - if (mode == RDMA_COUNTER_MODE_MANUAL) { + switch (mode) { + case RDMA_COUNTER_MODE_MANUAL: ret = __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_MANUAL, 0); - if (ret) + if (ret) { + mutex_unlock(&port_counter->lock); goto err_mode; + } + break; + case RDMA_COUNTER_MODE_AUTO: + auto_mode_init_counter(counter, qp, port_counter->mode.mask); + break; + default: + ret = -EOPNOTSUPP; + mutex_unlock(&port_counter->lock); + goto err_mode; } port_counter->num_counters++; @@ -102,10 +145,15 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, kref_init(&counter->kref); mutex_init(&counter->lock); + ret = __rdma_counter_bind_qp(counter, qp); + if (ret) + goto err_mode; + + rdma_restrack_parent_name(&counter->res, &qp->res); + rdma_restrack_add(&counter->res); return counter; err_mode: - mutex_unlock(&port_counter->lock); kfree(counter->stats); err_stats: rdma_restrack_put(&counter->res); @@ -132,19 +180,6 @@ static void rdma_counter_free(struct rdma_counter *counter) kfree(counter); } -static void auto_mode_init_counter(struct rdma_counter *counter, - const struct ib_qp *qp, - enum rdma_nl_counter_mask new_mask) -{ - struct auto_mode_param *param = &counter->mode.param; - - counter->mode.mode = RDMA_COUNTER_MODE_AUTO; - counter->mode.mask = new_mask; - - if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) - param->qp_type = qp->qp_type; -} - static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, enum rdma_nl_counter_mask auto_mask) { @@ -161,24 +196,6 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, return match; } -static int __rdma_counter_bind_qp(struct rdma_counter *counter, - struct ib_qp *qp) -{ - int ret; - - if (qp->counter) - return -EINVAL; - - if (!qp->device->ops.counter_bind_qp) - return -EOPNOTSUPP; - - mutex_lock(&counter->lock); - ret = qp->device->ops.counter_bind_qp(counter, qp); - mutex_unlock(&counter->lock); - - return ret; -} - static int __rdma_counter_unbind_qp(struct ib_qp *qp) { struct rdma_counter *counter = qp->counter; @@ -247,13 +264,6 @@ static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp, return counter; } -static void rdma_counter_res_add(struct rdma_counter *counter, - struct ib_qp *qp) -{ - rdma_restrack_parent_name(&counter->res, &qp->res); - rdma_restrack_add(&counter->res); -} - static void counter_release(struct kref *kref) { struct rdma_counter *counter; @@ -293,19 +303,9 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) return ret; } } else { - counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO); + counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO); if (!counter) return -ENOMEM; - - auto_mode_init_counter(counter, qp, port_counter->mode.mask); - - ret = __rdma_counter_bind_qp(counter, qp); - if (ret) { - rdma_counter_free(counter); - return ret; - } - - rdma_counter_res_add(counter, qp); } return 0; @@ -419,15 +419,6 @@ static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) return NULL; } -static int rdma_counter_bind_qp_manual(struct rdma_counter *counter, - struct ib_qp *qp) -{ - if ((counter->device != qp->device) || (counter->port != qp->port)) - return -EINVAL; - - return __rdma_counter_bind_qp(counter, qp); -} - static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, u32 counter_id) { @@ -475,7 +466,12 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, goto err_task; } - ret = rdma_counter_bind_qp_manual(counter, qp); + if ((counter->device != qp->device) || (counter->port != qp->port)) { + ret = -EINVAL; + goto err_task; + } + + ret = __rdma_counter_bind_qp(counter, qp); if (ret) goto err_task; @@ -520,26 +516,18 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, goto err; } - counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL); + counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL); if (!counter) { ret = -ENOMEM; goto err; } - ret = rdma_counter_bind_qp_manual(counter, qp); - if (ret) - goto err_bind; - if (counter_id) *counter_id = counter->id; - rdma_counter_res_add(counter, qp); - rdma_restrack_put(&qp->res); - return ret; + return 0; -err_bind: - rdma_counter_free(counter); err: rdma_restrack_put(&qp->res); return ret; From 0413755c95e76972451ac8433151bc368a065a3f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 4 Nov 2020 16:40:06 +0200 Subject: [PATCH 066/147] RDMA/restrack: Store all special QPs in restrack DB Special QPs (SMI and GSI) have different rules in regards of their QP numbers. While all other QP numbers are unique per-device, the QP0 and QP1 are created per-port as requested by IBTA. In multiple port devices, the number of SMI and GSI QPs with be equal to the number ports. $ rdma dev 0: ibp0s9: node_type ca fw 4.4.9999 node_guid 5254:00c0:fe12:3455 sys_image_guid 5254:00c0:fe12:3455 $ rdma link 0/1: ibp0s9/1: subnet_prefix fe80:0000:0000:0000 lid 13397 sm_lid 49151 lmc 0 state ACTIVE physical_state LINK_UP 0/2: ibp0s9/2: subnet_prefix fe80:0000:0000:0000 lid 13397 sm_lid 49151 lmc 0 state UNKNOWN physical_state UNKNOWN Before: $ rdma res show qp type SMI,GSI link ibp0s9/1 lqpn 0 type SMI state RTS sq-psn 0 comm [ib_core] link ibp0s9/1 lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core] After: $ rdma res show qp type SMI,GSI link ibp0s9/1 lqpn 0 type SMI state RTS sq-psn 0 comm [ib_core] link ibp0s9/1 lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core] link ibp0s9/2 lqpn 0 type SMI state RTS sq-psn 0 comm [ib_core] link ibp0s9/2 lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core] Link: https://lore.kernel.org/r/20201104144008.3808124-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 2 ++ drivers/infiniband/core/restrack.c | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index e84b0fedaacb83..7c4752c47f80ef 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -347,6 +347,8 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, qp->srq = attr->srq; qp->rwq_ind_tbl = attr->rwq_ind_tbl; qp->event_handler = attr->event_handler; + qp->qp_type = attr->qp_type; + qp->port = attr->port_num; atomic_set(&qp->usecnt, 0); spin_lock_init(&qp->mr_lock); diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 4aeeaaed0f17dd..a123b22b454a3c 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -232,8 +232,15 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) /* Special case to ensure that LQPN points to right QP */ struct ib_qp *qp = container_of(res, struct ib_qp, res); - ret = xa_insert(&rt->xa, qp->qp_num, res, GFP_KERNEL); - res->id = ret ? 0 : qp->qp_num; + WARN_ONCE(qp->qp_num >> 24 || qp->port >> 8, + "QP number 0x%0X and port 0x%0X", qp->qp_num, + qp->port); + res->id = qp->qp_num; + if (qp->qp_type == IB_QPT_SMI || qp->qp_type == IB_QPT_GSI) + res->id |= qp->port << 24; + ret = xa_insert(&rt->xa, res->id, res, GFP_KERNEL); + if (ret) + res->id = 0; } else if (res->type == RDMA_RESTRACK_COUNTER) { /* Special case to ensure that cntn points to right counter */ struct rdma_counter *counter; From c80a0c52d85c49a910d0dc0e342e8d8898677dc0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 4 Nov 2020 16:40:07 +0200 Subject: [PATCH 067/147] RDMA/cma: Add missing error handling of listen_id Don't silently continue if rdma_listen() fails but destroy previously created CM_ID and return an error to the caller. Fixes: d02d1f5359e7 ("RDMA/cma: Fix deadlock destroying listen requests") Link: https://lore.kernel.org/r/20201104144008.3808124-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 175 ++++++++++++++++++++-------------- 1 file changed, 101 insertions(+), 74 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 7c2ab1f2fbea37..89cc4b0a48ecc5 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2495,8 +2495,8 @@ static int cma_listen_handler(struct rdma_cm_id *id, return id_priv->id.event_handler(id, event); } -static void cma_listen_on_dev(struct rdma_id_private *id_priv, - struct cma_device *cma_dev) +static int cma_listen_on_dev(struct rdma_id_private *id_priv, + struct cma_device *cma_dev) { struct rdma_id_private *dev_id_priv; struct net *net = id_priv->id.route.addr.dev_addr.net; @@ -2505,13 +2505,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, lockdep_assert_held(&lock); if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) - return; + return 0; dev_id_priv = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, id_priv->id.qp_type, id_priv); if (IS_ERR(dev_id_priv)) - return; + return PTR_ERR(dev_id_priv); dev_id_priv->state = RDMA_CM_ADDR_BOUND; memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), @@ -2527,19 +2527,34 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); if (ret) - dev_warn(&cma_dev->device->dev, - "RDMA CMA: cma_listen_on_dev, error %d\n", ret); + goto err_listen; + return 0; +err_listen: + list_del(&id_priv->listen_list); + dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret); + rdma_destroy_id(&dev_id_priv->id); + return ret; } -static void cma_listen_on_all(struct rdma_id_private *id_priv) +static int cma_listen_on_all(struct rdma_id_private *id_priv) { struct cma_device *cma_dev; + int ret; mutex_lock(&lock); list_add_tail(&id_priv->list, &listen_any_list); - list_for_each_entry(cma_dev, &dev_list, list) - cma_listen_on_dev(id_priv, cma_dev); + list_for_each_entry(cma_dev, &dev_list, list) { + ret = cma_listen_on_dev(id_priv, cma_dev); + if (ret) + goto err_listen; + } + mutex_unlock(&lock); + return 0; + +err_listen: + list_del(&id_priv->list); mutex_unlock(&lock); + return ret; } void rdma_set_service_type(struct rdma_cm_id *id, int tos) @@ -3692,8 +3707,11 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) ret = -ENOSYS; goto err; } - } else - cma_listen_on_all(id_priv); + } else { + ret = cma_listen_on_all(id_priv); + if (ret) + goto err; + } return 0; err: @@ -4745,69 +4763,6 @@ static struct notifier_block cma_nb = { .notifier_call = cma_netdev_callback }; -static int cma_add_one(struct ib_device *device) -{ - struct cma_device *cma_dev; - struct rdma_id_private *id_priv; - unsigned int i; - unsigned long supported_gids = 0; - int ret; - - cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); - if (!cma_dev) - return -ENOMEM; - - cma_dev->device = device; - cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, - sizeof(*cma_dev->default_gid_type), - GFP_KERNEL); - if (!cma_dev->default_gid_type) { - ret = -ENOMEM; - goto free_cma_dev; - } - - cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, - sizeof(*cma_dev->default_roce_tos), - GFP_KERNEL); - if (!cma_dev->default_roce_tos) { - ret = -ENOMEM; - goto free_gid_type; - } - - rdma_for_each_port (device, i) { - supported_gids = roce_gid_type_mask_support(device, i); - WARN_ON(!supported_gids); - if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE)) - cma_dev->default_gid_type[i - rdma_start_port(device)] = - CMA_PREFERRED_ROCE_GID_TYPE; - else - cma_dev->default_gid_type[i - rdma_start_port(device)] = - find_first_bit(&supported_gids, BITS_PER_LONG); - cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0; - } - - init_completion(&cma_dev->comp); - refcount_set(&cma_dev->refcount, 1); - INIT_LIST_HEAD(&cma_dev->id_list); - ib_set_client_data(device, &cma_client, cma_dev); - - mutex_lock(&lock); - list_add_tail(&cma_dev->list, &dev_list); - list_for_each_entry(id_priv, &listen_any_list, list) - cma_listen_on_dev(id_priv, cma_dev); - mutex_unlock(&lock); - - trace_cm_add_one(device); - return 0; - -free_gid_type: - kfree(cma_dev->default_gid_type); - -free_cma_dev: - kfree(cma_dev); - return ret; -} - static void cma_send_device_removal_put(struct rdma_id_private *id_priv) { struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL }; @@ -4870,6 +4825,78 @@ static void cma_process_remove(struct cma_device *cma_dev) wait_for_completion(&cma_dev->comp); } +static int cma_add_one(struct ib_device *device) +{ + struct cma_device *cma_dev; + struct rdma_id_private *id_priv; + unsigned int i; + unsigned long supported_gids = 0; + int ret; + + cma_dev = kmalloc(sizeof(*cma_dev), GFP_KERNEL); + if (!cma_dev) + return -ENOMEM; + + cma_dev->device = device; + cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, + sizeof(*cma_dev->default_gid_type), + GFP_KERNEL); + if (!cma_dev->default_gid_type) { + ret = -ENOMEM; + goto free_cma_dev; + } + + cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, + sizeof(*cma_dev->default_roce_tos), + GFP_KERNEL); + if (!cma_dev->default_roce_tos) { + ret = -ENOMEM; + goto free_gid_type; + } + + rdma_for_each_port (device, i) { + supported_gids = roce_gid_type_mask_support(device, i); + WARN_ON(!supported_gids); + if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE)) + cma_dev->default_gid_type[i - rdma_start_port(device)] = + CMA_PREFERRED_ROCE_GID_TYPE; + else + cma_dev->default_gid_type[i - rdma_start_port(device)] = + find_first_bit(&supported_gids, BITS_PER_LONG); + cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0; + } + + init_completion(&cma_dev->comp); + refcount_set(&cma_dev->refcount, 1); + INIT_LIST_HEAD(&cma_dev->id_list); + ib_set_client_data(device, &cma_client, cma_dev); + + mutex_lock(&lock); + list_add_tail(&cma_dev->list, &dev_list); + list_for_each_entry(id_priv, &listen_any_list, list) { + ret = cma_listen_on_dev(id_priv, cma_dev); + if (ret) + goto free_listen; + } + mutex_unlock(&lock); + + trace_cm_add_one(device); + return 0; + +free_listen: + list_del(&cma_dev->list); + mutex_unlock(&lock); + + cma_process_remove(cma_dev); + kfree(cma_dev->default_roce_tos); +free_gid_type: + kfree(cma_dev->default_gid_type); + +free_cma_dev: + kfree(cma_dev); + return ret; +} + static void cma_remove_one(struct ib_device *device, void *client_data) { struct cma_device *cma_dev = client_data; From 31e2daa17ed9684e73a1e8e5080b70b0230bf59a Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Thu, 12 Nov 2020 21:19:44 +0800 Subject: [PATCH 068/147] RDMA/hns: Add new PCI device ID matching for HIP09 The 200G device has a new device ID 0xA228, add it to the PCI table. Link: https://lore.kernel.org/r/1605187184-26079-1-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4d697e408807c1..4b8291263e5cd0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6327,6 +6327,7 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_200G_RDMA), 0}, /* required last entry */ {0, } }; From 8c030d780ad279d710dc912050e593aea32f5514 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 15 Nov 2020 12:34:02 +0200 Subject: [PATCH 069/147] RDMA/efa: Remove .create_ah callback assignment Drivers now expose two callbacks for address handle creation, one for uverbs and one for kverbs. EFA only supports uverbs so the .create_ah assignment can be removed. Fix the core code caller to check the proper function pointer. Link: https://lore.kernel.org/r/20201115103404.48829-3-galpress@amazon.com Signed-off-by: Gal Pressman Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 2 +- drivers/infiniband/hw/efa/efa_main.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index ab1e6048685ee5..33778f8674a145 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -516,7 +516,7 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE); - if (!device->ops.create_ah) + if (!udata && !device->ops.create_ah) return ERR_PTR(-EOPNOTSUPP); ah = rdma_zalloc_drv_obj_gfp( diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 2b3da85fb43c98..cb2f2c647ee565 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -245,7 +245,6 @@ static const struct ib_device_ops efa_dev_ops = { .alloc_hw_stats = efa_alloc_hw_stats, .alloc_pd = efa_alloc_pd, .alloc_ucontext = efa_alloc_ucontext, - .create_ah = efa_create_ah, .create_cq = efa_create_cq, .create_qp = efa_create_qp, .create_user_ah = efa_create_ah, From b045db62f6f61c2f0f993696abe620379db34163 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 15 Nov 2020 13:43:05 +0200 Subject: [PATCH 070/147] RDMA/mlx5: Use ib_umem_find_best_pgoff() for SRQ SRQ uses a quantized and scaled page_offset, which is another variation of ib_umem_find_best_pgsz(). Add mlx5_umem_find_best_quantized_pgoff() to perform this calculation for each mailbox. A macro shows how the calculation is directly connected to the mailbox format. This new routine replaces the limited mlx5_ib_cont_pages() and mlx5_ib_get_buf_offset() pairing which would reject valid configurations rather than adjust the page_size to make it work. In turn this is much more aggressive about choosing large page sizes for these objects and when THP is enabled it will now often find a single page solution. Link: https://lore.kernel.org/r/20201115114311.136250-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mem.c | 45 ++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 31 +++++++++++++++++++ drivers/infiniband/hw/mlx5/srq.c | 20 ++++++------- include/rdma/ib_umem.h | 42 ++++++++++++++++++++++++++ 4 files changed, 128 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 92e7621ec858b1..fd9778113d265a 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -107,6 +107,51 @@ void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, } } +/* + * Compute the page shift and page_offset for mailboxes that use a quantized + * page_offset. The granulatity of the page offset scales according to page + * size. + */ +unsigned long __mlx5_umem_find_best_quantized_pgoff( + struct ib_umem *umem, unsigned long pgsz_bitmap, + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, + unsigned int *page_offset_quantized) +{ + const u64 page_offset_mask = (1 << page_offset_bits) - 1; + unsigned long page_size; + u64 page_offset; + + page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, pgoff_bitmask); + if (!page_size) + return 0; + + /* + * page size is the largest possible page size. + * + * Reduce the page_size, and thus the page_offset and quanta, until the + * page_offset fits into the mailbox field. Once page_size < scale this + * loop is guaranteed to terminate. + */ + page_offset = ib_umem_dma_offset(umem, page_size); + while (page_offset & ~(u64)(page_offset_mask * (page_size / scale))) { + page_size /= 2; + page_offset = ib_umem_dma_offset(umem, page_size); + } + + /* + * The address is not aligned, or otherwise cannot be represented by the + * page_offset. + */ + if (!(pgsz_bitmap & page_size)) + return 0; + + *page_offset_quantized = + (unsigned long)page_offset / (page_size / scale); + if (WARN_ON(*page_offset_quantized > page_offset_mask)) + return 0; + return page_size; +} + int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) { u64 page_size; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index bb44080170be8c..2f08a5b4a43856 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -69,6 +69,37 @@ __mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits, pgsz_shift), \ iova) +static __always_inline unsigned long +__mlx5_page_offset_to_bitmask(unsigned int page_offset_bits, + unsigned int offset_shift) +{ + unsigned int largest_offset_shift = + min_t(unsigned long, page_offset_bits - 1 + offset_shift, + BITS_PER_LONG - 1); + + return GENMASK(largest_offset_shift, offset_shift); +} + +/* + * QP/CQ/WQ/etc type commands take a page offset that satisifies: + * page_offset_quantized * (page_size/scale) = page_offset + * Which restricts allowed page sizes to ones that satisify the above. + */ +unsigned long __mlx5_umem_find_best_quantized_pgoff( + struct ib_umem *umem, unsigned long pgsz_bitmap, + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, + unsigned int *page_offset_quantized); +#define mlx5_umem_find_best_quantized_pgoff(umem, typ, log_pgsz_fld, \ + pgsz_shift, page_offset_fld, \ + scale, page_offset_quantized) \ + __mlx5_umem_find_best_quantized_pgoff( \ + umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \ + __mlx5_bit_sz(typ, page_offset_fld), \ + GENMASK(31, order_base_2(scale)), scale, \ + page_offset_quantized) + enum { MLX5_IB_MMAP_OFFSET_START = 9, MLX5_IB_MMAP_OFFSET_END = 255, diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index cb5ad045da9abc..7dfdc9e548661c 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -51,8 +51,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, udata, struct mlx5_ib_ucontext, ibucontext); size_t ucmdlen; int err; - int page_shift; - u32 offset; + unsigned int page_offset_quantized; + unsigned int page_size; u32 uidx = MLX5_IB_DEFAULT_UIDX; ucmdlen = min(udata->inlen, sizeof(ucmd)); @@ -85,22 +85,22 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, return err; } - mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &page_shift); - err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, - &offset); - if (err) { + page_size = mlx5_umem_find_best_quantized_pgoff( + srq->umem, srqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &page_offset_quantized); + if (!page_size) { mlx5_ib_warn(dev, "bad offset\n"); goto err_umem; } - in->pas = kvcalloc(ib_umem_num_dma_blocks(srq->umem, 1UL << page_shift), + in->pas = kvcalloc(ib_umem_num_dma_blocks(srq->umem, page_size), sizeof(*in->pas), GFP_KERNEL); if (!in->pas) { err = -ENOMEM; goto err_umem; } - mlx5_ib_populate_pas(srq->umem, 1UL << page_shift, in->pas, 0); + mlx5_ib_populate_pas(srq->umem, page_size, in->pas, 0); err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db); if (err) { @@ -108,8 +108,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, goto err_in; } - in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - in->page_offset = offset; + in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT; + in->page_offset = page_offset_quantized; in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && in->type != IB_SRQT_BASIC) diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 70597508c7656b..7752211c96384f 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -34,6 +34,13 @@ static inline int ib_umem_offset(struct ib_umem *umem) return umem->address & ~PAGE_MASK; } +static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem, + unsigned long pgsz) +{ + return (sg_dma_address(umem->sg_head.sgl) + ib_umem_offset(umem)) & + (pgsz - 1); +} + static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem, unsigned long pgsz) { @@ -79,6 +86,35 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, unsigned long virt); +/** + * ib_umem_find_best_pgoff - Find best HW page size + * + * @umem: umem struct + * @pgsz_bitmap bitmap of HW supported page sizes + * @pgoff_bitmask: Mask of bits that can be represented with an offset + * + * This is very similar to ib_umem_find_best_pgsz() except instead of accepting + * an IOVA it accepts a bitmask specifying what address bits can be represented + * with a page offset. + * + * For instance if the HW has multiple page sizes, requires 64 byte alignemnt, + * and can support aligned offsets up to 4032 then pgoff_bitmask would be + * "111111000000". + * + * If the pgoff_bitmask requires either alignment in the low bit or an + * unavailable page size for the high bits, this function returns 0. + */ +static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, + unsigned long pgsz_bitmap, + u64 pgoff_bitmask) +{ + struct scatterlist *sg = umem->sg_head.sgl; + dma_addr_t dma_addr; + + dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK); + return ib_umem_find_best_pgsz(umem, pgsz_bitmap, + dma_addr & pgoff_bitmask); +} #else /* CONFIG_INFINIBAND_USER_MEM */ @@ -101,6 +137,12 @@ static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, { return 0; } +static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, + unsigned long pgsz_bitmap, + u64 pgoff_bitmask) +{ + return 0; +} #endif /* CONFIG_INFINIBAND_USER_MEM */ From ad480ea5d6ea605bf63d23f5f91447d5cac3c95e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 15 Nov 2020 13:43:06 +0200 Subject: [PATCH 071/147] RDMA/mlx5: Use mlx5_umem_find_best_quantized_pgoff() for WQ This fixes a subtle bug, the WQ mailbox has only 5 bits to describe the page_offset, while mlx5_ib_get_buf_offset() is hard wired to only work with 6 bit page_offsets. Thus it did not properly reject badly aligned buffers. Fixes: 79b20a6c3014 ("IB/mlx5: Add receive Work Queue verbs") Fixes: 0fb2ed66a14c ("IB/mlx5: Add create and destroy functionality for Raw Packet QP") Link: https://lore.kernel.org/r/20201115114311.136250-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 53 +++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index cfabcd4d46b691..125dd00cc27d2d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -833,7 +833,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, { struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - int page_shift = 0; + unsigned long page_size = 0; u32 offset = 0; int err; @@ -847,24 +847,25 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } - mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &page_shift); - err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, - &rwq->rq_page_offset); - if (err) { + page_size = mlx5_umem_find_best_quantized_pgoff( + rwq->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &rwq->rq_page_offset); + if (!page_size) { mlx5_ib_warn(dev, "bad offset\n"); + err = -EINVAL; goto err_umem; } - rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, 1UL << page_shift); - rwq->page_shift = page_shift; - rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, page_size); + rwq->page_shift = order_base_2(page_size); + rwq->log_page_size = rwq->page_shift - MLX5_ADAPTER_PAGE_SHIFT; rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE); mlx5_ib_dbg( dev, - "addr 0x%llx, size %zd, npages %zu, page_shift %d, ncont %d, offset %d\n", + "addr 0x%llx, size %zd, npages %zu, page_size %ld, ncont %d, offset %d\n", (unsigned long long)ucmd->buf_addr, rwq->buf_size, - ib_umem_num_pages(rwq->umem), page_shift, rwq->rq_num_pas, + ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas, offset); err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db); @@ -1209,17 +1210,24 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, void *wq; int inlen; int err; - int page_shift = 0; - u32 offset = 0; - - err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size, - &sq->ubuffer.umem, &page_shift, &offset); - if (err) - return err; + unsigned int page_offset_quantized; + unsigned long page_size; + + sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, + ubuffer->buf_size, 0); + if (IS_ERR(sq->ubuffer.umem)) + return PTR_ERR(sq->ubuffer.umem); + page_size = mlx5_umem_find_best_quantized_pgoff( + ubuffer->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } inlen = MLX5_ST_SZ_BYTES(create_sq_in) + - sizeof(u64) * ib_umem_num_dma_blocks(sq->ubuffer.umem, - 1UL << page_shift); + sizeof(u64) * + ib_umem_num_dma_blocks(sq->ubuffer.umem, page_size); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; @@ -1247,11 +1255,12 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size)); - MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(wq, wq, page_offset, offset); + MLX5_SET(wq, wq, log_wq_pg_sz, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(wq, wq, page_offset, page_offset_quantized); pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - mlx5_ib_populate_pas(sq->ubuffer.umem, 1UL << page_shift, pas, 0); + mlx5_ib_populate_pas(sq->ubuffer.umem, page_size, pas, 0); err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp); From 7579dcdf737d76a28243add75dc78babf09e29fa Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 15 Nov 2020 13:43:07 +0200 Subject: [PATCH 072/147] RDMA/mlx5: Directly compute the PAS list for raw QP RQ's The RQ WQ created when making a raw ethernet QP copies the PAS list from a dummy QPC command created earlier in the flow. The WQC and QPC PAS lists are not fully compatible as the page_offset is a different size. Create the RQ WQ's PAS list directly and do not try to copy it from another command structure. Like the prior patch, this also means that badly aligned buffers were not correctly rejected. Link: https://lore.kernel.org/r/20201115114311.136250-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 41 +++++++++++++-------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 125dd00cc27d2d..fefe8af6eced59 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1286,40 +1286,31 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, ib_umem_release(sq->ubuffer.umem); } -static size_t get_rq_pas_size(void *qpc) -{ - u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12; - u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride); - u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size); - u32 page_offset = MLX5_GET(qpc, qpc, page_offset); - u32 po_quanta = 1 << (log_page_size - 6); - u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride); - u32 page_size = 1 << log_page_size; - u32 rq_sz_po = rq_sz + (page_offset * po_quanta); - u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size; - - return rq_num_pas * sizeof(u64); -} - static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, void *qpin, - size_t qpinlen, struct ib_pd *pd) + struct ib_pd *pd) { struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; - __be64 *qp_pas; void *in; void *rqc; void *wq; void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc); - size_t rq_pas_size = get_rq_pas_size(qpc); + struct ib_umem *umem = rq->base.ubuffer.umem; + unsigned int page_offset_quantized; + unsigned long page_size = 0; size_t inlen; int err; - if (qpinlen < rq_pas_size + MLX5_BYTE_OFF(create_qp_in, pas)) + page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz, + MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, + &page_offset_quantized); + if (!page_size) return -EINVAL; - inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size; + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + + sizeof(u64) * ib_umem_num_dma_blocks(umem, page_size); in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1341,16 +1332,16 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); if (rq->flags & MLX5_IB_RQ_PCI_WRITE_END_PADDING) MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); - MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset)); + MLX5_SET(wq, wq, page_offset, page_offset_quantized); MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd)); MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4); - MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size)); + MLX5_SET(wq, wq, log_wq_pg_sz, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size)); pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas); - memcpy(pas, qp_pas, rq_pas_size); + mlx5_ib_populate_pas(umem, page_size, pas, 0); err = mlx5_core_create_rq_tracked(dev, in, inlen, &rq->base.mqp); @@ -1471,7 +1462,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; - err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd); + err = create_raw_packet_qp_rq(dev, rq, in, pd); if (err) goto err_destroy_sq; From a59b7b05efc827929c2aa46fc2fe561982bd19fc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 15 Nov 2020 13:43:08 +0200 Subject: [PATCH 073/147] RDMA/mlx5: Use mlx5_umem_find_best_quantized_pgoff() for QP Delete custom logic in the QP in favor of more general variant. Link: https://lore.kernel.org/r/20201115114311.136250-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mem.c | 21 -------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - drivers/infiniband/hw/mlx5/qp.c | 71 +++++++++------------------- 3 files changed, 23 insertions(+), 70 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index fd9778113d265a..ad9830eeb4b6aa 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -152,27 +152,6 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff( return page_size; } -int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) -{ - u64 page_size; - u64 page_mask; - u64 off_size; - u64 off_mask; - u64 buf_off; - - page_size = (u64)1 << page_shift; - page_mask = page_size - 1; - buf_off = addr & page_mask; - off_size = page_size >> 6; - off_mask = off_size - 1; - - if (buf_off & off_mask) - return -EINVAL; - - *offset = buf_off >> ilog2(off_size); - return 0; -} - #define WR_ID_BF 0xBF #define WR_ID_END 0xBAD #define TEST_WC_NUM_WQES 255 diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2f08a5b4a43856..555bc94d478692 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1267,7 +1267,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); -int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, struct ib_smp *out_mad); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index fefe8af6eced59..1ff156cfb5b14b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -778,39 +778,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, return bfregi->sys_pages[index_of_sys_page] + offset; } -static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, - unsigned long addr, size_t size, - struct ib_umem **umem, int *page_shift, - u32 *offset) -{ - int err; - - *umem = ib_umem_get(&dev->ib_dev, addr, size, 0); - if (IS_ERR(*umem)) { - mlx5_ib_dbg(dev, "umem_get failed\n"); - return PTR_ERR(*umem); - } - - mlx5_ib_cont_pages(*umem, addr, 0, page_shift); - - err = mlx5_ib_get_buf_offset(addr, *page_shift, offset); - if (err) { - mlx5_ib_warn(dev, "bad offset\n"); - goto err_umem; - } - - mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %zu, page_shift %d, offset %d\n", - addr, size, ib_umem_num_pages(*umem), *page_shift, *offset); - - return 0; - -err_umem: - ib_umem_release(*umem); - *umem = NULL; - - return err; -} - static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_rwq *rwq, struct ib_udata *udata) { @@ -897,9 +864,9 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, { struct mlx5_ib_ucontext *context; struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer; - int page_shift = 0; + unsigned int page_offset_quantized = 0; + unsigned long page_size = 0; int uar_index = 0; - u32 offset = 0; int bfregn; int ncont = 0; __be64 *pas; @@ -950,12 +917,21 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (ucmd->buf_addr && ubuffer->buf_size) { ubuffer->buf_addr = ucmd->buf_addr; - err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, - ubuffer->buf_size, &ubuffer->umem, - &page_shift, &offset); - if (err) + ubuffer->umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, + ubuffer->buf_size, 0); + if (IS_ERR(ubuffer->umem)) { + err = PTR_ERR(ubuffer->umem); goto err_bfreg; - ncont = ib_umem_num_dma_blocks(ubuffer->umem, 1UL << page_shift); + } + page_size = mlx5_umem_find_best_quantized_pgoff( + ubuffer->umem, qpc, log_page_size, + MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, + &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } + ncont = ib_umem_num_dma_blocks(ubuffer->umem, page_size); } else { ubuffer->umem = NULL; } @@ -970,15 +946,14 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, uid = (attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0; MLX5_SET(create_qp_in, *in, uid, uid); - pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); - if (ubuffer->umem) - mlx5_ib_populate_pas(ubuffer->umem, 1UL << page_shift, pas, 0); - qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); - - MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(qpc, qpc, page_offset, offset); - + pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); + if (ubuffer->umem) { + mlx5_ib_populate_pas(ubuffer->umem, page_size, pas, 0); + MLX5_SET(qpc, qpc, log_page_size, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, page_offset, page_offset_quantized); + } MLX5_SET(qpc, qpc, uar_page, uar_index); if (bfregn != MLX5_IB_INVALID_BFREG) resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn); From c08fbdc57741026a440d01593e09e11b60b3e210 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 15 Nov 2020 13:43:09 +0200 Subject: [PATCH 074/147] RDMA/mlx5: mlx5_umem_find_best_quantized_pgoff() for CQ This fixes a bug where the page_offset was not being considered when building a CQ. The HW specification says it 'must be zero', so use a variant of mlx5_umem_find_best_quantized_pgoff() with a 0 pgoff_bitmask to force this result. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Link: https://lore.kernel.org/r/20201115114311.136250-6-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cq.c | 48 ++++++++++++++++++++-------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 10 ++++++ 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 9ab93d730769f3..eb92cefffd7771 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -707,8 +707,9 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd = {}; + unsigned long page_size; + unsigned int page_offset_quantized; size_t ucmdlen; - int page_shift; __be64 *pas; int ncont; void *cqc; @@ -741,17 +742,24 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, return err; } + page_size = mlx5_umem_find_best_cq_quantized_pgoff( + cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } + err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &cq->db); if (err) goto err_umem; - mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &page_shift); - ncont = ib_umem_num_dma_blocks(cq->buf.umem, 1UL << page_shift); + ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size); mlx5_ib_dbg( dev, - "addr 0x%llx, size %u, npages %zu, page_shift %d, ncont %d\n", + "addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n", ucmd.buf_addr, entries * ucmd.cqe_size, - ib_umem_num_pages(cq->buf.umem), page_shift, ncont); + ib_umem_num_pages(cq->buf.umem), page_size, ncont); *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont; @@ -762,11 +770,12 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, } pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); - mlx5_ib_populate_pas(cq->buf.umem, 1UL << page_shift, pas, 0); + mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0); cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); MLX5_SET(cqc, cqc, log_page_size, - page_shift - MLX5_ADAPTER_PAGE_SHIFT); + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) { *index = ucmd.uar_page_index; @@ -1131,7 +1140,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, int entries, struct ib_udata *udata, - int *page_shift, int *cqe_size) + int *cqe_size) { struct mlx5_ib_resize_cq ucmd; struct ib_umem *umem; @@ -1156,8 +1165,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, return err; } - mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, page_shift); - cq->resize_umem = umem; *cqe_size = ucmd.cqe_size; @@ -1250,7 +1257,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) int err; int npas; __be64 *pas; - int page_shift; + unsigned int page_offset_quantized = 0; + unsigned int page_shift; int inlen; int cqe_size; unsigned long flags; @@ -1277,11 +1285,22 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) mutex_lock(&cq->resize_mutex); if (udata) { - err = resize_user(dev, cq, entries, udata, &page_shift, - &cqe_size); + unsigned long page_size; + + err = resize_user(dev, cq, entries, udata, &cqe_size); if (err) goto ex; - npas = ib_umem_num_dma_blocks(cq->resize_umem, 1UL << page_shift); + + page_size = mlx5_umem_find_best_cq_quantized_pgoff( + cq->resize_umem, cqc, log_page_size, + MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, + &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto ex_resize; + } + npas = ib_umem_num_dma_blocks(cq->resize_umem, page_size); + page_shift = order_base_2(page_size); } else { struct mlx5_frag_buf *frag_buf; @@ -1320,6 +1339,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) MLX5_SET(cqc, cqc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size, cq->private_flags & diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 555bc94d478692..3098a7e0d18631 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -100,6 +100,16 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff( GENMASK(31, order_base_2(scale)), scale, \ page_offset_quantized) +#define mlx5_umem_find_best_cq_quantized_pgoff(umem, typ, log_pgsz_fld, \ + pgsz_shift, page_offset_fld, \ + scale, page_offset_quantized) \ + __mlx5_umem_find_best_quantized_pgoff( \ + umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \ + __mlx5_bit_sz(typ, page_offset_fld), 0, scale, \ + page_offset_quantized) + enum { MLX5_IB_MMAP_OFFSET_START = 9, MLX5_IB_MMAP_OFFSET_END = 255, From 878f7b31c3a7f3e48c6601ea373b8688e7e308e0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 15 Nov 2020 13:43:10 +0200 Subject: [PATCH 075/147] RDMA/mlx5: Use ib_umem_find_best_pgsz() for devx Since devx uses the new rdma_for_each_block() to fill the PAS it can also use ib_umem_find_best_pgsz(). However, the umem constructionin devx is complicated, the umem must still respect all the HW limits such as page_offset_quantized and the IOVA alignment. Since we don't know what the user intends to use the umem for we have to limit it to PAGE_SIZE. There are users trying to mix umem's with mkeys so this makes them work reliably, at least for an identity IOVA, by ensuring the IOVA matches the selected page size. Last user of mlx5_ib_get_buf_offset() so it can also be removed. Fixes: aeae94579caf ("IB/mlx5: Add DEVX support for memory registration") Link: https://lore.kernel.org/r/20201115114311.136250-7-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 66 +++++++++++++++------------- drivers/infiniband/hw/mlx5/mem.c | 55 ----------------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 --- 3 files changed, 35 insertions(+), 91 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index f6839472b6fb5e..7c3eefba619716 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -93,8 +93,6 @@ struct devx_async_event_file { struct devx_umem { struct mlx5_core_dev *mdev; struct ib_umem *umem; - u32 page_offset; - int page_shift; u32 dinlen; u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; }; @@ -2057,7 +2055,6 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, size_t size; u32 access; int err; - u32 page_mask; if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN)) @@ -2078,46 +2075,55 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); - - mlx5_ib_cont_pages(obj->umem, obj->umem->address, - MLX5_MKEY_PAGE_SHIFT_MASK, &obj->page_shift); - page_mask = (1 << obj->page_shift) - 1; - obj->page_offset = obj->umem->address & page_mask; - return 0; } -static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs, +static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, + struct uverbs_attr_bundle *attrs, struct devx_umem *obj, struct devx_umem_reg_cmd *cmd) { - cmd->inlen = - MLX5_ST_SZ_BYTES(create_umem_in) + - (MLX5_ST_SZ_BYTES(mtt) * - ib_umem_num_dma_blocks(obj->umem, 1UL << obj->page_shift)); - cmd->in = uverbs_zalloc(attrs, cmd->inlen); - return PTR_ERR_OR_ZERO(cmd->in); -} - -static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, - struct devx_umem *obj, - struct devx_umem_reg_cmd *cmd) -{ - void *umem; + unsigned int page_size; __be64 *mtt; + void *umem; + + /* + * We don't know what the user intends to use this umem for, but the HW + * restrictions must be met. MR, doorbell records, QP, WQ and CQ all + * have different requirements. Since we have no idea how to sort this + * out, only support PAGE_SIZE with the expectation that userspace will + * provide the necessary alignments inside the known PAGE_SIZE and that + * FW will check everything. + */ + page_size = ib_umem_find_best_pgoff( + obj->umem, PAGE_SIZE, + __mlx5_page_offset_to_bitmask(__mlx5_bit_sz(umem, page_offset), + 0)); + if (!page_size) + return -EINVAL; + + cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) + + (MLX5_ST_SZ_BYTES(mtt) * + ib_umem_num_dma_blocks(obj->umem, page_size)); + cmd->in = uverbs_zalloc(attrs, cmd->inlen); + if (!cmd->in) + return PTR_ERR(cmd->in); umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM); MLX5_SET64(umem, umem, num_of_mtt, - ib_umem_num_dma_blocks(obj->umem, 1UL << obj->page_shift)); - MLX5_SET(umem, umem, log_page_size, obj->page_shift - - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(umem, umem, page_offset, obj->page_offset); - mlx5_ib_populate_pas(obj->umem, 1UL << obj->page_shift, mtt, + ib_umem_num_dma_blocks(obj->umem, page_size)); + MLX5_SET(umem, umem, log_page_size, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(umem, umem, page_offset, + ib_umem_dma_offset(obj->umem, page_size)); + + mlx5_ib_populate_pas(obj->umem, page_size, mtt, (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) | MLX5_IB_MTT_READ); + return 0; } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( @@ -2144,12 +2150,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( if (err) goto err_obj_free; - err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd); + err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd); if (err) goto err_umem_release; - devx_umem_reg_cmd_build(dev, obj, &cmd); - MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid); err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, sizeof(cmd.out)); diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index ad9830eeb4b6aa..f8ec5156d8e963 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -36,61 +36,6 @@ #include "mlx5_ib.h" #include -/* @umem: umem object to scan - * @addr: ib virtual address requested by the user - * @max_page_shift: high limit for page_shift - 0 means no limit - * @shift: page shift for the compound pages found in the region - */ -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, int *shift) -{ - unsigned long tmp; - unsigned long m; - u64 base = ~0, p = 0; - u64 len, pfn; - int i = 0; - struct scatterlist *sg; - int entry; - - if (umem->is_odp) { - struct ib_umem_odp *odp = to_ib_umem_odp(umem); - - *shift = odp->page_shift; - return; - } - - addr = addr >> PAGE_SHIFT; - tmp = (unsigned long)addr; - m = find_first_bit(&tmp, BITS_PER_LONG); - if (max_page_shift) - m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); - - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - pfn = sg_dma_address(sg) >> PAGE_SHIFT; - if (base + p != pfn) { - /* If either the offset or the new - * base are unaligned update m - */ - tmp = (unsigned long)(pfn | p); - if (!IS_ALIGNED(tmp, 1 << m)) - m = find_first_bit(&tmp, BITS_PER_LONG); - - base = pfn; - p = 0; - } - - p += len; - i += len; - } - - if (i) - m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); - else - m = 0; - *shift = PAGE_SHIFT + m; -} - /* * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be * filled in the pas array. diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 3098a7e0d18631..718e59fce006d4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -40,8 +40,6 @@ #define MLX5_IB_DEFAULT_UIDX 0xffffff #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) -#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size) - static __always_inline unsigned long __mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits, unsigned int pgsz_shift) @@ -1296,9 +1294,6 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, - int *shift); void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, u64 access_flags); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); From 8a7904a672a1d33c848e5129f886ee69e0773a2e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 15 Nov 2020 13:43:11 +0200 Subject: [PATCH 076/147] RDMA/mlx5: Lower setting the umem's PAS for SRQ Some of the SRQ types are created using a WQ, and the WQ requires a different parameter set to mlx5_umem_find_best_quantized_pgoff() as it has a 5 bit page_offset. Add the umem to the mlx5_srq_attr and defer computing the PAS data until the code has figured out what kind of mailbox to use. Compute the PAS directly from the umem for each of the four unique mailbox types. This also avoids allocating memory to store the user PAS, instead it is written directly to the mailbox as in most other cases. Fixes: 01949d0109ee ("net/mlx5_core: Enable XRCs and SRQs when using ISSI > 0") Link: https://lore.kernel.org/r/20201115114311.136250-8-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/srq.c | 27 +--------- drivers/infiniband/hw/mlx5/srq.h | 1 + drivers/infiniband/hw/mlx5/srq_cmd.c | 80 ++++++++++++++++++++++++++-- 3 files changed, 79 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 7dfdc9e548661c..fab6736e4d6a05 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -51,8 +51,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, udata, struct mlx5_ib_ucontext, ibucontext); size_t ucmdlen; int err; - unsigned int page_offset_quantized; - unsigned int page_size; u32 uidx = MLX5_IB_DEFAULT_UIDX; ucmdlen = min(udata->inlen, sizeof(ucmd)); @@ -84,32 +82,14 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, err = PTR_ERR(srq->umem); return err; } - - page_size = mlx5_umem_find_best_quantized_pgoff( - srq->umem, srqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT, - page_offset, 64, &page_offset_quantized); - if (!page_size) { - mlx5_ib_warn(dev, "bad offset\n"); - goto err_umem; - } - - in->pas = kvcalloc(ib_umem_num_dma_blocks(srq->umem, page_size), - sizeof(*in->pas), GFP_KERNEL); - if (!in->pas) { - err = -ENOMEM; - goto err_umem; - } - - mlx5_ib_populate_pas(srq->umem, page_size, in->pas, 0); + in->umem = srq->umem; err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db); if (err) { mlx5_ib_dbg(dev, "map doorbell failed\n"); - goto err_in; + goto err_umem; } - in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT; - in->page_offset = page_offset_quantized; in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && in->type != IB_SRQT_BASIC) @@ -117,9 +97,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, return 0; -err_in: - kvfree(in->pas); - err_umem: ib_umem_release(srq->umem); diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index 2c3627b2509d96..a7e3dc5564ac90 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -28,6 +28,7 @@ struct mlx5_srq_attr { u32 user_index; u64 db_record; __be64 *pas; + struct ib_umem *umem; u32 tm_log_list_size; u32 tm_next_tag; u32 tm_hw_phase_cnt; diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index db889ec3fd48e8..8b338539659933 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -92,6 +92,25 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) return srq; } +static int __set_srq_page_size(struct mlx5_srq_attr *in, + unsigned long page_size) +{ + if (!page_size) + return -EINVAL; + in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT; + + if (WARN_ON(get_pas_size(in) != + ib_umem_num_dma_blocks(in->umem, page_size) * sizeof(u64))) + return -EINVAL; + return 0; +} + +#define set_srq_page_size(in, typ, log_pgsz_fld) \ + __set_srq_page_size(in, mlx5_umem_find_best_quantized_pgoff( \ + (in)->umem, typ, log_pgsz_fld, \ + MLX5_ADAPTER_PAGE_SHIFT, page_offset, \ + 64, &(in)->page_offset)) + static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { @@ -103,6 +122,12 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, srqc, log_page_size); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -114,7 +139,13 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); set_srqc(srqc, in); - memcpy(pas, in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_SRQ); @@ -194,6 +225,12 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, xrc_srqc, log_page_size); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -207,7 +244,13 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, set_srqc(xrc_srqc, in); MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); - memcpy(pas, in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_xrc_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); @@ -289,11 +332,18 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, void *create_in = NULL; void *rmpc; void *wq; + void *pas; int pas_size; int outlen; int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, wq, log_wq_pg_sz); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; outlen = MLX5_ST_SZ_BYTES(create_rmp_out); @@ -309,8 +359,16 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); MLX5_SET(create_rmp_in, create_in, uid, in->uid); + pas = MLX5_ADDR_OF(rmpc, rmpc, wq.pas); + set_wq(wq, in); - memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP); err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen); @@ -421,10 +479,17 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, void *create_in; void *xrqc; void *wq; + void *pas; int pas_size; int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, wq, log_wq_pg_sz); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -433,9 +498,16 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + pas = MLX5_ADDR_OF(xrqc, xrqc, wq.pas); set_wq(wq, in); - memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); if (in->type == IB_SRQT_TM) { MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); From 42f2611cc1738b201701e717246e11e86bef4e1e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:36 +0100 Subject: [PATCH 077/147] rds: stop using dmapool RDMA ULPs should only perform DMA through the ib_dma_* API instead of using the hidden dma_device directly. In addition using the dma coherent API family that dmapool is a part of can be very ineffcient on plaforms that are not DMA coherent. Switch to use slab allocations and the ib_dma_* APIs instead. Link: https://lore.kernel.org/r/20201106181941.1878556-6-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Santosh Shilimkar Signed-off-by: Jason Gunthorpe --- net/rds/ib.c | 10 ---- net/rds/ib.h | 6 --- net/rds/ib_cm.c | 128 ++++++++++++++++++++++++++++------------------ net/rds/ib_recv.c | 18 +++++-- net/rds/ib_send.c | 8 +++ 5 files changed, 101 insertions(+), 69 deletions(-) diff --git a/net/rds/ib.c b/net/rds/ib.c index deecbdcdae84ef..24c9a9005a6fba 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -30,7 +30,6 @@ * SOFTWARE. * */ -#include #include #include #include @@ -108,7 +107,6 @@ static void rds_ib_dev_free(struct work_struct *work) rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool); if (rds_ibdev->pd) ib_dealloc_pd(rds_ibdev->pd); - dma_pool_destroy(rds_ibdev->rid_hdrs_pool); list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { list_del(&i_ipaddr->list); @@ -191,14 +189,6 @@ static int rds_ib_add_one(struct ib_device *device) rds_ibdev->pd = NULL; goto put_dev; } - rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name, - device->dma_device, - sizeof(struct rds_header), - L1_CACHE_BYTES, 0); - if (!rds_ibdev->rid_hdrs_pool) { - ret = -ENOMEM; - goto put_dev; - } rds_ibdev->mr_1m_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); diff --git a/net/rds/ib.h b/net/rds/ib.h index c23a11d9ad3628..2ba71102b1f1f2 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -246,7 +246,6 @@ struct rds_ib_device { struct list_head conn_list; struct ib_device *dev; struct ib_pd *pd; - struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */ u8 odp_capable:1; unsigned int max_mrs; @@ -380,11 +379,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6); void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event); -struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, - struct dma_pool *pool, - dma_addr_t **dma_addrs, u32 num_hdrs); -void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, - dma_addr_t *dma_addrs, u32 num_hdrs); #define rds_ib_conn_error(conn, fmt...) \ __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index b36b60668b1da9..f5cbe963cd8f78 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -30,7 +30,6 @@ * SOFTWARE. * */ -#include #include #include #include @@ -441,42 +440,87 @@ static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index) rds_ibdev->vector_load[index]--; } +static void rds_dma_hdr_free(struct ib_device *dev, struct rds_header *hdr, + dma_addr_t dma_addr, enum dma_data_direction dir) +{ + ib_dma_unmap_single(dev, dma_addr, sizeof(*hdr), dir); + kfree(hdr); +} + +static struct rds_header *rds_dma_hdr_alloc(struct ib_device *dev, + dma_addr_t *dma_addr, enum dma_data_direction dir) +{ + struct rds_header *hdr; + + hdr = kzalloc_node(sizeof(*hdr), GFP_KERNEL, ibdev_to_node(dev)); + if (!hdr) + return NULL; + + *dma_addr = ib_dma_map_single(dev, hdr, sizeof(*hdr), + DMA_BIDIRECTIONAL); + if (ib_dma_mapping_error(dev, *dma_addr)) { + kfree(hdr); + return NULL; + } + + return hdr; +} + +/* Free the DMA memory used to store struct rds_header. + * + * @dev: the RDS IB device + * @hdrs: pointer to the array storing DMA memory pointers + * @dma_addrs: pointer to the array storing DMA addresses + * @num_hdars: number of headers to free. + */ +static void rds_dma_hdrs_free(struct rds_ib_device *dev, + struct rds_header **hdrs, dma_addr_t *dma_addrs, u32 num_hdrs, + enum dma_data_direction dir) +{ + u32 i; + + for (i = 0; i < num_hdrs; i++) + rds_dma_hdr_free(dev->dev, hdrs[i], dma_addrs[i], dir); + kvfree(hdrs); + kvfree(dma_addrs); +} + + /* Allocate DMA coherent memory to be used to store struct rds_header for * sending/receiving packets. The pointers to the DMA memory and the * associated DMA addresses are stored in two arrays. * - * @ibdev: the IB device - * @pool: the DMA memory pool + * @dev: the RDS IB device * @dma_addrs: pointer to the array for storing DMA addresses * @num_hdrs: number of headers to allocate * * It returns the pointer to the array storing the DMA memory pointers. On * error, NULL pointer is returned. */ -struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, - struct dma_pool *pool, - dma_addr_t **dma_addrs, u32 num_hdrs) +static struct rds_header **rds_dma_hdrs_alloc(struct rds_ib_device *dev, + dma_addr_t **dma_addrs, u32 num_hdrs, + enum dma_data_direction dir) { struct rds_header **hdrs; dma_addr_t *hdr_daddrs; u32 i; hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL, - ibdev_to_node(ibdev)); + ibdev_to_node(dev->dev)); if (!hdrs) return NULL; hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL, - ibdev_to_node(ibdev)); + ibdev_to_node(dev->dev)); if (!hdr_daddrs) { kvfree(hdrs); return NULL; } for (i = 0; i < num_hdrs; i++) { - hdrs[i] = dma_pool_zalloc(pool, GFP_KERNEL, &hdr_daddrs[i]); + hdrs[i] = rds_dma_hdr_alloc(dev->dev, &hdr_daddrs[i], dir); if (!hdrs[i]) { - rds_dma_hdrs_free(pool, hdrs, hdr_daddrs, i); + rds_dma_hdrs_free(dev, hdrs, hdr_daddrs, i, dir); return NULL; } } @@ -485,24 +529,6 @@ struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, return hdrs; } -/* Free the DMA memory used to store struct rds_header. - * - * @pool: the DMA memory pool - * @hdrs: pointer to the array storing DMA memory pointers - * @dma_addrs: pointer to the array storing DMA addresses - * @num_hdars: number of headers to free. - */ -void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, - dma_addr_t *dma_addrs, u32 num_hdrs) -{ - u32 i; - - for (i = 0; i < num_hdrs; i++) - dma_pool_free(pool, hdrs[i], dma_addrs[i]); - kvfree(hdrs); - kvfree(dma_addrs); -} - /* * This needs to be very careful to not leave IS_ERR pointers around for * cleanup to trip over. @@ -516,7 +542,6 @@ static int rds_ib_setup_qp(struct rds_connection *conn) struct rds_ib_device *rds_ibdev; unsigned long max_wrs; int ret, fr_queue_space; - struct dma_pool *pool; /* * It's normal to see a null device if an incoming connection races @@ -612,25 +637,26 @@ static int rds_ib_setup_qp(struct rds_connection *conn) goto recv_cq_out; } - pool = rds_ibdev->rid_hdrs_pool; - ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma, - ic->i_send_ring.w_nr); + ic->i_send_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr, + DMA_TO_DEVICE); if (!ic->i_send_hdrs) { ret = -ENOMEM; rdsdebug("DMA send hdrs alloc failed\n"); goto qp_out; } - ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma, - ic->i_recv_ring.w_nr); + ic->i_recv_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr, + DMA_FROM_DEVICE); if (!ic->i_recv_hdrs) { ret = -ENOMEM; rdsdebug("DMA recv hdrs alloc failed\n"); goto send_hdrs_dma_out; } - ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL, - &ic->i_ack_dma); + ic->i_ack = rds_dma_hdr_alloc(rds_ibdev->dev, &ic->i_ack_dma, + DMA_TO_DEVICE); if (!ic->i_ack) { ret = -ENOMEM; rdsdebug("DMA ack header alloc failed\n"); @@ -666,18 +692,19 @@ static int rds_ib_setup_qp(struct rds_connection *conn) vfree(ic->i_sends); ack_dma_out: - dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); + rds_dma_hdr_free(rds_ibdev->dev, ic->i_ack, ic->i_ack_dma, + DMA_TO_DEVICE); ic->i_ack = NULL; recv_hdrs_dma_out: - rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, - ic->i_recv_ring.w_nr); + rds_dma_hdrs_free(rds_ibdev, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr, DMA_FROM_DEVICE); ic->i_recv_hdrs = NULL; ic->i_recv_hdrs_dma = NULL; send_hdrs_dma_out: - rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma, - ic->i_send_ring.w_nr); + rds_dma_hdrs_free(rds_ibdev, ic->i_send_hdrs, ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr, DMA_TO_DEVICE); ic->i_send_hdrs = NULL; ic->i_send_hdrs_dma = NULL; @@ -1110,29 +1137,30 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) } if (ic->rds_ibdev) { - struct dma_pool *pool; - - pool = ic->rds_ibdev->rid_hdrs_pool; - /* then free the resources that ib callbacks use */ if (ic->i_send_hdrs) { - rds_dma_hdrs_free(pool, ic->i_send_hdrs, + rds_dma_hdrs_free(ic->rds_ibdev, + ic->i_send_hdrs, ic->i_send_hdrs_dma, - ic->i_send_ring.w_nr); + ic->i_send_ring.w_nr, + DMA_TO_DEVICE); ic->i_send_hdrs = NULL; ic->i_send_hdrs_dma = NULL; } if (ic->i_recv_hdrs) { - rds_dma_hdrs_free(pool, ic->i_recv_hdrs, + rds_dma_hdrs_free(ic->rds_ibdev, + ic->i_recv_hdrs, ic->i_recv_hdrs_dma, - ic->i_recv_ring.w_nr); + ic->i_recv_ring.w_nr, + DMA_FROM_DEVICE); ic->i_recv_hdrs = NULL; ic->i_recv_hdrs_dma = NULL; } if (ic->i_ack) { - dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); + rds_dma_hdr_free(ic->rds_ibdev->dev, ic->i_ack, + ic->i_ack_dma, DMA_TO_DEVICE); ic->i_ack = NULL; } } else { diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 3cffcec5fb371b..6fdedd9dbbc28f 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -662,10 +662,16 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi seq = rds_ib_get_ack(ic); rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq); + + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, ic->i_ack_dma, + sizeof(*hdr), DMA_TO_DEVICE); rds_message_populate_header(hdr, 0, 0, 0); hdr->h_ack = cpu_to_be64(seq); hdr->h_credit = adv_credits; rds_message_make_checksum(hdr); + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, ic->i_ack_dma, + sizeof(*hdr), DMA_TO_DEVICE); + ic->i_ack_queued = jiffies; ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL); @@ -845,6 +851,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, struct rds_ib_connection *ic = conn->c_transport_data; struct rds_ib_incoming *ibinc = ic->i_ibinc; struct rds_header *ihdr, *hdr; + dma_addr_t dma_addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs]; /* XXX shut down the connection if port 0,0 are seen? */ @@ -863,6 +870,8 @@ static void rds_ib_process_recv(struct rds_connection *conn, ihdr = ic->i_recv_hdrs[recv - ic->i_recvs]; + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, dma_addr, + sizeof(*ihdr), DMA_FROM_DEVICE); /* Validate the checksum. */ if (!rds_message_verify_checksum(ihdr)) { rds_ib_conn_error(conn, "incoming message " @@ -870,7 +879,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, "forcing a reconnect\n", &conn->c_faddr); rds_stats_inc(s_recv_drop_bad_checksum); - return; + goto done; } /* Process the ACK sequence which comes with every packet */ @@ -899,7 +908,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, */ rds_ib_frag_free(ic, recv->r_frag); recv->r_frag = NULL; - return; + goto done; } /* @@ -933,7 +942,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, hdr->h_dport != ihdr->h_dport) { rds_ib_conn_error(conn, "fragment header mismatch; forcing reconnect\n"); - return; + goto done; } } @@ -965,6 +974,9 @@ static void rds_ib_process_recv(struct rds_connection *conn, rds_inc_put(&ibinc->ii_inc); } +done: + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, dma_addr, + sizeof(*ihdr), DMA_FROM_DEVICE); } void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index dfe778220657af..92b4a8689aae7a 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -638,6 +638,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, send->s_sge[0].length = sizeof(struct rds_header); send->s_sge[0].lkey = ic->i_pd->local_dma_lkey; + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, + ic->i_send_hdrs_dma[pos], + sizeof(struct rds_header), + DMA_TO_DEVICE); memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header)); @@ -688,6 +692,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, adv_credits = 0; rds_ib_stats_inc(s_ib_tx_credit_updates); } + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, + ic->i_send_hdrs_dma[pos], + sizeof(struct rds_header), + DMA_TO_DEVICE); if (prev) prev->s_wr.next = &send->s_wr; From 5a7a9e038b032137ae9c45d5429f18a2ffdf7d42 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:38 +0100 Subject: [PATCH 078/147] RDMA/core: remove use of dma_virt_ops Use the ib_dma_* helpers to skip the DMA translation instead. This removes the last user if dma_virt_ops and keeps the weird layering violation inside the RDMA core instead of burderning the DMA mapping subsystems with it. This also means the software RDMA drivers now don't have to mess with DMA parameters that are not relevant to them at all, and that in the future we can use PCI P2P transfers even for software RDMA, as there is no first fake layer of DMA mapping that the P2P DMA support. Link: https://lore.kernel.org/r/20201106181941.1878556-8-hch@lst.de Signed-off-by: Christoph Hellwig Tested-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 43 ++++++++-------- drivers/infiniband/core/rw.c | 5 +- drivers/infiniband/sw/rdmavt/Kconfig | 1 - drivers/infiniband/sw/rdmavt/mr.c | 6 +-- drivers/infiniband/sw/rdmavt/vt.c | 8 --- drivers/infiniband/sw/rxe/Kconfig | 1 - drivers/infiniband/sw/rxe/rxe_verbs.c | 7 --- drivers/infiniband/sw/rxe/rxe_verbs.h | 1 - drivers/infiniband/sw/siw/Kconfig | 1 - drivers/infiniband/sw/siw/siw.h | 1 - drivers/infiniband/sw/siw/siw_main.c | 7 --- drivers/nvme/target/rdma.c | 3 +- include/rdma/ib_verbs.h | 73 ++++++++++++++++++--------- 13 files changed, 81 insertions(+), 76 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index ce26564d4edfed..3ab1edea6acbe9 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1209,25 +1209,6 @@ static int assign_name(struct ib_device *device, const char *name) return ret; } -static void setup_dma_device(struct ib_device *device, - struct device *dma_device) -{ - /* - * If the caller does not provide a DMA capable device then the IB - * device will be used. In this case the caller should fully setup the - * ibdev for DMA. This usually means using dma_virt_ops. - */ -#ifdef CONFIG_DMA_VIRT_OPS - if (!dma_device) { - device->dev.dma_ops = &dma_virt_ops; - dma_device = &device->dev; - } -#endif - WARN_ON(!dma_device); - device->dma_device = dma_device; - WARN_ON(!device->dma_device->dma_parms); -} - /* * setup_device() allocates memory and sets up data that requires calling the * device ops, this is the only reason these actions are not done during @@ -1373,7 +1354,14 @@ int ib_register_device(struct ib_device *device, const char *name, if (ret) return ret; - setup_dma_device(device, dma_device); + /* + * If the caller does not provide a DMA capable device then the IB core + * will set up ib_sge and scatterlist structures that stash the kernel + * virtual address into the address field. + */ + WARN_ON(dma_device && !dma_device->dma_parms); + device->dma_device = dma_device; + ret = setup_device(device); if (ret) return ret; @@ -2708,6 +2696,21 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) } EXPORT_SYMBOL(ib_set_device_ops); +#ifdef CONFIG_INFINIBAND_VIRT_DMA +int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + sg_dma_address(s) = (uintptr_t)sg_virt(s); + sg_dma_len(s) = s->length; + } + return nents; +} +EXPORT_SYMBOL(ib_dma_virt_map_sg); +#endif /* CONFIG_INFINIBAND_VIRT_DMA */ + static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { [RDMA_NL_LS_OP_RESOLVE] = { .doit = ib_nl_handle_resolve_resp, diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 13f43ab7220b05..a96030b784eb21 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -285,8 +285,11 @@ static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir) { - if (is_pci_p2pdma_page(sg_page(sg))) + if (is_pci_p2pdma_page(sg_page(sg))) { + if (WARN_ON_ONCE(ib_uses_virt_dma(dev))) + return 0; return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); + } return ib_dma_map_sg(dev, sg, sg_cnt, dir); } diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig index c8e268082952b0..0df48b3a6b56c5 100644 --- a/drivers/infiniband/sw/rdmavt/Kconfig +++ b/drivers/infiniband/sw/rdmavt/Kconfig @@ -4,6 +4,5 @@ config INFINIBAND_RDMAVT depends on INFINIBAND_VIRT_DMA depends on X86_64 depends on PCI - select DMA_VIRT_OPS help This is a common software verbs provider for RDMA networks. diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 8490fdb9c91e50..90fc234f489acd 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -324,8 +324,6 @@ static void __rvt_free_mr(struct rvt_mr *mr) * @acc: access flags * * Return: the memory region on success, otherwise returns an errno. - * Note that all DMA addresses should be created via the functions in - * struct dma_virt_ops. */ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) { @@ -766,7 +764,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, /* * We use LKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). */ if (sge->lkey == 0) { struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); @@ -877,7 +875,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, /* * We use RKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). */ rcu_read_lock(); if (rkey == 0) { diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 5bd817490b1f41..49cec85a372a98 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -525,7 +525,6 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) int rvt_register_device(struct rvt_dev_info *rdi) { int ret = 0, i; - u64 dma_mask; if (!rdi) return -EINVAL; @@ -580,13 +579,6 @@ int rvt_register_device(struct rvt_dev_info *rdi) /* Completion queues */ spin_lock_init(&rdi->n_cqs_lock); - /* DMA Operations */ - rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms; - dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - ret = dma_coerce_mask_and_coherent(&rdi->ibdev.dev, dma_mask); - if (ret) - goto bail_wss; - /* Protection Domain */ spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig index 8810bfa680495a..4521490667925f 100644 --- a/drivers/infiniband/sw/rxe/Kconfig +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -5,7 +5,6 @@ config RDMA_RXE depends on INFINIBAND_VIRT_DMA select NET_UDP_TUNNEL select CRYPTO_CRC32 - select DMA_VIRT_OPS help This driver implements the InfiniBand RDMA transport over the Linux network stack. It enables a system with a diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index bbc89002c45678..a2bd91aaa5de95 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1128,7 +1128,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) int err; struct ib_device *dev = &rxe->ib_dev; struct crypto_shash *tfm; - u64 dma_mask; strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); @@ -1139,12 +1138,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) dev->local_dma_lkey = 0; addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); - dev->dev.dma_parms = &rxe->dma_parms; - dma_set_max_seg_size(&dev->dev, UINT_MAX); - dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - err = dma_coerce_mask_and_coherent(&dev->dev, dma_mask); - if (err) - return err; dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 57967fc39c045c..79e0a5a878da35 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -351,7 +351,6 @@ struct rxe_port { struct rxe_dev { struct ib_device ib_dev; struct ib_device_attr attr; - struct device_dma_parameters dma_parms; int max_ucontext; int max_inline_data; struct mutex usdev_lock; diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig index 3450ba5081df51..1b5105cbabaeed 100644 --- a/drivers/infiniband/sw/siw/Kconfig +++ b/drivers/infiniband/sw/siw/Kconfig @@ -2,7 +2,6 @@ config RDMA_SIW tristate "Software RDMA over TCP/IP (iWARP) driver" depends on INET && INFINIBAND && LIBCRC32C depends on INFINIBAND_VIRT_DMA - select DMA_VIRT_OPS help This driver implements the iWARP RDMA transport over the Linux TCP/IP network stack. It enables a system with a diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index e9753831ac3f33..adda7899621962 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -69,7 +69,6 @@ struct siw_pd { struct siw_device { struct ib_device base_dev; - struct device_dma_parameters dma_parms; struct net_device *netdev; struct siw_dev_cap attrs; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index ac675622767764..d9de062852c4f9 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -306,7 +306,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) struct siw_device *sdev = NULL; struct ib_device *base_dev; struct device *parent = netdev->dev.parent; - u64 dma_mask; int rv; if (!parent) { @@ -361,12 +360,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) */ base_dev->phys_port_cnt = 1; base_dev->dev.parent = parent; - base_dev->dev.dma_parms = &sdev->dma_parms; - dma_set_max_seg_size(&base_dev->dev, UINT_MAX); - dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - if (dma_coerce_mask_and_coherent(&base_dev->dev, dma_mask)) - goto error; - base_dev->num_comp_vectors = num_possible_cpus(); xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index ae6620489457d6..5c1e7cb7fe0dee 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -414,7 +414,8 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, if (ib_dma_mapping_error(ndev->device, r->send_sge.addr)) goto out_free_rsp; - r->req.p2p_client = &ndev->device->dev; + if (!ib_uses_virt_dma(ndev->device)) + r->req.p2p_client = &ndev->device->dev; r->send_sge.length = sizeof(*r->req.cqe); r->send_sge.lkey = ndev->pd->local_dma_lkey; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3feb42ef82dc2f..174c1bffa00c8b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3906,6 +3906,16 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) -ENOSYS; } +/* + * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to + * NULL. This causes the ib_dma* helpers to just stash the kernel virtual + * address into the dma address. + */ +static inline bool ib_uses_virt_dma(struct ib_device *dev) +{ + return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device; +} + /** * ib_dma_mapping_error - check a DMA addr for error * @dev: The device for which the dma_addr was created @@ -3913,6 +3923,8 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) */ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { + if (ib_uses_virt_dma(dev)) + return 0; return dma_mapping_error(dev->dma_device, dma_addr); } @@ -3927,6 +3939,8 @@ static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) + return (uintptr_t)cpu_addr; return dma_map_single(dev->dma_device, cpu_addr, size, direction); } @@ -3941,7 +3955,8 @@ static inline void ib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_single(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_single(dev->dma_device, addr, size, direction); } /** @@ -3958,6 +3973,8 @@ static inline u64 ib_dma_map_page(struct ib_device *dev, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) + return (uintptr_t)(page_address(page) + offset); return dma_map_page(dev->dma_device, page, offset, size, direction); } @@ -3972,7 +3989,30 @@ static inline void ib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_page(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_page(dev->dma_device, addr, size, direction); +} + +int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents); +static inline int ib_dma_map_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (ib_uses_virt_dma(dev)) + return ib_dma_virt_map_sg(dev, sg, nents); + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); +} + +static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (!ib_uses_virt_dma(dev)) + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); } /** @@ -3986,7 +4026,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - return dma_map_sg(dev->dma_device, sg, nents, direction); + return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4000,24 +4040,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - dma_unmap_sg(dev->dma_device, sg, nents, direction); -} - -static inline int ib_dma_map_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long dma_attrs) -{ - return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, - dma_attrs); -} - -static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long dma_attrs) -{ - dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); + ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4028,6 +4051,8 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, */ static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev) { + if (ib_uses_virt_dma(dev)) + return UINT_MAX; return dma_get_max_seg_size(dev->dma_device); } @@ -4043,7 +4068,8 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); } /** @@ -4058,7 +4084,8 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_device(dev->dma_device, addr, size, dir); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_device(dev->dma_device, addr, size, dir); } /* ib_reg_user_mr - register a memory region for virtual addresses from kernel From 4d34d52c25a042e8fdfcaed9715774759394e32e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:39 +0100 Subject: [PATCH 079/147] PCI/P2PDMA: Remove the DMA_VIRT_OPS hacks Now that all users of dma_virt_ops are gone we can remove the workaround for it in the PCI peer to peer code. Link: https://lore.kernel.org/r/20201106181941.1878556-9-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Acked-by: Bjorn Helgaas Signed-off-by: Jason Gunthorpe --- drivers/pci/p2pdma.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index de1c331dbed43f..b07018af53876c 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -556,15 +556,6 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, return -1; for (i = 0; i < num_clients; i++) { -#ifdef CONFIG_DMA_VIRT_OPS - if (clients[i]->dma_ops == &dma_virt_ops) { - if (verbose) - dev_warn(clients[i], - "cannot be used for peer-to-peer DMA because the driver makes use of dma_virt_ops\n"); - return -1; - } -#endif - pci_client = find_parent_pci_dev(clients[i]); if (!pci_client) { if (verbose) @@ -837,17 +828,6 @@ static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap, phys_addr_t paddr; int i; - /* - * p2pdma mappings are not compatible with devices that use - * dma_virt_ops. If the upper layers do the right thing - * this should never happen because it will be prevented - * by the check in pci_p2pdma_distance_many() - */ -#ifdef CONFIG_DMA_VIRT_OPS - if (WARN_ON_ONCE(dev->dma_ops == &dma_virt_ops)) - return 0; -#endif - for_each_sg(sg, s, nents, i) { paddr = sg_phys(s); From 73063ec58c848e0eb9f888847df011a85b34e5a2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:40 +0100 Subject: [PATCH 080/147] PCI/P2PDMA: Cleanup __pci_p2pdma_map_sg a bit Remove the pointless paddr variable that was only used once. Link: https://lore.kernel.org/r/20201106181941.1878556-10-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Acked-by: Bjorn Helgaas Signed-off-by: Jason Gunthorpe --- drivers/pci/p2pdma.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index b07018af53876c..afd792cc272832 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -825,13 +825,10 @@ static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap, struct device *dev, struct scatterlist *sg, int nents) { struct scatterlist *s; - phys_addr_t paddr; int i; for_each_sg(sg, s, nents, i) { - paddr = sg_phys(s); - - s->dma_address = paddr - p2p_pgmap->bus_offset; + s->dma_address = sg_phys(s) - p2p_pgmap->bus_offset; sg_dma_len(s) = s->length; } From 172292be01dbd6c26aba23f62e8ec090f31cdb71 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:41 +0100 Subject: [PATCH 081/147] dma-mapping: remove dma_virt_ops Now that the RDMA core deals with devices that only do DMA mapping in lower layers properly, there is no user for dma_virt_ops and it can be removed. Link: https://lore.kernel.org/r/20201106181941.1878556-11-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/dma-mapping.h | 2 -- kernel/dma/Kconfig | 5 --- kernel/dma/Makefile | 1 - kernel/dma/virt.c | 61 ------------------------------------- 4 files changed, 69 deletions(-) delete mode 100644 kernel/dma/virt.c diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 956151052d4542..2aaed35b556df4 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -565,6 +565,4 @@ static inline int dma_mmap_wc(struct device *dev, int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, dma_addr_t dma_start, u64 size); -extern const struct dma_map_ops dma_virt_ops; - #endif /* _LINUX_DMA_MAPPING_H */ diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index c99de4a2145889..fd2db2665fc691 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -75,11 +75,6 @@ config ARCH_HAS_DMA_PREP_COHERENT config ARCH_HAS_FORCE_DMA_UNENCRYPTED bool -config DMA_VIRT_OPS - bool - depends on HAS_DMA - select DMA_OPS - config SWIOTLB bool select NEED_DMA_MAP_STATE diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index dc755ab68aabf9..cd1d86358a7a62 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -5,7 +5,6 @@ obj-$(CONFIG_DMA_OPS) += ops_helpers.o obj-$(CONFIG_DMA_OPS) += dummy.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o -obj-$(CONFIG_DMA_VIRT_OPS) += virt.o obj-$(CONFIG_DMA_API_DEBUG) += debug.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o diff --git a/kernel/dma/virt.c b/kernel/dma/virt.c deleted file mode 100644 index 59d32317dd574a..00000000000000 --- a/kernel/dma/virt.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DMA operations that map to virtual addresses without flushing memory. - */ -#include -#include -#include -#include - -static void *dma_virt_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - ret = (void *)__get_free_pages(gfp | __GFP_ZERO, get_order(size)); - if (ret) - *dma_handle = (uintptr_t)ret; - return ret; -} - -static void dma_virt_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, - unsigned long attrs) -{ - free_pages((unsigned long)cpu_addr, get_order(size)); -} - -static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return (uintptr_t)(page_address(page) + offset); -} - -static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - BUG_ON(!sg_page(sg)); - sg_dma_address(sg) = (uintptr_t)sg_virt(sg); - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -const struct dma_map_ops dma_virt_ops = { - .alloc = dma_virt_alloc, - .free = dma_virt_free, - .map_page = dma_virt_map_page, - .map_sg = dma_virt_map_sg, - .alloc_pages = dma_common_alloc_pages, - .free_pages = dma_common_free_pages, -}; -EXPORT_SYMBOL(dma_virt_ops); From d024f27de11c2cb5460894eed09d5e616b81587d Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Tue, 13 Oct 2020 09:43:42 +0200 Subject: [PATCH 082/147] RDMA/ipoib: Distribute cq completion vector better Currently ipoib choose cq completion vector based on port number, when HCA only have one port, all the interface recv queue completion are bind to cq completion vector 0. To better distribute the load, use same method as __ib_alloc_cq_any to choose completion vector, with the change, each interface now use different completion vectors. Link: https://lore.kernel.org/r/20201013074342.15867-1-jinpu.wang@cloud.ionos.com Signed-off-by: Jack Wang Reviewed-by: Gioh Kim Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 587252fd6f57dd..5a150a080ac217 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -158,6 +158,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) int ret, size, req_vec; int i; + static atomic_t counter; size = ipoib_recvq_size + 1; ret = ipoib_cm_dev_init(dev); @@ -171,8 +172,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) if (ret != -EOPNOTSUPP) return ret; - req_vec = (priv->port - 1) * 2; - + req_vec = atomic_inc_return(&counter) * 2; cq_attr.cqe = size; cq_attr.comp_vector = req_vec % priv->ca->num_comp_vectors; priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL, From 4846bf44e1a8b5038f73bd7460d402e519971492 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 20 Nov 2020 12:25:27 -0600 Subject: [PATCH 083/147] IB/hfi1: Fix fall-through warnings for Clang In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple warnings by explicitly adding multiple break statements instead of just letting the code fall through to the next case. Link: https://lore.kernel.org/r/13cc2fe2cf8a71a778dbb3d996b07f5e5d04fd40.1605896059.git.gustavoars@kernel.org Link: https://github.com/KSPP/linux/issues/115 Signed-off-by: Gustavo A. R. Silva Tested-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/qp.c | 1 + drivers/infiniband/hw/hfi1/tid_rdma.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 356518e17fa6c4..681bb4e918c926 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -339,6 +339,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send) return -EINVAL; if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf) return -EINVAL; + break; default: break; } diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 73d197e2173053..92aa2a9b3b5ac2 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2826,6 +2826,7 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, default: break; } + break; default: break; } @@ -3005,6 +3006,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, default: break; } + break; default: break; } @@ -3221,6 +3223,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) req = wqe_to_tid_req(prev); if (req->ack_seg != req->total_segs) goto interlock; + break; default: break; } @@ -3239,9 +3242,11 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) req = wqe_to_tid_req(prev); if (req->ack_seg != req->total_segs) goto interlock; + break; default: break; } + break; default: break; } From 667d457fa84f27c3cd9e0f96557422e398f96fee Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 20 Nov 2020 12:28:49 -0600 Subject: [PATCH 084/147] IB/mlx4: Fix fall-through warnings for Clang In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning by explicitly adding a break statement instead of just letting the code fall through to the next case. Link: https://lore.kernel.org/r/0153716933e01608d46155941c447d011c59c1e4.1605896059.git.gustavoars@kernel.org Link: https://github.com/KSPP/linux/issues/115 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/mad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 8bd16474708fbc..f3ace85552f36b 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1523,6 +1523,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc return; } else *slave_id = slave; + break; default: /* nothing */; } From c6191f83be6a9d671c5e9bff99e5d03e338252f8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 20 Nov 2020 12:28:56 -0600 Subject: [PATCH 085/147] IB/qedr: Fix fall-through warnings for Clang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning by explicitly adding a break statement instead of just letting the code fall through to the next case. Link: https://lore.kernel.org/r/8d7cf00ec3a4b27a895534e02077c2c9ed8a5f8e.1605896059.git.gustavoars@kernel.org Link: https://github.com/KSPP/linux/issues/115 Signed-off-by: Gustavo A. R. Silva Acked-by: Michal Kalderon  Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index e8b812bb3a7ace..8e7c069e1a2d46 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -766,6 +766,7 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) } xa_unlock_irqrestore(&dev->srqs, flags); DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq); + break; default: break; } From 808b2c925dd0308a89e717df57721a9ed015c243 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 20 Nov 2020 12:31:49 -0600 Subject: [PATCH 086/147] IB/mlx5: Fix fall-through warnings for Clang In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning by explicitly adding the new pseudo-keyword fallthrough; instead of letting the code fall through to the next case. Link: https://lore.kernel.org/r/2b0c87362bc86f6adfe56a5a6685837b71022bbf.1605896059.git.gustavoars@kernel.org Link: https://github.com/KSPP/linux/issues/115 Signed-off-by: Gustavo A. R. Silva Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 1ff156cfb5b14b..8944249092663c 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2434,6 +2434,7 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, case IB_QPT_GSI: if (dev->profile == &raw_eth_profile) goto out; + fallthrough; case IB_QPT_RAW_PACKET: case IB_QPT_UD: case MLX5_IB_QPT_REG_UMR: From a9d2e9ae953f0ddd0327479c81a085adaa76d903 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 6 Nov 2020 10:00:49 -0400 Subject: [PATCH 087/147] RDMA/siw,rxe: Make emulated devices virtual in the device tree This moves siw and rxe to be virtual devices in the device tree: lrwxrwxrwx 1 root root 0 Nov 6 13:55 /sys/class/infiniband/rxe0 -> ../../devices/virtual/infiniband/rxe0/ Previously they were trying to parent themselves to the physical device of their attached netdev, which doesn't make alot of sense. My hope is this will solve some weird syzkaller hits related to sysfs as it could be possible that the parent of a netdev is another netdev, eg under bonding or some other syzkaller found netdev configuration. Nesting a ib_device under anything but a physical device is going to cause inconsistencies in sysfs during destructions. Link: https://lore.kernel.org/r/0-v1-dcbfc68c4b4a+d6-virtual_dev_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 9 --------- drivers/infiniband/sw/rxe/rxe_verbs.c | 1 - drivers/infiniband/sw/siw/siw_main.c | 19 +------------------ 3 files changed, 1 insertion(+), 28 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index ee3cf3af7caba5..c4b06ced30a75b 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -19,15 +19,6 @@ static struct rxe_recv_sockets recv_sockets; -struct device *rxe_dma_device(struct rxe_dev *rxe) -{ - struct net_device *ndev; - - ndev = rxe->ndev; - - return ndev->dev.parent; -} - int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) { int err; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index a2bd91aaa5de95..2fbea2b2d72a17 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1134,7 +1134,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; dev->num_comp_vectors = num_possible_cpus(); - dev->dev.parent = rxe_dma_device(rxe); dev->local_dma_lkey = 0; addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index d9de062852c4f9..ee95cf29179d2d 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -305,24 +305,8 @@ static struct siw_device *siw_device_create(struct net_device *netdev) { struct siw_device *sdev = NULL; struct ib_device *base_dev; - struct device *parent = netdev->dev.parent; int rv; - if (!parent) { - /* - * The loopback device has no parent device, - * so it appears as a top-level device. To support - * loopback device connectivity, take this device - * as the parent device. Skip all other devices - * w/o parent device. - */ - if (netdev->type != ARPHRD_LOOPBACK) { - pr_warn("siw: device %s error: no parent device\n", - netdev->name); - return NULL; - } - parent = &netdev->dev; - } sdev = ib_alloc_device(siw_device, base_dev); if (!sdev) return NULL; @@ -359,7 +343,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) * per physical port. */ base_dev->phys_port_cnt = 1; - base_dev->dev.parent = parent; base_dev->num_comp_vectors = num_possible_cpus(); xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); @@ -401,7 +384,7 @@ static struct siw_device *siw_device_create(struct net_device *netdev) atomic_set(&sdev->num_mr, 0); atomic_set(&sdev->num_pd, 0); - sdev->numa_node = dev_to_node(parent); + sdev->numa_node = dev_to_node(&netdev->dev); spin_lock_init(&sdev->lock); return sdev; From 6d8285e604e0221b67bd5db736921b7ddce37d00 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Sun, 8 Nov 2020 15:20:07 +0200 Subject: [PATCH 088/147] RDMA/cxgb4: Validate the number of CQEs Before create CQ, make sure that the requested number of CQEs is in the supported range. Fixes: cfdda9d76436 ("RDMA/cxgb4: Add driver for Chelsio T4 RNIC") Link: https://lore.kernel.org/r/20201108132007.67537-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 2cb65be24770f1..44c2416588d423 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -1008,6 +1008,9 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (attr->flags) return -EOPNOTSUPP; + if (entries < 1 || entries > ibdev->attrs.max_cqe) + return -EINVAL; + if (vector >= rhp->rdev.lldi.nciq) return -EINVAL; From 82101630222fc6caa21502df0d02e0153aaf25d2 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sat, 21 Nov 2020 01:25:29 +0100 Subject: [PATCH 089/147] RDMA/i40iw: Constify ops structs The ops structs are never modified. Make them const to allow the compiler to put them in read-only memory. Link: https://lore.kernel.org/r/20201121002529.89148-1-rikard.falkeborn@gmail.com Signed-off-by: Rikard Falkeborn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 20 ++++++++++---------- drivers/infiniband/hw/i40iw/i40iw_type.h | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 86d3f8aff329c1..7ed9826221c186 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -5098,7 +5098,7 @@ void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi) i40iw_hw_stats_stop_timer(vsi); } -static struct i40iw_cqp_ops iw_cqp_ops = { +static const struct i40iw_cqp_ops iw_cqp_ops = { .cqp_init = i40iw_sc_cqp_init, .cqp_create = i40iw_sc_cqp_create, .cqp_post_sq = i40iw_sc_cqp_post_sq, @@ -5107,7 +5107,7 @@ static struct i40iw_cqp_ops iw_cqp_ops = { .poll_for_cqp_op_done = i40iw_sc_poll_for_cqp_op_done }; -static struct i40iw_ccq_ops iw_ccq_ops = { +static const struct i40iw_ccq_ops iw_ccq_ops = { .ccq_init = i40iw_sc_ccq_init, .ccq_create = i40iw_sc_ccq_create, .ccq_destroy = i40iw_sc_ccq_destroy, @@ -5116,7 +5116,7 @@ static struct i40iw_ccq_ops iw_ccq_ops = { .ccq_arm = i40iw_sc_ccq_arm }; -static struct i40iw_ceq_ops iw_ceq_ops = { +static const struct i40iw_ceq_ops iw_ceq_ops = { .ceq_init = i40iw_sc_ceq_init, .ceq_create = i40iw_sc_ceq_create, .cceq_create_done = i40iw_sc_cceq_create_done, @@ -5126,7 +5126,7 @@ static struct i40iw_ceq_ops iw_ceq_ops = { .process_ceq = i40iw_sc_process_ceq }; -static struct i40iw_aeq_ops iw_aeq_ops = { +static const struct i40iw_aeq_ops iw_aeq_ops = { .aeq_init = i40iw_sc_aeq_init, .aeq_create = i40iw_sc_aeq_create, .aeq_destroy = i40iw_sc_aeq_destroy, @@ -5137,11 +5137,11 @@ static struct i40iw_aeq_ops iw_aeq_ops = { }; /* iwarp pd ops */ -static struct i40iw_pd_ops iw_pd_ops = { +static const struct i40iw_pd_ops iw_pd_ops = { .pd_init = i40iw_sc_pd_init, }; -static struct i40iw_priv_qp_ops iw_priv_qp_ops = { +static const struct i40iw_priv_qp_ops iw_priv_qp_ops = { .qp_init = i40iw_sc_qp_init, .qp_create = i40iw_sc_qp_create, .qp_modify = i40iw_sc_qp_modify, @@ -5156,14 +5156,14 @@ static struct i40iw_priv_qp_ops iw_priv_qp_ops = { .iw_mr_fast_register = i40iw_sc_mr_fast_register }; -static struct i40iw_priv_cq_ops iw_priv_cq_ops = { +static const struct i40iw_priv_cq_ops iw_priv_cq_ops = { .cq_init = i40iw_sc_cq_init, .cq_create = i40iw_sc_cq_create, .cq_destroy = i40iw_sc_cq_destroy, .cq_modify = i40iw_sc_cq_modify, }; -static struct i40iw_mr_ops iw_mr_ops = { +static const struct i40iw_mr_ops iw_mr_ops = { .alloc_stag = i40iw_sc_alloc_stag, .mr_reg_non_shared = i40iw_sc_mr_reg_non_shared, .mr_reg_shared = i40iw_sc_mr_reg_shared, @@ -5172,7 +5172,7 @@ static struct i40iw_mr_ops iw_mr_ops = { .mw_alloc = i40iw_sc_mw_alloc }; -static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { +static const struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { .manage_push_page = i40iw_sc_manage_push_page, .manage_hmc_pm_func_table = i40iw_sc_manage_hmc_pm_func_table, .set_hmc_resource_profile = i40iw_sc_set_hmc_resource_profile, @@ -5195,7 +5195,7 @@ static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { .update_resume_qp = i40iw_sc_resume_qp }; -static struct i40iw_hmc_ops iw_hmc_ops = { +static const struct i40iw_hmc_ops iw_hmc_ops = { .init_iw_hmc = i40iw_sc_init_iw_hmc, .parse_fpm_query_buf = i40iw_sc_parse_fpm_query_buf, .configure_iw_fpm = i40iw_sc_configure_iw_fpm, diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index c3babf3cbb8e5f..1dbf3991cc54ef 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -493,16 +493,16 @@ struct i40iw_sc_dev { struct i40iw_sc_aeq *aeq; struct i40iw_sc_ceq *ceq[I40IW_CEQ_MAX_COUNT]; struct i40iw_sc_cq *ccq; - struct i40iw_cqp_ops *cqp_ops; - struct i40iw_ccq_ops *ccq_ops; - struct i40iw_ceq_ops *ceq_ops; - struct i40iw_aeq_ops *aeq_ops; - struct i40iw_pd_ops *iw_pd_ops; - struct i40iw_priv_qp_ops *iw_priv_qp_ops; - struct i40iw_priv_cq_ops *iw_priv_cq_ops; - struct i40iw_mr_ops *mr_ops; - struct i40iw_cqp_misc_ops *cqp_misc_ops; - struct i40iw_hmc_ops *hmc_ops; + const struct i40iw_cqp_ops *cqp_ops; + const struct i40iw_ccq_ops *ccq_ops; + const struct i40iw_ceq_ops *ceq_ops; + const struct i40iw_aeq_ops *aeq_ops; + const struct i40iw_pd_ops *iw_pd_ops; + const struct i40iw_priv_qp_ops *iw_priv_qp_ops; + const struct i40iw_priv_cq_ops *iw_priv_cq_ops; + const struct i40iw_mr_ops *mr_ops; + const struct i40iw_cqp_misc_ops *cqp_misc_ops; + const struct i40iw_hmc_ops *hmc_ops; struct i40iw_vchnl_if vchnl_if; const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; From df0e4de29c75fab2d59b67b7542ea1e10d32c6e1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 21 Nov 2020 10:51:27 +0100 Subject: [PATCH 090/147] IB/qib: Use dma_set_mask_and_coherent to simplify code 'pci_set_dma_mask()' + 'pci_set_consistent_dma_mask()' can be replaced by an equivalent 'dma_set_mask_and_coherent()' which is much less verbose. Link: https://lore.kernel.org/r/20201121095127.1335228-1-christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Acked-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_pcie.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 3dc6ce03331905..2e07b3749b8809 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -90,25 +90,18 @@ int qib_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) goto bail; } - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (ret) { /* * If the 64 bit setup fails, try 32 bit. Some systems * do not setup 64 bit maps on systems with 2GB or less * memory installed. */ - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (ret) { qib_devinfo(pdev, "Unable to set DMA mask: %d\n", ret); goto bail; } - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } else - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) { - qib_early_err(&pdev->dev, - "Unable to set DMA consistent mask: %d\n", ret); - goto bail; } pci_set_master(pdev); From 93035242d9e22f2aad6ac0b886f19444713c0089 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 15 Nov 2020 14:06:50 +0200 Subject: [PATCH 091/147] tools/testing/scatterlist: Test dynamic __sg_alloc_table_from_pages Add few cases to test the dynamic allocation flow of __sg_alloc_table_from_pages. Link: https://lore.kernel.org/r/20201115120650.139277-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- tools/testing/scatterlist/main.c | 64 ++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index f561aed7c65703..2d27e47bbf2f1d 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -9,6 +9,7 @@ struct test { int alloc_ret; unsigned num_pages; unsigned *pfn; + unsigned *pfn_app; unsigned size; unsigned int max_seg; unsigned int expected_segments; @@ -52,31 +53,39 @@ int main(void) { const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT; struct test *test, tests[] = { - { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 }, - { 0, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 }, - { 0, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 }, - { 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 }, - { 0, 1, pfn(0), 1, sgmax, 1 }, - { 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 }, - { 0, 2, pfn(1, 0), 2 * PAGE_SIZE, sgmax, 2 }, - { 0, 3, pfn(0, 1, 2), 3 * PAGE_SIZE, sgmax, 1 }, - { 0, 3, pfn(0, 2, 1), 3 * PAGE_SIZE, sgmax, 3 }, - { 0, 3, pfn(0, 1, 3), 3 * PAGE_SIZE, sgmax, 2 }, - { 0, 3, pfn(1, 2, 4), 3 * PAGE_SIZE, sgmax, 2 }, - { 0, 3, pfn(1, 3, 4), 3 * PAGE_SIZE, sgmax, 2 }, - { 0, 4, pfn(0, 1, 3, 4), 4 * PAGE_SIZE, sgmax, 2 }, - { 0, 5, pfn(0, 1, 3, 4, 5), 5 * PAGE_SIZE, sgmax, 2 }, - { 0, 5, pfn(0, 1, 3, 4, 6), 5 * PAGE_SIZE, sgmax, 3 }, - { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, sgmax, 1 }, - { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, 2 * PAGE_SIZE, 3 }, - { 0, 6, pfn(0, 1, 2, 3, 4, 5), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 }, - { 0, 6, pfn(0, 2, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 4 }, - { 0, 6, pfn(0, 1, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 }, - { 0, 0, NULL, 0, 0, 0 }, + { -EINVAL, 1, pfn(0), NULL, PAGE_SIZE, 0, 1 }, + { 0, 1, pfn(0), NULL, PAGE_SIZE, PAGE_SIZE + 1, 1 }, + { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax + 1, 1 }, + { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax, 1 }, + { 0, 1, pfn(0), NULL, 1, sgmax, 1 }, + { 0, 2, pfn(0, 1), NULL, 2 * PAGE_SIZE, sgmax, 1 }, + { 0, 2, pfn(1, 0), NULL, 2 * PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 }, + { 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 }, + { 0, 3, pfn(0, 1, 2), pfn(3, 4, 5), 3 * PAGE_SIZE, sgmax, 1 }, + { 0, 3, pfn(0, 1, 2), pfn(4, 5, 6), 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(0, 2, 1), NULL, 3 * PAGE_SIZE, sgmax, 3 }, + { 0, 3, pfn(0, 1, 3), NULL, 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(1, 2, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(1, 3, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 4, pfn(0, 1, 3, 4), NULL, 4 * PAGE_SIZE, sgmax, 2 }, + { 0, 5, pfn(0, 1, 3, 4, 5), NULL, 5 * PAGE_SIZE, sgmax, 2 }, + { 0, 5, pfn(0, 1, 3, 4, 6), NULL, 5 * PAGE_SIZE, sgmax, 3 }, + { 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, sgmax, 1 }, + { 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, 2 * PAGE_SIZE, + 3 }, + { 0, 6, pfn(0, 1, 2, 3, 4, 5), NULL, 6 * PAGE_SIZE, + 2 * PAGE_SIZE, 3 }, + { 0, 6, pfn(0, 2, 3, 4, 5, 6), NULL, 6 * PAGE_SIZE, + 2 * PAGE_SIZE, 4 }, + { 0, 6, pfn(0, 1, 3, 4, 5, 6), pfn(7, 8, 9, 10, 11, 12), + 6 * PAGE_SIZE, 12 * PAGE_SIZE, 2 }, + { 0, 0, NULL, NULL, 0, 0, 0 }, }; unsigned int i; for (i = 0, test = tests; test->expected_segments; test++, i++) { + int left_pages = test->pfn_app ? test->num_pages : 0; struct page *pages[MAX_PAGES]; struct sg_table st; struct scatterlist *sg; @@ -84,14 +93,23 @@ int main(void) set_pages(pages, test->pfn, test->num_pages); sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, - test->size, test->max_seg, NULL, 0, GFP_KERNEL); + test->size, test->max_seg, NULL, left_pages, GFP_KERNEL); assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); if (test->alloc_ret) continue; + if (test->pfn_app) { + set_pages(pages, test->pfn_app, test->num_pages); + sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, + test->size, test->max_seg, sg, 0, GFP_KERNEL); + + assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); + } + VALIDATE(st.nents == test->expected_segments, &st, test); - VALIDATE(st.orig_nents == test->expected_segments, &st, test); + if (!test->pfn_app) + VALIDATE(st.orig_nents == test->expected_segments, &st, test); sg_free_table(&st); } From 6f6e2dcbb82b9b2ea304fe32635789fedd4e9868 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Sat, 14 Nov 2020 17:58:36 +0800 Subject: [PATCH 092/147] RDMA/hns: Refactor the hns_roce_buf allocation flow Add a group of flags to control the 'struct hns_roce_buf' allocation flow, this is used to support the caller running in atomic context. Link: https://lore.kernel.org/r/1605347916-15964-1-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_alloc.c | 128 +++++++++++--------- drivers/infiniband/hw/hns/hns_roce_device.h | 51 ++++---- drivers/infiniband/hw/hns/hns_roce_mr.c | 39 ++---- 3 files changed, 113 insertions(+), 105 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index a6b23dec1adcf6..dad2b9ba7b7ac6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -159,76 +159,96 @@ void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap) void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf) { - struct device *dev = hr_dev->dev; - u32 size = buf->size; - int i; + struct hns_roce_buf_list *trunks; + u32 i; - if (size == 0) + if (!buf) return; - buf->size = 0; + trunks = buf->trunk_list; + if (trunks) { + buf->trunk_list = NULL; + for (i = 0; i < buf->ntrunks; i++) + dma_free_coherent(hr_dev->dev, 1 << buf->trunk_shift, + trunks[i].buf, trunks[i].map); - if (hns_roce_buf_is_direct(buf)) { - dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map); - } else { - for (i = 0; i < buf->npages; ++i) - if (buf->page_list[i].buf) - dma_free_coherent(dev, 1 << buf->page_shift, - buf->page_list[i].buf, - buf->page_list[i].map); - kfree(buf->page_list); - buf->page_list = NULL; + kfree(trunks); } + + kfree(buf); } -int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, - struct hns_roce_buf *buf, u32 page_shift) +/* + * Allocate the dma buffer for storing ROCEE table entries + * + * @size: required size + * @page_shift: the unit size in a continuous dma address range + * @flags: HNS_ROCE_BUF_ flags to control the allocation flow. + */ +struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, + u32 page_shift, u32 flags) { - struct hns_roce_buf_list *buf_list; - struct device *dev = hr_dev->dev; - u32 page_size; - int i; + u32 trunk_size, page_size, alloced_size; + struct hns_roce_buf_list *trunks; + struct hns_roce_buf *buf; + gfp_t gfp_flags; + u32 ntrunk, i; /* The minimum shift of the page accessed by hw is HNS_HW_PAGE_SHIFT */ - buf->page_shift = max_t(int, HNS_HW_PAGE_SHIFT, page_shift); + if (WARN_ON(page_shift < HNS_HW_PAGE_SHIFT)) + return ERR_PTR(-EINVAL); + + gfp_flags = (flags & HNS_ROCE_BUF_NOSLEEP) ? GFP_ATOMIC : GFP_KERNEL; + buf = kzalloc(sizeof(*buf), gfp_flags); + if (!buf) + return ERR_PTR(-ENOMEM); + buf->page_shift = page_shift; page_size = 1 << buf->page_shift; - buf->npages = DIV_ROUND_UP(size, page_size); - - /* required size is not bigger than one trunk size */ - if (size <= max_direct) { - buf->page_list = NULL; - buf->direct.buf = dma_alloc_coherent(dev, size, - &buf->direct.map, - GFP_KERNEL); - if (!buf->direct.buf) - return -ENOMEM; + + /* Calc the trunk size and num by required size and page_shift */ + if (flags & HNS_ROCE_BUF_DIRECT) { + buf->trunk_shift = ilog2(ALIGN(size, PAGE_SIZE)); + ntrunk = 1; } else { - buf_list = kcalloc(buf->npages, sizeof(*buf_list), GFP_KERNEL); - if (!buf_list) - return -ENOMEM; - - for (i = 0; i < buf->npages; i++) { - buf_list[i].buf = dma_alloc_coherent(dev, page_size, - &buf_list[i].map, - GFP_KERNEL); - if (!buf_list[i].buf) - break; - } + buf->trunk_shift = ilog2(ALIGN(page_size, PAGE_SIZE)); + ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift); + } - if (i != buf->npages && i > 0) { - while (i-- > 0) - dma_free_coherent(dev, page_size, - buf_list[i].buf, - buf_list[i].map); - kfree(buf_list); - return -ENOMEM; - } - buf->page_list = buf_list; + trunks = kcalloc(ntrunk, sizeof(*trunks), gfp_flags); + if (!trunks) { + kfree(buf); + return ERR_PTR(-ENOMEM); } - buf->size = size; - return 0; + trunk_size = 1 << buf->trunk_shift; + alloced_size = 0; + for (i = 0; i < ntrunk; i++) { + trunks[i].buf = dma_alloc_coherent(hr_dev->dev, trunk_size, + &trunks[i].map, gfp_flags); + if (!trunks[i].buf) + break; + + alloced_size += trunk_size; + } + + buf->ntrunks = i; + + /* In nofail mode, it's only failed when the alloced size is 0 */ + if ((flags & HNS_ROCE_BUF_NOFAIL) ? i == 0 : i != ntrunk) { + for (i = 0; i < buf->ntrunks; i++) + dma_free_coherent(hr_dev->dev, trunk_size, + trunks[i].buf, trunks[i].map); + + kfree(trunks); + kfree(buf); + return ERR_PTR(-ENOMEM); + } + + buf->npages = DIV_ROUND_UP(alloced_size, page_size); + buf->trunk_list = trunks; + + return buf; } int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1d99022f70f796..11a137f4d7e2fd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -267,9 +267,6 @@ enum { #define HNS_HW_PAGE_SHIFT 12 #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) -/* The minimum page count for hardware access page directly. */ -#define HNS_HW_DIRECT_PAGE_COUNT 2 - struct hns_roce_uar { u64 pfn; unsigned long index; @@ -421,11 +418,26 @@ struct hns_roce_buf_list { dma_addr_t map; }; +/* + * %HNS_ROCE_BUF_DIRECT indicates that the all memory must be in a continuous + * dma address range. + * + * %HNS_ROCE_BUF_NOSLEEP indicates that the caller cannot sleep. + * + * %HNS_ROCE_BUF_NOFAIL allocation only failed when allocated size is zero, even + * the allocated size is smaller than the required size. + */ +enum { + HNS_ROCE_BUF_DIRECT = BIT(0), + HNS_ROCE_BUF_NOSLEEP = BIT(1), + HNS_ROCE_BUF_NOFAIL = BIT(2), +}; + struct hns_roce_buf { - struct hns_roce_buf_list direct; - struct hns_roce_buf_list *page_list; + struct hns_roce_buf_list *trunk_list; + u32 ntrunks; u32 npages; - u32 size; + unsigned int trunk_shift; unsigned int page_shift; }; @@ -1081,29 +1093,18 @@ static inline struct hns_roce_qp return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1)); } -static inline bool hns_roce_buf_is_direct(struct hns_roce_buf *buf) -{ - if (buf->page_list) - return false; - - return true; -} - static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) { - if (hns_roce_buf_is_direct(buf)) - return (char *)(buf->direct.buf) + (offset & (buf->size - 1)); - - return (char *)(buf->page_list[offset >> buf->page_shift].buf) + - (offset & ((1 << buf->page_shift) - 1)); + return (char *)(buf->trunk_list[offset >> buf->trunk_shift].buf) + + (offset & ((1 << buf->trunk_shift) - 1)); } static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx) { - if (hns_roce_buf_is_direct(buf)) - return buf->direct.map + ((dma_addr_t)idx << buf->page_shift); - else - return buf->page_list[idx].map; + int offset = idx << buf->page_shift; + + return buf->trunk_list[offset >> buf->trunk_shift].map + + (offset & ((1 << buf->trunk_shift) - 1)); } #define hr_hw_page_align(x) ALIGN(x, 1 << HNS_HW_PAGE_SHIFT) @@ -1227,8 +1228,8 @@ int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int hns_roce_dealloc_mw(struct ib_mw *ibmw); void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf); -int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, - struct hns_roce_buf *buf, u32 page_shift); +struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, + u32 page_shift, u32 flags); int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, int start, struct hns_roce_buf *buf); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 7f81a695e9af93..87e2e6236c6922 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -695,15 +695,6 @@ static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr) return size; } -static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size, - unsigned int page_shift) -{ - if (is_direct) - return ALIGN(alloc_size, 1 << page_shift); - else - return HNS_HW_DIRECT_PAGE_COUNT << page_shift; -} - /* * check the given pages in continuous address space * Returns 0 on success, or the error page num. @@ -732,7 +723,6 @@ static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) /* release kernel buffers */ if (mtr->kmem) { hns_roce_buf_free(hr_dev, mtr->kmem); - kfree(mtr->kmem); mtr->kmem = NULL; } } @@ -744,13 +734,12 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct ib_device *ibdev = &hr_dev->ib_dev; unsigned int best_pg_shift; int all_pg_count = 0; - size_t direct_size; size_t total_size; int ret; total_size = mtr_bufs_size(buf_attr); if (total_size < 1) { - ibdev_err(ibdev, "Failed to check mtr size\n"); + ibdev_err(ibdev, "failed to check mtr size\n."); return -EINVAL; } @@ -762,7 +751,7 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); if (IS_ERR_OR_NULL(mtr->umem)) { - ibdev_err(ibdev, "Failed to get umem, ret %ld\n", + ibdev_err(ibdev, "failed to get umem, ret = %ld.\n", PTR_ERR(mtr->umem)); return -ENOMEM; } @@ -780,19 +769,16 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, ret = 0; } else { mtr->umem = NULL; - mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL); - if (!mtr->kmem) { - ibdev_err(ibdev, "Failed to alloc kmem\n"); - return -ENOMEM; - } - direct_size = mtr_kmem_direct_size(is_direct, total_size, - buf_attr->page_shift); - ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size, - mtr->kmem, buf_attr->page_shift); - if (ret) { - ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret); - goto err_alloc_mem; + mtr->kmem = + hns_roce_buf_alloc(hr_dev, total_size, + buf_attr->page_shift, + is_direct ? HNS_ROCE_BUF_DIRECT : 0); + if (IS_ERR(mtr->kmem)) { + ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n", + PTR_ERR(mtr->kmem)); + return PTR_ERR(mtr->kmem); } + best_pg_shift = buf_attr->page_shift; all_pg_count = mtr->kmem->npages; } @@ -800,7 +786,8 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, /* must bigger than minimum hardware page shift */ if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) { ret = -EINVAL; - ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n", + ibdev_err(ibdev, + "failed to check mtr, page shift = %u count = %d.\n", best_pg_shift, all_pg_count); goto err_alloc_mem; } From dd37d2f59eb839d51b988f6668ce5f0d533b23fd Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 18 Nov 2020 09:33:23 -0400 Subject: [PATCH 093/147] RDMA/cma: Fix deadlock on &lock in rdma_cma_listen_on_all() error unwind rdma_detroy_id() cannot be called under &lock - we must instead keep the error'd ID around until &lock can be released, then destroy it. This is complicated by the usual way listen IDs are destroyed through cma_process_remove() which can run at any time and will asynchronously destroy the same ID. Remove the ID from visiblity of cma_process_remove() before going down the destroy path outside the locking. Fixes: c80a0c52d85c ("RDMA/cma: Add missing error handling of listen_id") Link: https://lore.kernel.org/r/20201118133756.GK244516@ziepe.ca Reported-by: syzbot+1bc48bf7f78253f664a9@syzkaller.appspotmail.com Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 4585f654f88369..c06c87a4dc5e7d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2496,7 +2496,8 @@ static int cma_listen_handler(struct rdma_cm_id *id, } static int cma_listen_on_dev(struct rdma_id_private *id_priv, - struct cma_device *cma_dev) + struct cma_device *cma_dev, + struct rdma_id_private **to_destroy) { struct rdma_id_private *dev_id_priv; struct net *net = id_priv->id.route.addr.dev_addr.net; @@ -2504,6 +2505,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, lockdep_assert_held(&lock); + *to_destroy = NULL; if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return 0; @@ -2518,7 +2520,6 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, rdma_addr_size(cma_src_addr(id_priv))); _cma_attach_to_dev(dev_id_priv, cma_dev); - list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); cma_id_get(id_priv); dev_id_priv->internal_id = 1; dev_id_priv->afonly = id_priv->afonly; @@ -2528,25 +2529,31 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); if (ret) goto err_listen; + list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); return 0; err_listen: - list_del(&id_priv->listen_list); + /* Caller must destroy this after releasing lock */ + *to_destroy = dev_id_priv; dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret); - rdma_destroy_id(&dev_id_priv->id); return ret; } static int cma_listen_on_all(struct rdma_id_private *id_priv) { + struct rdma_id_private *to_destroy; struct cma_device *cma_dev; int ret; mutex_lock(&lock); list_add_tail(&id_priv->list, &listen_any_list); list_for_each_entry(cma_dev, &dev_list, list) { - ret = cma_listen_on_dev(id_priv, cma_dev); - if (ret) + ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); + if (ret) { + /* Prevent racing with cma_process_remove() */ + if (to_destroy) + list_del_init(&to_destroy->list); goto err_listen; + } } mutex_unlock(&lock); return 0; @@ -2554,6 +2561,8 @@ static int cma_listen_on_all(struct rdma_id_private *id_priv) err_listen: list_del(&id_priv->list); mutex_unlock(&lock); + if (to_destroy) + rdma_destroy_id(&to_destroy->id); return ret; } @@ -4855,6 +4864,7 @@ static void cma_process_remove(struct cma_device *cma_dev) static int cma_add_one(struct ib_device *device) { + struct rdma_id_private *to_destroy; struct cma_device *cma_dev; struct rdma_id_private *id_priv; unsigned int i; @@ -4902,7 +4912,7 @@ static int cma_add_one(struct ib_device *device) mutex_lock(&lock); list_add_tail(&cma_dev->list, &dev_list); list_for_each_entry(id_priv, &listen_any_list, list) { - ret = cma_listen_on_dev(id_priv, cma_dev); + ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); if (ret) goto free_listen; } @@ -4915,6 +4925,7 @@ static int cma_add_one(struct ib_device *device) list_del(&cma_dev->list); mutex_unlock(&lock); + /* cma_process_remove() will delete to_destroy */ cma_process_remove(cma_dev); kfree(cma_dev->default_roce_tos); free_gid_type: From f957d4d09a5ff79b0f7d29fb60ba7682260e58a5 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Sun, 15 Nov 2020 14:14:24 +0200 Subject: [PATCH 094/147] RDMA/mlx5: Enable querying AH for XRC QP types Address handle is set for connected QP types such as RC and UC, and thus can also be queried. Since XRC QP types INI and TGT are connected, it should be possible to query their address handle as well. Until now it was not the case, and although the firmware supported it, the driver allowed querying the address handle only for RC and UC. Hence, we enable it now for INI and TGT QPs as well. Link: https://lore.kernel.org/r/20201115121425.139833-2-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Avihai Horon Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8944249092663c..8d40c929b2f414 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4555,7 +4555,9 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path); - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC_INI || + qp->ibqp.qp_type == IB_QPT_XRC_TGT) { to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path); to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, alt_path); qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index); From 8138a4c21b090ae400916a9fdf0e59296aae01c6 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Sun, 15 Nov 2020 14:14:25 +0200 Subject: [PATCH 095/147] RDMA/mlx4: Enable querying AH for XRC QP types Address handle is set for connected QP types such as RC and UC, and thus can also be queried. Since XRC QP types INI and TGT are connected, it should be possible to query their address handle as well. Until now it was not the case, and although the firmware supported it, the driver allowed querying the address handle only for RC and UC. Hence, we enable it now for INI and TGT QPs as well. Link: https://lore.kernel.org/r/20201115121425.139833-3-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Avihai Horon Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 47b9ed5599b396..7636e7f1485892 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -4010,7 +4010,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr qp_attr->qp_access_flags = to_ib_qp_access_flags(be32_to_cpu(context.params2)); - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC_INI || + qp->ibqp.qp_type == IB_QPT_XRC_TGT) { to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; From 7406c0036f851ee1cd93cb08349f24b051b4cbf8 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Mon, 16 Nov 2020 19:33:22 +0800 Subject: [PATCH 096/147] RDMA/hns: Only record vlan info for HIP08 Information about vlan is stored in GMV(GID/MAC/VLAN) table for HIP09, so there is no need to copy it to address vector. Link: https://lore.kernel.org/r/1605526408-6936-2-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_ah.c | 51 ++++++++++----------- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 13 ++++-- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 75b06db60f7c2a..3be80d42e03a9f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -31,13 +31,13 @@ */ #include +#include #include #include #include "hns_roce_device.h" -#define HNS_ROCE_PORT_NUM_SHIFT 24 -#define HNS_ROCE_VLAN_SL_BIT_MASK 7 -#define HNS_ROCE_VLAN_SL_SHIFT 13 +#define VLAN_SL_MASK 7 +#define VLAN_SL_SHIFT 13 static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) { @@ -58,37 +58,16 @@ static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); - const struct ib_gid_attr *gid_attr; - struct device *dev = hr_dev->dev; - struct hns_roce_ah *ah = to_hr_ah(ibah); struct rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); - u16 vlan_id = 0xffff; - bool vlan_en = false; - int ret; - - gid_attr = ah_attr->grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); - if (ret) - return ret; + struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); + struct hns_roce_ah *ah = to_hr_ah(ibah); + int ret = 0; - /* Get mac address */ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); - if (vlan_id < VLAN_N_VID) { - vlan_en = true; - vlan_id |= (rdma_ah_get_sl(ah_attr) & - HNS_ROCE_VLAN_SL_BIT_MASK) << - HNS_ROCE_VLAN_SL_SHIFT; - } - ah->av.port = rdma_ah_get_port_num(ah_attr); ah->av.gid_index = grh->sgid_index; - ah->av.vlan_id = vlan_id; - ah->av.vlan_en = vlan_en; - dev_dbg(dev, "gid_index = 0x%x,vlan_id = 0x%x\n", ah->av.gid_index, - ah->av.vlan_id); if (rdma_ah_get_static_rate(ah_attr)) ah->av.stat_rate = IB_RATE_10_GBPS; @@ -98,7 +77,23 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, ah->av.flowlabel = grh->flow_label; ah->av.udp_sport = get_ah_udp_sport(ah_attr); - return 0; + /* HIP08 needs to record vlan info in Address Vector */ + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) { + ah->av.vlan_en = 0; + + ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, + &ah->av.vlan_id, NULL); + if (ret) + return ret; + + if (ah->av.vlan_id < VLAN_N_VID) { + ah->av.vlan_en = 1; + ah->av.vlan_id |= (rdma_ah_get_sl(ah_attr) & VLAN_SL_MASK) << + VLAN_SL_SHIFT; + } + } + + return ret; } int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 11a137f4d7e2fd..1c1deb4ff94804 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -561,7 +561,7 @@ struct hns_roce_av { u8 dgid[HNS_ROCE_GID_SIZE]; u8 mac[ETH_ALEN]; u16 vlan_id; - bool vlan_en; + u8 vlan_en; }; struct hns_roce_ah { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4b8291263e5cd0..f79c97e21440ef 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -492,8 +492,6 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M, V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, - V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, @@ -505,11 +503,18 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_PORTN_M, V2_UD_SEND_WQE_BYTE_40_PORTN_S, qp->port); - roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, - ah->av.vlan_en ? 1 : 0); roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, ah->av.gid_index); + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) { + roce_set_bit(ud_sq_wqe->byte_40, + V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, + ah->av.vlan_en); + roce_set_field(ud_sq_wqe->byte_36, + V2_UD_SEND_WQE_BYTE_36_VLAN_M, + V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); + } + memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2); set_extend_sge(qp, wr, &curr_idx, valid_num_sge); From fba429fcf9a5e0c4ec2523ecf4cf18bc0507fcbc Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Mon, 16 Nov 2020 19:33:23 +0800 Subject: [PATCH 097/147] RDMA/hns: Fix missing fields in address vector Traffic class and hop limit in address vector is not assigned from GRH, but it will be filled into UD SQ WQE. So the hardware will get a wrong value. Fixes: 82e620d9c3a0 ("RDMA/hns: Modify the data structure of hns_roce_av") Link: https://lore.kernel.org/r/1605526408-6936-3-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_ah.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 3be80d42e03a9f..d65ff6aa322fa6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -64,18 +64,20 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct hns_roce_ah *ah = to_hr_ah(ibah); int ret = 0; - memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); - ah->av.port = rdma_ah_get_port_num(ah_attr); ah->av.gid_index = grh->sgid_index; if (rdma_ah_get_static_rate(ah_attr)) ah->av.stat_rate = IB_RATE_10_GBPS; - memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); - ah->av.sl = rdma_ah_get_sl(ah_attr); + ah->av.hop_limit = grh->hop_limit; ah->av.flowlabel = grh->flow_label; ah->av.udp_sport = get_ah_udp_sport(ah_attr); + ah->av.sl = rdma_ah_get_sl(ah_attr); + ah->av.tclass = grh->traffic_class; + + memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); + memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); /* HIP08 needs to record vlan info in Address Vector */ if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) { From 3631dadfb118821236098a215e59fb5d3e1c30a8 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Mon, 16 Nov 2020 19:33:24 +0800 Subject: [PATCH 098/147] RDMA/hns: Avoid setting loopback indicator when smac is same as dmac The loopback flag will be set to 1 by the hardware when the source mac address is same as the destination mac address. So the driver don't need to compare them. Fixes: d6a3627e311c ("RDMA/hns: Optimize wqe buffer set flow for post send") Link: https://lore.kernel.org/r/1605526408-6936-4-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f79c97e21440ef..438662826e3dc8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -433,8 +433,6 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, unsigned int curr_idx = *sge_idx; int valid_num_sge; u32 msg_len = 0; - bool loopback; - u8 *smac; int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); @@ -457,13 +455,6 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_5_M, V2_UD_SEND_WQE_BYTE_48_DMAC_5_S, ah->av.mac[5]); - /* MAC loopback */ - smac = (u8 *)hr_dev->dev_addr[qp->port]; - loopback = ether_addr_equal_unaligned(ah->av.mac, smac) ? 1 : 0; - - roce_set_bit(ud_sq_wqe->byte_40, - V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback); - ud_sq_wqe->msg_len = cpu_to_le32(msg_len); /* Set sig attr */ From 148f904c6f94cbd9067008142268524a95320dde Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Mon, 16 Nov 2020 19:33:25 +0800 Subject: [PATCH 099/147] RDMA/hns: Remove the portn field in UD SQ WQE This field in UD WQE in not used by hardware. Fixes: 7bdee4158b37 ("RDMA/hns: Fill sq wqe context of ud type in hip08") Link: https://lore.kernel.org/r/1605526408-6936-5-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 -- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3 --- 2 files changed, 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 438662826e3dc8..78993fec3fcdd9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -491,8 +491,6 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_PORTN_M, - V2_UD_SEND_WQE_BYTE_40_PORTN_S, qp->port); roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, ah->av.gid_index); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 1409d05a0fc107..146688809f7815 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1121,9 +1121,6 @@ struct hns_roce_v2_ud_send_wqe { #define V2_UD_SEND_WQE_BYTE_40_SL_S 20 #define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20) -#define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24 -#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24) - #define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30 #define V2_UD_SEND_WQE_BYTE_40_LBI_S 31 From 534c9bdb025b68b6c575fbc15976b4470e032691 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Mon, 16 Nov 2020 19:33:26 +0800 Subject: [PATCH 100/147] RDMA/hns: Simplify process of filling UD SQ WQE There are some codes can be simplified or encapsulated in set_ud_wqe() to make them easier to be understand. Link: https://lore.kernel.org/r/1605526408-6936-6-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 83 +++++++++++----------- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 29 +------- 2 files changed, 43 insertions(+), 69 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 78993fec3fcdd9..8ba88017da5389 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -422,16 +422,49 @@ static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, return 0; } +static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, + struct hns_roce_ah *ah) +{ + struct ib_device *ib_dev = ah->ibah.device; + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + + roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, + V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); + + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, + V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, + V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, + V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, + V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); + + ud_sq_wqe->sgid_index = ah->av.gid_index; + + memcpy(ud_sq_wqe->dmac, ah->av.mac, ETH_ALEN); + memcpy(ud_sq_wqe->dgid, ah->av.dgid, GID_LEN_V2); + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return 0; + + roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, + ah->av.vlan_en); + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, + V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); + + return 0; +} + static inline int set_ud_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, unsigned int owner_bit) { - struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); struct hns_roce_v2_ud_send_wqe *ud_sq_wqe = wqe; unsigned int curr_idx = *sge_idx; - int valid_num_sge; + unsigned int valid_num_sge; u32 msg_len = 0; int ret; @@ -442,28 +475,13 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, if (WARN_ON(ret)) return ret; - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M, - V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M, - V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M, - V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M, - V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]); - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_4_M, - V2_UD_SEND_WQE_BYTE_48_DMAC_4_S, ah->av.mac[4]); - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_5_M, - V2_UD_SEND_WQE_BYTE_48_DMAC_5_S, ah->av.mac[5]); - ud_sq_wqe->msg_len = cpu_to_le32(msg_len); - /* Set sig attr */ roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S, - (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); + !!(wr->send_flags & IB_SEND_SIGNALED)); - /* Set se attr */ roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S, - (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0); + !!(wr->send_flags & IB_SEND_SOLICITED)); roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M, V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn); @@ -476,35 +494,14 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, curr_idx & (qp->sge.sge_cnt - 1)); - roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, - V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ? qp->qkey : ud_wr(wr)->remote_qkey); roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M, V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, - V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, - V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, - V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, - V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); - - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M, - V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, ah->av.gid_index); - - if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) { - roce_set_bit(ud_sq_wqe->byte_40, - V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, - ah->av.vlan_en); - roce_set_field(ud_sq_wqe->byte_36, - V2_UD_SEND_WQE_BYTE_36_VLAN_M, - V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); - } - - memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2); + ret = fill_ud_av(ud_sq_wqe, ah); + if (ret) + return ret; set_extend_sge(qp, wr, &curr_idx, valid_num_sge); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 146688809f7815..c06851767a35ef 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1077,8 +1077,9 @@ struct hns_roce_v2_ud_send_wqe { __le32 byte_32; __le32 byte_36; __le32 byte_40; - __le32 dmac; - __le32 byte_48; + u8 dmac[ETH_ALEN]; + u8 sgid_index; + u8 smac_index; u8 dgid[GID_LEN_V2]; }; @@ -1125,30 +1126,6 @@ struct hns_roce_v2_ud_send_wqe { #define V2_UD_SEND_WQE_BYTE_40_LBI_S 31 -#define V2_UD_SEND_WQE_DMAC_0_S 0 -#define V2_UD_SEND_WQE_DMAC_0_M GENMASK(7, 0) - -#define V2_UD_SEND_WQE_DMAC_1_S 8 -#define V2_UD_SEND_WQE_DMAC_1_M GENMASK(15, 8) - -#define V2_UD_SEND_WQE_DMAC_2_S 16 -#define V2_UD_SEND_WQE_DMAC_2_M GENMASK(23, 16) - -#define V2_UD_SEND_WQE_DMAC_3_S 24 -#define V2_UD_SEND_WQE_DMAC_3_M GENMASK(31, 24) - -#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_S 0 -#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_M GENMASK(7, 0) - -#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_S 8 -#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_M GENMASK(15, 8) - -#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S 16 -#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M GENMASK(23, 16) - -#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_S 24 -#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_M GENMASK(31, 24) - struct hns_roce_v2_rc_send_wqe { __le32 byte_4; __le32 msg_len; From 66d86e529dd58616495ea0b03cc687e5d6522b59 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Mon, 16 Nov 2020 19:33:27 +0800 Subject: [PATCH 101/147] RDMA/hns: Add UD support for HIP09 HIP09 supports service type of Unreliable Datagram, add necessary process to enable this feature. Link: https://lore.kernel.org/r/1605526408-6936-7-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_ah.c | 3 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 ++-- drivers/infiniband/hw/hns/hns_roce_main.c | 1 + drivers/infiniband/hw/hns/hns_roce_qp.c | 39 ++++++++++++++++------ 4 files changed, 37 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index d65ff6aa322fa6..b09ef3335e9688 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -64,6 +64,9 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct hns_roce_ah *ah = to_hr_ah(ibah); int ret = 0; + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && udata) + return -EOPNOTSUPP; + ah->av.port = rdma_ah_get_port_num(ah_attr); ah->av.gid_index = grh->sgid_index; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8ba88017da5389..1bd81fb7b6e148 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -693,7 +693,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); /* Corresponding to the QP type, wqe process separately */ - if (ibqp->qp_type == IB_QPT_GSI) + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit); else if (ibqp->qp_type == IB_QPT_RC) ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit); @@ -5151,7 +5151,9 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, unsigned long flags; int ret = 0; - if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) { + if ((hr_qp->ibqp.qp_type == IB_QPT_RC || + hr_qp->ibqp.qp_type == IB_QPT_UD) && + hr_qp->state != IB_QPS_RESET) { /* Modify qp to reset before destroying qp */ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 97fdc55e992a92..f01590d8c3cfe6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -424,6 +424,7 @@ static const struct ib_device_ops hns_roce_dev_ops = { .alloc_pd = hns_roce_alloc_pd, .alloc_ucontext = hns_roce_alloc_ucontext, .create_ah = hns_roce_create_ah, + .create_user_ah = hns_roce_create_ah, .create_cq = hns_roce_create_cq, .create_qp = hns_roce_create_qp, .dealloc_pd = hns_roce_dealloc_pd, diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index e288946c6138fa..5e505a30c2eab8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -998,6 +998,30 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, kfree(hr_qp); } +static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type, + bool is_user) +{ + switch (type) { + case IB_QPT_UD: + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && + is_user) + goto out; + fallthrough; + case IB_QPT_RC: + case IB_QPT_GSI: + break; + default: + goto out; + } + + return 0; + +out: + ibdev_err(&hr_dev->ib_dev, "not support QP type %d\n", type); + + return -EOPNOTSUPP; +} + struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) @@ -1007,15 +1031,9 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, struct hns_roce_qp *hr_qp; int ret; - switch (init_attr->qp_type) { - case IB_QPT_RC: - case IB_QPT_GSI: - break; - default: - ibdev_err(ibdev, "not support QP type %d\n", - init_attr->qp_type); - return ERR_PTR(-EOPNOTSUPP); - } + ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata); + if (ret) + return ERR_PTR(ret); hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); if (!hr_qp) @@ -1030,10 +1048,11 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, if (ret) { ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n", init_attr->qp_type, ret); - ibdev_err(ibdev, "Create GSI QP failed!\n"); + kfree(hr_qp); return ERR_PTR(ret); } + return &hr_qp->ibqp; } From d0b7721c5e0d940a514fbff40b9d7bae92735f7a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 24 Nov 2020 19:34:33 -0400 Subject: [PATCH 102/147] RDMA/mlx5: Check for ERR_PTR from uverbs_zalloc() The return code from uverbs_zalloc() was wrongly checked, it is ERR_PTR not NULL like other allocators: drivers/infiniband/hw/mlx5/devx.c:2110 devx_umem_reg_cmd_alloc() warn: passing zero to 'PTR_ERR' Fixes: 878f7b31c3a7 ("RDMA/mlx5: Use ib_umem_find_best_pgsz() for devx") Link: https://lore.kernel.org/r/0-v1-4d05ccc1c223+173-devx_err_ptr_jgg@nvidia.com Reported-by: kernel test robot Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 7c3eefba619716..ad0173f62c0e95 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2106,7 +2106,7 @@ static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, (MLX5_ST_SZ_BYTES(mtt) * ib_umem_num_dma_blocks(obj->umem, page_size)); cmd->in = uverbs_zalloc(attrs, cmd->inlen); - if (!cmd->in) + if (IS_ERR(cmd->in)) return PTR_ERR(cmd->in); umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); From d4b2d19dc53ecb5ef4fe79cc2d4b7ae3413b2604 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 25 Nov 2020 08:17:04 +0200 Subject: [PATCH 103/147] RDMA/mlx5: Silence the overflow warning while building offset mask Coverity reports "Potentially overflowing expression ..." warning, which is correct thing to complain from the compiler point of view, but this is not possible in the current code. Still, this is a small error as there are some future situations that might need to use a 32 bit offset. Use ULL so the calculation works up to 63. Fixes: b045db62f6f6 ("RDMA/mlx5: Use ib_umem_find_best_pgoff() for SRQ") Link: https://lore.kernel.org/r/20201125061704.6580-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index f8ec5156d8e963..844545064c9ea6 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -62,7 +62,7 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff( unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, unsigned int *page_offset_quantized) { - const u64 page_offset_mask = (1 << page_offset_bits) - 1; + const u64 page_offset_mask = (1UL << page_offset_bits) - 1; unsigned long page_size; u64 page_offset; From 7ec3df174f2b225267849d5e645d641d5f98dcd8 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 25 Nov 2020 08:46:28 +0200 Subject: [PATCH 104/147] RDMA/mlx5: Use PCI device for dma mappings DMA operation of the IB device is done using ib_device->dma_device. Instead of accessing parent of the IB device, use the PCI dma device which is setup to ib_device->dma_device during IB device registration. Link: https://lore.kernel.org/r/20201125064628.8431-1-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b091d84ba4359a..b6116f6d065de3 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1075,7 +1075,7 @@ static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, unsigned int flags) { struct mlx5_ib_dev *dev = mr->dev; - struct device *ddev = dev->ib_dev.dev.parent; + struct device *ddev = &dev->mdev->pdev->dev; dma_addr_t dma; void *xlt; @@ -1112,7 +1112,7 @@ static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, struct ib_sge *sg) { - struct device *ddev = dev->ib_dev.dev.parent; + struct device *ddev = &dev->mdev->pdev->dev; dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); mlx5_ib_free_xlt(xlt, sg->length); @@ -1137,7 +1137,7 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags) { struct mlx5_ib_dev *dev = mr->dev; - struct device *ddev = dev->ib_dev.dev.parent; + struct device *ddev = &dev->mdev->pdev->dev; void *xlt; struct mlx5_umr_wr wr; struct ib_sge sg; @@ -1216,7 +1216,7 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) { struct mlx5_ib_dev *dev = mr->dev; - struct device *ddev = dev->ib_dev.dev.parent; + struct device *ddev = &dev->mdev->pdev->dev; struct ib_block_iter biter; struct mlx5_mtt *cur_mtt; struct mlx5_umr_wr wr; @@ -1733,6 +1733,8 @@ mlx5_alloc_priv_descs(struct ib_device *device, int ndescs, int desc_size) { + struct mlx5_ib_dev *dev = to_mdev(device); + struct device *ddev = &dev->mdev->pdev->dev; int size = ndescs * desc_size; int add_size; int ret; @@ -1745,9 +1747,8 @@ mlx5_alloc_priv_descs(struct ib_device *device, mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); - mr->desc_map = dma_map_single(device->dev.parent, mr->descs, - size, DMA_TO_DEVICE); - if (dma_mapping_error(device->dev.parent, mr->desc_map)) { + mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, mr->desc_map)) { ret = -ENOMEM; goto err; } @@ -1765,9 +1766,10 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) if (mr->descs) { struct ib_device *device = mr->ibmr.device; int size = mr->max_descs * mr->desc_size; + struct mlx5_ib_dev *dev = to_mdev(device); - dma_unmap_single(device->dev.parent, mr->desc_map, - size, DMA_TO_DEVICE); + dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, + DMA_TO_DEVICE); kfree(mr->descs_alloc); mr->descs = NULL; } From b47a98efa97889c5b16d17e77eed3dc4500674eb Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 17 Nov 2020 09:01:46 +0200 Subject: [PATCH 105/147] RDMA/core: Track device memory MRs Device memory (DM) are registered as MR during initialization flow, these MRs were not tracked by resource tracker and had res->valid set as a false. Update the code to manage them too. Before this change: $ ibv_rc_pingpong -j & $ rdma res show mr <-- shows nothing After this change: $ ibv_rc_pingpong -j & $ rdma res show mr dev ibp0s9 mrn 0 mrlen 4096 pdn 3 pid 734 comm ibv_rc_pingpong Fixes: be934cca9e98 ("IB/uverbs: Add device memory registration ioctl support") Link: https://lore.kernel.org/r/20201117070148.1974114-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types_mr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 9b22bb553e8b32..dc58564417292a 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -33,6 +33,7 @@ #include "rdma_core.h" #include "uverbs.h" #include +#include "restrack.h" static int uverbs_free_mr(struct ib_uobject *uobject, enum rdma_remove_reason why, @@ -134,6 +135,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( atomic_inc(&pd->usecnt); atomic_inc(&dm->usecnt); + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); + rdma_restrack_set_name(&mr->res, NULL); + rdma_restrack_add(&mr->res); uobj->object = mr; uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE); From 2b1f747071c5ce5ad571d80c1541b732cf07f9c1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 17 Nov 2020 09:01:47 +0200 Subject: [PATCH 106/147] RDMA/core: Allow drivers to disable restrack DB Driver QP types are special case with no IBTA restrictions. For example, EFA implemented creation of this QP type as regular one, while mlx5 separated create to two step: create and modify. That separation causes to the situation where DC QP (mlx5) is always added to the same xarray index zero. This change allows to drivers like mlx5 simply disable restrack DB tracking, but it doesn't disable kref on the memory. Fixes: 52e0a118a203 ("RDMA/restrack: Track driver QP types in resource tracker") Link: https://lore.kernel.org/r/20201117070148.1974114-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 2 +- drivers/infiniband/core/restrack.c | 12 ++++++++++-- drivers/infiniband/hw/mlx4/qp.c | 5 +++++ drivers/infiniband/hw/mlx5/qp.c | 2 +- include/rdma/restrack.h | 24 ++++++++++++++++++++++++ 5 files changed, 41 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 2c67ba6a272579..92745522250e4a 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -285,7 +285,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) struct rdma_counter *counter; int ret; - if (!qp->res.valid || rdma_is_kernel_res(&qp->res)) + if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res)) return 0; if (!rdma_is_port_valid(dev, port)) diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index a123b22b454a3c..e0a41c8670023a 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -221,11 +221,14 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); struct rdma_restrack_root *rt; - int ret; + int ret = 0; if (!dev) return; + if (res->no_track) + goto out; + rt = &dev->res[res->type]; if (res->type == RDMA_RESTRACK_QP) { @@ -253,6 +256,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) &rt->next_id, GFP_KERNEL); } +out: if (!ret) res->valid = true; } @@ -325,6 +329,9 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) return; } + if (res->no_track) + goto out; + dev = res_to_dev(res); if (WARN_ON(!dev)) return; @@ -335,8 +342,9 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) if (res->type == RDMA_RESTRACK_MR || res->type == RDMA_RESTRACK_QP) return; WARN_ON(old != res); - res->valid = false; +out: + res->valid = false; rdma_restrack_put(res); wait_for_completion(&res->comp); } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 7636e7f1485892..651785bd57f2de 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1561,6 +1561,11 @@ static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp, if (err) return err; + if (init_attr->create_flags & + (MLX4_IB_SRIOV_SQP | MLX4_IB_SRIOV_TUNNEL_QP)) + /* Internal QP created with ib_create_qp */ + rdma_restrack_no_track(&qp->ibqp.res); + qp->port = init_attr->port_num; qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? sqpn : 1; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8d40c929b2f414..9d32f31732316e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2410,7 +2410,7 @@ static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd, } qp->state = IB_QPS_RESET; - + rdma_restrack_no_track(&qp->ibqp.res); return 0; } diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index d3a1cc5be7bcef..05e18839eaffcd 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -68,6 +68,14 @@ struct rdma_restrack_entry { * As an example for that, see mlx5 QPs with type MLX5_IB_QPT_HW_GSI */ bool valid; + /** + * @no_track: don't add this entry to restrack DB + * + * This field is used to mark an entry that doesn't need to be added to + * internal restrack DB and presented later to the users at the nldev + * query stage. + */ + u8 no_track : 1; /* * @kref: Protect destroy of the resource */ @@ -145,4 +153,20 @@ int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, u32 id); + +/** + * rdma_restrack_no_track() - don't add resource to the DB + * @res: resource entry + * + * Every user of thie API should be cross examined. + * Probaby you don't need to use this function. + */ +static inline void rdma_restrack_no_track(struct rdma_restrack_entry *res) +{ + res->no_track = true; +} +static inline bool rdma_restrack_is_tracked(struct rdma_restrack_entry *res) +{ + return !res->no_track; +} #endif /* _RDMA_RESTRACK_H_ */ From 66f57b871efc576dfe8117b65af4e805e03ea689 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 17 Nov 2020 09:01:48 +0200 Subject: [PATCH 107/147] RDMA/restrack: Support all QP types The latest changes in restrack name handling allowed to simplify the QP creation code to support all types of QPs. For example XRC QP are presented with rdmatool. $ ibv_xsrq_pingpong & $ rdma res show qp link ibp0s9/1 lqpn 0 type SMI state RTS sq-psn 0 comm [ib_core] link ibp0s9/1 lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core] link ibp0s9/1 lqpn 7 type UD state RTS sq-psn 0 comm [mlx5_ib] link ibp0s9/1 lqpn 42 type XRC_TGT state INIT sq-psn 0 path-mig-state MIGRATED comm [ib_uverbs] link ibp0s9/1 lqpn 43 type XRC_INI state INIT sq-psn 0 path-mig-state MIGRATED pdn 197 pid 419 comm ibv_xsrq_pingpong Link: https://lore.kernel.org/r/20201117070148.1974114-4-leon@kernel.org Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 25 ++++++------------- drivers/infiniband/core/uverbs_cmd.c | 4 +-- drivers/infiniband/core/uverbs_std_types_qp.c | 4 +-- drivers/infiniband/core/verbs.c | 11 ++++---- include/rdma/ib_verbs.h | 10 ++++++-- 5 files changed, 25 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 7c4752c47f80ef..baa86c86efad5d 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -318,15 +318,12 @@ struct ib_device *ib_device_get_by_index(const struct net *net, u32 index); void nldev_init(void); void nldev_exit(void); -static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, - struct ib_pd *pd, - struct ib_qp_init_attr *attr, - struct ib_udata *udata, - struct ib_uqp_object *uobj) +static inline struct ib_qp * +_ib_create_qp(struct ib_device *dev, struct ib_pd *pd, + struct ib_qp_init_attr *attr, struct ib_udata *udata, + struct ib_uqp_object *uobj, const char *caller) { - enum ib_qp_type qp_type = attr->qp_type; struct ib_qp *qp; - bool is_xrc; if (!dev->ops.create_qp) return ERR_PTR(-EOPNOTSUPP); @@ -347,7 +344,6 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, qp->srq = attr->srq; qp->rwq_ind_tbl = attr->rwq_ind_tbl; qp->event_handler = attr->event_handler; - qp->qp_type = attr->qp_type; qp->port = attr->port_num; atomic_set(&qp->usecnt, 0); @@ -356,16 +352,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, INIT_LIST_HEAD(&qp->sig_mrs); rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP); - /* - * We don't track XRC QPs for now, because they don't have PD - * and more importantly they are created internaly by driver, - * see mlx5 create_dev_resources() as an example. - */ - is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT; - if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) { - rdma_restrack_parent_name(&qp->res, &pd->res); - rdma_restrack_add(&qp->res); - } + WARN_ONCE(!udata && !caller, "Missing kernel QP owner"); + rdma_restrack_set_name(&qp->res, udata ? NULL : caller); + rdma_restrack_add(&qp->res); return qp; } diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 92c9d2a548f31a..402d0b8bf58e72 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1400,8 +1400,8 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, - obj); + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj, + NULL); if (IS_ERR(qp)) { ret = PTR_ERR(qp); diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c index 294cd29d5cede7..c00cfb5ed3879c 100644 --- a/drivers/infiniband/core/uverbs_std_types_qp.c +++ b/drivers/infiniband/core/uverbs_std_types_qp.c @@ -251,8 +251,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)( if (attr.qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, - obj); + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj, + NULL); if (IS_ERR(qp)) { ret = PTR_ERR(qp); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 33778f8674a145..5d4c7c263665cb 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1191,7 +1191,7 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, } /** - * ib_create_qp - Creates a kernel QP associated with the specified protection + * ib_create_named_qp - Creates a kernel QP associated with the specified protection * domain. * @pd: The protection domain associated with the QP. * @qp_init_attr: A list of initial attributes required to create the @@ -1200,8 +1200,9 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, * * NOTE: for user qp use ib_create_qp_user with valid udata! */ -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr) +struct ib_qp *ib_create_named_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + const char *caller) { struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device; struct ib_qp *qp; @@ -1226,7 +1227,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, if (qp_init_attr->cap.max_rdma_ctxs) rdma_rw_init_qp(device, qp_init_attr); - qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL); + qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL, caller); if (IS_ERR(qp)) return qp; @@ -1292,7 +1293,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, return ERR_PTR(ret); } -EXPORT_SYMBOL(ib_create_qp); +EXPORT_SYMBOL(ib_create_named_qp); static const struct { int valid; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 174c1bffa00c8b..7bee8abae35cf4 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3618,8 +3618,14 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, bad_recv_wr ? : &dummy); } -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr); +struct ib_qp *ib_create_named_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + const char *caller); +static inline struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr) +{ + return ib_create_named_qp(pd, init_attr, KBUILD_MODNAME); +} /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. From 71586dd2001087e89e344e2c7dcee6b4a53bb6de Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Tue, 24 Nov 2020 20:24:09 +0800 Subject: [PATCH 108/147] RDMA/hns: Create QP with selected QPN for bank load balance In order to improve performance by balancing the load between different banks of cache, the QPC cache is desigend to choose one of 8 banks according to lower 3 bits of QPN. The hns driver needs to count the number of QP on each bank and then assigns the QP being created to the bank with the minimum load first. Link: https://lore.kernel.org/r/1606220649-1465-1-git-send-email-liweihang@huawei.com Signed-off-by: Yangyang Li Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 15 ++- drivers/infiniband/hw/hns/hns_roce_qp.c | 100 ++++++++++++++++---- 2 files changed, 96 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1c1deb4ff94804..c0c8b1e3ddbbff 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -117,6 +117,8 @@ #define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 #define SRQ_DB_REG 0x230 +#define HNS_ROCE_QP_BANK_NUM 8 + /* The chip implementation of the consumer index is calculated * according to twice the actual EQ depth */ @@ -524,13 +526,22 @@ struct hns_roce_uar_table { struct hns_roce_bitmap bitmap; }; +struct hns_roce_bank { + struct ida ida; + u32 inuse; /* Number of IDs allocated */ + u32 min; /* Lowest ID to allocate. */ + u32 max; /* Highest ID to allocate. */ + u32 next; /* Next ID to allocate. */ +}; + struct hns_roce_qp_table { - struct hns_roce_bitmap bitmap; struct hns_roce_hem_table qp_table; struct hns_roce_hem_table irrl_table; struct hns_roce_hem_table trrl_table; struct hns_roce_hem_table sccc_table; struct mutex scc_mutex; + struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM]; + spinlock_t bank_lock; }; struct hns_roce_cq_table { @@ -780,7 +791,7 @@ struct hns_roce_caps { u32 max_rq_sg; u32 max_extend_sg; int num_qps; - int reserved_qps; + u32 reserved_qps; int num_qpc_timer; int num_cqc_timer; int num_srqs; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 5e505a30c2eab8..62da30a1575aa7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -154,9 +154,50 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, } } +static u8 get_least_load_bankid_for_qp(struct hns_roce_bank *bank) +{ + u32 least_load = bank[0].inuse; + u8 bankid = 0; + u32 bankcnt; + u8 i; + + for (i = 1; i < HNS_ROCE_QP_BANK_NUM; i++) { + bankcnt = bank[i].inuse; + if (bankcnt < least_load) { + least_load = bankcnt; + bankid = i; + } + } + + return bankid; +} + +static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid, + unsigned long *qpn) +{ + int id; + + id = ida_alloc_range(&bank->ida, bank->next, bank->max, GFP_KERNEL); + if (id < 0) { + id = ida_alloc_range(&bank->ida, bank->min, bank->max, + GFP_KERNEL); + if (id < 0) + return id; + } + + /* the QPN should keep increasing until the max value is reached. */ + bank->next = (id + 1) > bank->max ? bank->min : id + 1; + + /* the lower 3 bits is bankid */ + *qpn = (id << 3) | bankid; + + return 0; +} static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; unsigned long num = 0; + u8 bankid; int ret; if (hr_qp->ibqp.qp_type == IB_QPT_GSI) { @@ -169,13 +210,21 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hr_qp->doorbell_qpn = 1; } else { - ret = hns_roce_bitmap_alloc_range(&hr_dev->qp_table.bitmap, - 1, 1, &num); + spin_lock(&qp_table->bank_lock); + bankid = get_least_load_bankid_for_qp(qp_table->bank); + + ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid, + &num); if (ret) { - ibdev_err(&hr_dev->ib_dev, "Failed to alloc bitmap\n"); - return -ENOMEM; + ibdev_err(&hr_dev->ib_dev, + "failed to alloc QPN, ret = %d\n", ret); + spin_unlock(&qp_table->bank_lock); + return ret; } + qp_table->bank[bankid].inuse++; + spin_unlock(&qp_table->bank_lock); + hr_qp->doorbell_qpn = (u32)num; } @@ -340,9 +389,15 @@ static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); } +static inline u8 get_qp_bankid(unsigned long qpn) +{ + /* The lower 3 bits of QPN are used to hash to different banks */ + return (u8)(qpn & GENMASK(2, 0)); +} + static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { - struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + u8 bankid; if (hr_qp->ibqp.qp_type == IB_QPT_GSI) return; @@ -350,7 +405,13 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) if (hr_qp->qpn < hr_dev->caps.reserved_qps) return; - hns_roce_bitmap_free_range(&qp_table->bitmap, hr_qp->qpn, 1, BITMAP_RR); + bankid = get_qp_bankid(hr_qp->qpn); + + ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3); + + spin_lock(&hr_dev->qp_table.bank_lock); + hr_dev->qp_table.bank[bankid].inuse--; + spin_unlock(&hr_dev->qp_table.bank_lock); } static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, @@ -1294,22 +1355,24 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; - int reserved_from_top = 0; - int reserved_from_bot; - int ret; + unsigned int reserved_from_bot; + unsigned int i; mutex_init(&qp_table->scc_mutex); xa_init(&hr_dev->qp_table_xa); reserved_from_bot = hr_dev->caps.reserved_qps; - ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps, - hr_dev->caps.num_qps - 1, reserved_from_bot, - reserved_from_top); - if (ret) { - dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n", - ret); - return ret; + for (i = 0; i < reserved_from_bot; i++) { + hr_dev->qp_table.bank[get_qp_bankid(i)].inuse++; + hr_dev->qp_table.bank[get_qp_bankid(i)].min++; + } + + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) { + ida_init(&hr_dev->qp_table.bank[i].ida); + hr_dev->qp_table.bank[i].max = hr_dev->caps.num_qps / + HNS_ROCE_QP_BANK_NUM - 1; + hr_dev->qp_table.bank[i].next = hr_dev->qp_table.bank[i].min; } return 0; @@ -1317,5 +1380,8 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev) { - hns_roce_bitmap_cleanup(&hr_dev->qp_table.bitmap); + int i; + + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) + ida_destroy(&hr_dev->qp_table.bank[i].ida); } From bfefae9f108dfa62eb9c16c9e97086fddb4ece04 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Thu, 26 Nov 2020 15:04:10 +0800 Subject: [PATCH 109/147] RDMA/hns: Add support for CQ stash Stash is a mechanism that uses the core information carried by the ARM AXI bus to access the L3 cache. It can be used to improve the performance by increasing the hit ratio of L3 cache. CQs need to enable stash by default. Link: https://lore.kernel.org/r/1606374251-21512-2-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_common.h | 12 +++++++ drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 37 ++++++++++++--------- 4 files changed, 37 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index f5669ff8cfebc9..29469e15dfd36a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -53,6 +53,18 @@ #define roce_set_bit(origin, shift, val) \ roce_set_field((origin), (1ul << (shift)), (shift), (val)) +#define FIELD_LOC(field_type, field_h, field_l) field_type, field_h, field_l + +#define _hr_reg_enable(ptr, field_type, field_h, field_l) \ + ({ \ + const field_type *_ptr = ptr; \ + *((__le32 *)_ptr + (field_h) / 32) |= \ + cpu_to_le32(BIT((field_l) % 32)) + \ + BUILD_BUG_ON_ZERO((field_h) != (field_l)); \ + }) + +#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field) + #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index c0c8b1e3ddbbff..a5c6bb073569e1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -225,6 +225,7 @@ enum { HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9), HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14), + HNS_ROCE_CAP_FLAG_STASH = BIT(17), }; #define HNS_ROCE_DB_TYPE_COUNT 2 diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1bd81fb7b6e148..125ab4e03e5aa4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3168,6 +3168,9 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size == HNS_ROCE_V3_CQE_SIZE ? 1 : 0); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) + hr_reg_enable(cq_context, CQC_STASH); + cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); roce_set_field(cq_context->byte_16_hop_addr, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index c06851767a35ef..a4c2be11e41879 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -267,23 +267,24 @@ enum hns_roce_sgid_type { }; struct hns_roce_v2_cq_context { - __le32 byte_4_pg_ceqn; - __le32 byte_8_cqn; - __le32 cqe_cur_blk_addr; - __le32 byte_16_hop_addr; - __le32 cqe_nxt_blk_addr; - __le32 byte_24_pgsz_addr; - __le32 byte_28_cq_pi; - __le32 byte_32_cq_ci; - __le32 cqe_ba; - __le32 byte_40_cqe_ba; - __le32 byte_44_db_record; - __le32 db_record_addr; - __le32 byte_52_cqe_cnt; - __le32 byte_56_cqe_period_maxcnt; - __le32 cqe_report_timer; - __le32 byte_64_se_cqe_idx; + __le32 byte_4_pg_ceqn; + __le32 byte_8_cqn; + __le32 cqe_cur_blk_addr; + __le32 byte_16_hop_addr; + __le32 cqe_nxt_blk_addr; + __le32 byte_24_pgsz_addr; + __le32 byte_28_cq_pi; + __le32 byte_32_cq_ci; + __le32 cqe_ba; + __le32 byte_40_cqe_ba; + __le32 byte_44_db_record; + __le32 db_record_addr; + __le32 byte_52_cqe_cnt; + __le32 byte_56_cqe_period_maxcnt; + __le32 cqe_report_timer; + __le32 byte_64_se_cqe_idx; }; + #define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0 @@ -360,6 +361,10 @@ struct hns_roce_v2_cq_context { #define V2_CQC_BYTE_64_SE_CQE_IDX_S 0 #define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0) +#define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l) + +#define CQC_STASH CQC_FIELD_LOC(63, 63) + struct hns_roce_srq_context { __le32 byte_4_srqn_srqst; __le32 byte_8_limit_wl; From f93c39bc95472dae3b5de71da5c005f47ece3148 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Thu, 26 Nov 2020 15:04:11 +0800 Subject: [PATCH 110/147] RDMA/hns: Add support for QP stash Stash is a mechanism that uses the core information carried by the ARM AXI bus to access the L3 cache. It can be used to improve the performance by increasing the hit ratio of L3 cache. QPs need to enable stash by default. Link: https://lore.kernel.org/r/1606374251-21512-3-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 + drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 130 +++++++++++---------- 2 files changed, 75 insertions(+), 61 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 125ab4e03e5aa4..1aa10321dc29f2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3977,6 +3977,12 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, hr_qp->access_flags = attr->qp_access_flags; roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn); + + if (hr_dev->caps.qpc_sz < HNS_ROCE_V3_QPC_SZ) + return; + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) + hr_reg_enable(&context->ext, QPCEX_STASH); } static void modify_qp_init_to_init(struct ib_qp *ibqp, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index a4c2be11e41879..f83867850c9af1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -466,68 +466,72 @@ enum hns_roce_v2_qp_state { HNS_ROCE_QP_NUM_ST }; +struct hns_roce_v2_qp_context_ex { + __le32 data[64]; +}; struct hns_roce_v2_qp_context { - __le32 byte_4_sqpn_tst; - __le32 wqe_sge_ba; - __le32 byte_12_sq_hop; - __le32 byte_16_buf_ba_pg_sz; - __le32 byte_20_smac_sgid_idx; - __le32 byte_24_mtu_tc; - __le32 byte_28_at_fl; - u8 dgid[GID_LEN_V2]; - __le32 dmac; - __le32 byte_52_udpspn_dmac; - __le32 byte_56_dqpn_err; - __le32 byte_60_qpst_tempid; - __le32 qkey_xrcd; - __le32 byte_68_rq_db; - __le32 rq_db_record_addr; - __le32 byte_76_srqn_op_en; - __le32 byte_80_rnr_rx_cqn; - __le32 byte_84_rq_ci_pi; - __le32 rq_cur_blk_addr; - __le32 byte_92_srq_info; - __le32 byte_96_rx_reqmsn; - __le32 rq_nxt_blk_addr; - __le32 byte_104_rq_sge; - __le32 byte_108_rx_reqepsn; - __le32 rq_rnr_timer; - __le32 rx_msg_len; - __le32 rx_rkey_pkt_info; - __le64 rx_va; - __le32 byte_132_trrl; - __le32 trrl_ba; - __le32 byte_140_raq; - __le32 byte_144_raq; - __le32 byte_148_raq; - __le32 byte_152_raq; - __le32 byte_156_raq; - __le32 byte_160_sq_ci_pi; - __le32 sq_cur_blk_addr; - __le32 byte_168_irrl_idx; - __le32 byte_172_sq_psn; - __le32 byte_176_msg_pktn; - __le32 sq_cur_sge_blk_addr; - __le32 byte_184_irrl_idx; - __le32 cur_sge_offset; - __le32 byte_192_ext_sge; - __le32 byte_196_sq_psn; - __le32 byte_200_sq_max; - __le32 irrl_ba; - __le32 byte_208_irrl; - __le32 byte_212_lsn; - __le32 sq_timer; - __le32 byte_220_retry_psn_msn; - __le32 byte_224_retry_msg; - __le32 rx_sq_cur_blk_addr; - __le32 byte_232_irrl_sge; - __le32 irrl_cur_sge_offset; - __le32 byte_240_irrl_tail; - __le32 byte_244_rnr_rxack; - __le32 byte_248_ack_psn; - __le32 byte_252_err_txcqn; - __le32 byte_256_sqflush_rqcqe; - __le32 ext[64]; + __le32 byte_4_sqpn_tst; + __le32 wqe_sge_ba; + __le32 byte_12_sq_hop; + __le32 byte_16_buf_ba_pg_sz; + __le32 byte_20_smac_sgid_idx; + __le32 byte_24_mtu_tc; + __le32 byte_28_at_fl; + u8 dgid[GID_LEN_V2]; + __le32 dmac; + __le32 byte_52_udpspn_dmac; + __le32 byte_56_dqpn_err; + __le32 byte_60_qpst_tempid; + __le32 qkey_xrcd; + __le32 byte_68_rq_db; + __le32 rq_db_record_addr; + __le32 byte_76_srqn_op_en; + __le32 byte_80_rnr_rx_cqn; + __le32 byte_84_rq_ci_pi; + __le32 rq_cur_blk_addr; + __le32 byte_92_srq_info; + __le32 byte_96_rx_reqmsn; + __le32 rq_nxt_blk_addr; + __le32 byte_104_rq_sge; + __le32 byte_108_rx_reqepsn; + __le32 rq_rnr_timer; + __le32 rx_msg_len; + __le32 rx_rkey_pkt_info; + __le64 rx_va; + __le32 byte_132_trrl; + __le32 trrl_ba; + __le32 byte_140_raq; + __le32 byte_144_raq; + __le32 byte_148_raq; + __le32 byte_152_raq; + __le32 byte_156_raq; + __le32 byte_160_sq_ci_pi; + __le32 sq_cur_blk_addr; + __le32 byte_168_irrl_idx; + __le32 byte_172_sq_psn; + __le32 byte_176_msg_pktn; + __le32 sq_cur_sge_blk_addr; + __le32 byte_184_irrl_idx; + __le32 cur_sge_offset; + __le32 byte_192_ext_sge; + __le32 byte_196_sq_psn; + __le32 byte_200_sq_max; + __le32 irrl_ba; + __le32 byte_208_irrl; + __le32 byte_212_lsn; + __le32 sq_timer; + __le32 byte_220_retry_psn_msn; + __le32 byte_224_retry_msg; + __le32 rx_sq_cur_blk_addr; + __le32 byte_232_irrl_sge; + __le32 irrl_cur_sge_offset; + __le32 byte_240_irrl_tail; + __le32 byte_244_rnr_rxack; + __le32 byte_248_ack_psn; + __le32 byte_252_err_txcqn; + __le32 byte_256_sqflush_rqcqe; + + struct hns_roce_v2_qp_context_ex ext; }; #define V2_QPC_BYTE_4_TST_S 0 @@ -896,6 +900,10 @@ struct hns_roce_v2_qp_context { #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_S 16 #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_M GENMASK(31, 16) +#define QPCEX_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context_ex, h, l) + +#define QPCEX_STASH QPCEX_FIELD_LOC(82, 82) + #define V2_QP_RWE_S 1 /* rdma write enable */ #define V2_QP_RRE_S 2 /* rdma read enable */ #define V2_QP_ATE_S 3 /* rdma atomic enable */ From 1d11d26cf0d6974551049a3d7353ee1336b8632e Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 24 Nov 2020 18:56:17 -0600 Subject: [PATCH 111/147] RDMA/i40iw: Remove push code from i40iw The push feature does not work as expected in x722 and has historically been disabled in the driver. Purge all remaining code related to the push feature in i40iw. Link: https://lore.kernel.org/r/20201125005616.1800-3-shiraz.saleem@intel.com Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw.h | 1 - drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 52 +------------ drivers/infiniband/hw/i40iw/i40iw_d.h | 35 +++------ drivers/infiniband/hw/i40iw/i40iw_status.h | 1 - drivers/infiniband/hw/i40iw/i40iw_type.h | 18 ----- drivers/infiniband/hw/i40iw/i40iw_uk.c | 41 +---------- drivers/infiniband/hw/i40iw/i40iw_user.h | 8 -- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 86 +--------------------- 8 files changed, 18 insertions(+), 224 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 832b80de004fbe..6a79502c8b53cc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -274,7 +274,6 @@ struct i40iw_device { u8 max_sge; u8 iw_status; u8 send_term_ok; - bool push_mode; /* Initialized from parameter passed to driver */ /* x710 specific */ struct mutex pbl_mutex; diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 7ed9826221c186..c943d491b72bd3 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -819,46 +819,6 @@ static enum i40iw_status_code i40iw_sc_poll_for_cqp_op_done( return ret_code; } -/** - * i40iw_sc_manage_push_page - Handle push page - * @cqp: struct for cqp hw - * @info: push page info - * @scratch: u64 saved to be used during cqp completion - * @post_sq: flag for cqp db to ring - */ -static enum i40iw_status_code i40iw_sc_manage_push_page( - struct i40iw_sc_cqp *cqp, - struct i40iw_cqp_manage_push_page_info *info, - u64 scratch, - bool post_sq) -{ - u64 *wqe; - u64 header; - - if (info->push_idx >= I40IW_MAX_PUSH_PAGE_COUNT) - return I40IW_ERR_INVALID_PUSH_PAGE_INDEX; - - wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch); - if (!wqe) - return I40IW_ERR_RING_FULL; - - set_64bit_val(wqe, 16, info->qs_handle); - - header = LS_64(info->push_idx, I40IW_CQPSQ_MPP_PPIDX) | - LS_64(I40IW_CQP_OP_MANAGE_PUSH_PAGES, I40IW_CQPSQ_OPCODE) | - LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) | - LS_64(info->free_page, I40IW_CQPSQ_MPP_FREE_PAGE); - - i40iw_insert_wqe_hdr(wqe, header); - - i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_PUSH_PAGES WQE", - wqe, I40IW_CQP_WQE_SIZE * 8); - - if (post_sq) - i40iw_sc_cqp_post_sq(cqp); - return 0; -} - /** * i40iw_sc_manage_hmc_pm_func_table - manage of function table * @cqp: struct for cqp hw @@ -2859,9 +2819,7 @@ static enum i40iw_status_code i40iw_sc_qp_setctx( LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) | LS_64(qp->xmit_tph_en, I40IWQPC_XMITTPHEN) | LS_64(qp->rq_tph_en, I40IWQPC_RQTPHEN) | - LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN) | - LS_64(info->push_idx, I40IWQPC_PPIDX) | - LS_64(info->push_mode_en, I40IWQPC_PMENA); + LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN); set_64bit_val(qp_ctx, 8, qp->sq_pa); set_64bit_val(qp_ctx, 16, qp->rq_pa); @@ -4291,13 +4249,6 @@ static enum i40iw_status_code i40iw_exec_cqp_cmd(struct i40iw_sc_dev *dev, pcmdinfo->in.u.add_arp_cache_entry.scratch, pcmdinfo->post_sq); break; - case OP_MANAGE_PUSH_PAGE: - status = i40iw_sc_manage_push_page( - pcmdinfo->in.u.manage_push_page.cqp, - &pcmdinfo->in.u.manage_push_page.info, - pcmdinfo->in.u.manage_push_page.scratch, - pcmdinfo->post_sq); - break; case OP_UPDATE_PE_SDS: /* case I40IW_CQP_OP_UPDATE_PE_SDS */ status = i40iw_update_pe_sds( @@ -5173,7 +5124,6 @@ static const struct i40iw_mr_ops iw_mr_ops = { }; static const struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { - .manage_push_page = i40iw_sc_manage_push_page, .manage_hmc_pm_func_table = i40iw_sc_manage_hmc_pm_func_table, .set_hmc_resource_profile = i40iw_sc_set_hmc_resource_profile, .commit_fpm_values = i40iw_sc_commit_fpm_values, diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index e8367d67575dee..86d5a33c57cc7e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -40,11 +40,6 @@ #define I40IW_DB_ADDR_OFFSET (4 * 1024 * 1024 - 64 * 1024) #define I40IW_VF_DB_ADDR_OFFSET (64 * 1024) -#define I40IW_PUSH_OFFSET (4 * 1024 * 1024) -#define I40IW_PF_FIRST_PUSH_PAGE_INDEX 16 -#define I40IW_VF_PUSH_OFFSET ((8 + 64) * 1024) -#define I40IW_VF_FIRST_PUSH_PAGE_INDEX 2 - #define I40IW_PE_DB_SIZE_4M 1 #define I40IW_PE_DB_SIZE_8M 2 @@ -402,7 +397,6 @@ #define I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE 0x0e #define I40IW_CQP_OP_MANAGE_ARP 0x0f #define I40IW_CQP_OP_MANAGE_VF_PBLE_BP 0x10 -#define I40IW_CQP_OP_MANAGE_PUSH_PAGES 0x11 #define I40IW_CQP_OP_QUERY_RDMA_FEATURES 0x12 #define I40IW_CQP_OP_UPLOAD_CONTEXT 0x13 #define I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY 0x14 @@ -843,7 +837,6 @@ #define I40IW_CQPSQ_MVPBP_PD_PLPBA_MASK \ (0x1fffffffffffffffULL << I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT) -/* Manage Push Page - MPP */ #define I40IW_INVALID_PUSH_PAGE_INDEX 0xffff #define I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT 0 @@ -1352,9 +1345,6 @@ #define I40IWQPSQ_ADDFRAGCNT_SHIFT 38 #define I40IWQPSQ_ADDFRAGCNT_MASK (0x7ULL << I40IWQPSQ_ADDFRAGCNT_SHIFT) -#define I40IWQPSQ_PUSHWQE_SHIFT 56 -#define I40IWQPSQ_PUSHWQE_MASK (1ULL << I40IWQPSQ_PUSHWQE_SHIFT) - #define I40IWQPSQ_STREAMMODE_SHIFT 58 #define I40IWQPSQ_STREAMMODE_MASK (1ULL << I40IWQPSQ_STREAMMODE_SHIFT) @@ -1740,18 +1730,17 @@ enum i40iw_alignment { #define OP_MW_ALLOC 20 #define OP_QP_FLUSH_WQES 21 #define OP_ADD_ARP_CACHE_ENTRY 22 -#define OP_MANAGE_PUSH_PAGE 23 -#define OP_UPDATE_PE_SDS 24 -#define OP_MANAGE_HMC_PM_FUNC_TABLE 25 -#define OP_SUSPEND 26 -#define OP_RESUME 27 -#define OP_MANAGE_VF_PBLE_BP 28 -#define OP_QUERY_FPM_VALUES 29 -#define OP_COMMIT_FPM_VALUES 30 -#define OP_REQUESTED_COMMANDS 31 -#define OP_COMPLETED_COMMANDS 32 -#define OP_GEN_AE 33 -#define OP_QUERY_RDMA_FEATURES 34 -#define OP_SIZE_CQP_STAT_ARRAY 35 +#define OP_UPDATE_PE_SDS 23 +#define OP_MANAGE_HMC_PM_FUNC_TABLE 24 +#define OP_SUSPEND 25 +#define OP_RESUME 26 +#define OP_MANAGE_VF_PBLE_BP 27 +#define OP_QUERY_FPM_VALUES 28 +#define OP_COMMIT_FPM_VALUES 29 +#define OP_REQUESTED_COMMANDS 30 +#define OP_COMPLETED_COMMANDS 31 +#define OP_GEN_AE 32 +#define OP_QUERY_RDMA_FEATURES 33 +#define OP_SIZE_CQP_STAT_ARRAY 34 #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h index d1c5855bd8c3bd..36a19c4e5bbabb 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_status.h +++ b/drivers/infiniband/hw/i40iw/i40iw_status.h @@ -61,7 +61,6 @@ enum i40iw_status_code { I40IW_ERR_QUEUE_EMPTY = -22, I40IW_ERR_INVALID_ALIGNMENT = -23, I40IW_ERR_FLUSHED_QUEUE = -24, - I40IW_ERR_INVALID_PUSH_PAGE_INDEX = -25, I40IW_ERR_INVALID_INLINE_DATA_SIZE = -26, I40IW_ERR_TIMEOUT = -27, I40IW_ERR_OPCODE_MISMATCH = -28, diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 1dbf3991cc54ef..394e182686cfec 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -387,7 +387,6 @@ struct i40iw_sc_qp { u8 *q2_buf; u64 qp_compl_ctx; u16 qs_handle; - u16 push_idx; u8 sq_tph_val; u8 rq_tph_val; u8 qp_state; @@ -749,8 +748,6 @@ struct i40iw_qp_host_ctx_info { struct i40iwarp_offload_info *iwarp_info; u32 send_cq_num; u32 rcv_cq_num; - u16 push_idx; - bool push_mode_en; bool tcp_info_valid; bool iwarp_info_valid; bool err_rq_idx_valid; @@ -937,12 +934,6 @@ struct i40iw_local_mac_ipaddr_entry_info { u8 entry_idx; }; -struct i40iw_cqp_manage_push_page_info { - u32 push_idx; - u16 qs_handle; - u8 free_page; -}; - struct i40iw_qp_flush_info { u16 sq_minor_code; u16 sq_major_code; @@ -1114,9 +1105,6 @@ struct i40iw_mr_ops { }; struct i40iw_cqp_misc_ops { - enum i40iw_status_code (*manage_push_page)(struct i40iw_sc_cqp *, - struct i40iw_cqp_manage_push_page_info *, - u64, bool); enum i40iw_status_code (*manage_hmc_pm_func_table)(struct i40iw_sc_cqp *, u64, u8, bool, bool); enum i40iw_status_code (*set_hmc_resource_profile)(struct i40iw_sc_cqp *, @@ -1253,12 +1241,6 @@ struct cqp_info { u64 scratch; } manage_vf_pble_bp; - struct { - struct i40iw_sc_cqp *cqp; - struct i40iw_cqp_manage_push_page_info info; - u64 scratch; - } manage_push_page; - struct { struct i40iw_sc_dev *dev; struct i40iw_upload_context_info info; diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index 8afa5a67a86b5c..c3633c9944db4c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -114,17 +114,6 @@ void i40iw_qp_post_wr(struct i40iw_qp_uk *qp) qp->initial_ring.head = qp->sq_ring.head; } -/** - * i40iw_qp_ring_push_db - ring qp doorbell - * @qp: hw qp ptr - * @wqe_idx: wqe index - */ -static void i40iw_qp_ring_push_db(struct i40iw_qp_uk *qp, u32 wqe_idx) -{ - set_32bit_val(qp->push_db, 0, LS_32((wqe_idx >> 2), I40E_PFPE_WQEALLOC_WQE_DESC_INDEX) | qp->qp_id); - qp->initial_ring.head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring); -} - /** * i40iw_qp_get_next_send_wqe - return next wqe ptr * @qp: hw qp ptr @@ -426,7 +415,6 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, u64 *wqe; u8 *dest, *src; struct i40iw_inline_rdma_write *op_info; - u64 *push; u64 header = 0; u32 wqe_idx; enum i40iw_status_code ret_code; @@ -453,7 +441,6 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) | LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) | LS_64(1, I40IWQPSQ_INLINEDATAFLAG) | - LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) | LS_64(read_fence, I40IWQPSQ_READFENCE) | LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) | LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) | @@ -475,14 +462,8 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, set_64bit_val(wqe, 24, header); - if (qp->push_db) { - push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20); - memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32); - i40iw_qp_ring_push_db(qp, wqe_idx); - } else { - if (post_sq) - i40iw_qp_post_wr(qp); - } + if (post_sq) + i40iw_qp_post_wr(qp); return 0; } @@ -507,7 +488,6 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, enum i40iw_status_code ret_code; bool read_fence = false; u8 wqe_size; - u64 *push; op_info = &info->op.inline_send; if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE) @@ -526,7 +506,6 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, LS_64(info->op_type, I40IWQPSQ_OPCODE) | LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) | LS_64(1, I40IWQPSQ_INLINEDATAFLAG) | - LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) | LS_64(read_fence, I40IWQPSQ_READFENCE) | LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) | LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) | @@ -548,14 +527,8 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, set_64bit_val(wqe, 24, header); - if (qp->push_db) { - push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20); - memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32); - i40iw_qp_ring_push_db(qp, wqe_idx); - } else { - if (post_sq) - i40iw_qp_post_wr(qp); - } + if (post_sq) + i40iw_qp_post_wr(qp); return 0; } @@ -772,7 +745,6 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, q_type = (u8)RS_64(qword3, I40IW_CQ_SQ); info->error = (bool)RS_64(qword3, I40IW_CQ_ERROR); - info->push_dropped = (bool)RS_64(qword3, I40IWCQ_PSHDROP); if (info->error) { info->comp_status = I40IW_COMPL_STATUS_FLUSHED; info->major_err = (bool)RS_64(qword3, I40IW_CQ_MAJERR); @@ -951,7 +923,6 @@ enum i40iw_status_code i40iw_get_rqdepth(u32 rq_size, u8 shift, u32 *rqdepth) static const struct i40iw_qp_uk_ops iw_qp_uk_ops = { .iw_qp_post_wr = i40iw_qp_post_wr, - .iw_qp_ring_push_db = i40iw_qp_ring_push_db, .iw_rdma_write = i40iw_rdma_write, .iw_rdma_read = i40iw_rdma_read, .iw_send = i40iw_send, @@ -1009,11 +980,7 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp, qp->wqe_alloc_reg = info->wqe_alloc_reg; qp->qp_id = info->qp_id; - qp->sq_size = info->sq_size; - qp->push_db = info->push_db; - qp->push_wqe = info->push_wqe; - qp->max_sq_frag_cnt = info->max_sq_frag_cnt; sq_ring_size = qp->sq_size << sqshift; diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h index b125925641e01b..93fc3081dd654f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_user.h +++ b/drivers/infiniband/hw/i40iw/i40iw_user.h @@ -64,13 +64,11 @@ enum i40iw_device_capabilities_const { I40IW_MAX_SGE_RD = 1, I40IW_MAX_OUTBOUND_MESSAGE_SIZE = 2147483647, I40IW_MAX_INBOUND_MESSAGE_SIZE = 2147483647, - I40IW_MAX_PUSH_PAGE_COUNT = 4096, I40IW_MAX_PE_ENABLED_VF_COUNT = 32, I40IW_MAX_VF_FPM_ID = 47, I40IW_MAX_VF_PER_PF = 127, I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496, I40IW_MAX_INLINE_DATA_SIZE = 48, - I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48, I40IW_MAX_IRD_SIZE = 64, I40IW_MAX_ORD_SIZE = 127, I40IW_MAX_WQ_ENTRIES = 2048, @@ -272,7 +270,6 @@ struct i40iw_cq_poll_info { u16 minor_err; u8 op_type; bool stag_invalid_set; - bool push_dropped; bool error; bool is_srq; bool solicited_event; @@ -280,7 +277,6 @@ struct i40iw_cq_poll_info { struct i40iw_qp_uk_ops { void (*iw_qp_post_wr)(struct i40iw_qp_uk *); - void (*iw_qp_ring_push_db)(struct i40iw_qp_uk *, u32); enum i40iw_status_code (*iw_rdma_write)(struct i40iw_qp_uk *, struct i40iw_post_sq_info *, bool); enum i40iw_status_code (*iw_rdma_read)(struct i40iw_qp_uk *, @@ -340,8 +336,6 @@ struct i40iw_qp_uk { struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array; u64 *rq_wrid_array; u64 *shadow_area; - u32 *push_db; - u64 *push_wqe; struct i40iw_ring sq_ring; struct i40iw_ring rq_ring; struct i40iw_ring initial_ring; @@ -381,8 +375,6 @@ struct i40iw_qp_uk_init_info { u64 *shadow_area; struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array; u64 *rq_wrid_array; - u32 *push_db; - u64 *push_wqe; u32 qp_id; u32 sq_size; u32 rq_size; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index aab69c6ddd7cdd..65aedfe57e7766 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -179,78 +179,6 @@ static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) pgprot_noncached(vma->vm_page_prot), NULL); } -/** - * i40iw_alloc_push_page - allocate a push page for qp - * @iwdev: iwarp device - * @qp: hardware control qp - */ -static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp) -{ - struct i40iw_cqp_request *cqp_request; - struct cqp_commands_info *cqp_info; - enum i40iw_status_code status; - - if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX) - return; - - cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true); - if (!cqp_request) - return; - - atomic_inc(&cqp_request->refcount); - - cqp_info = &cqp_request->info; - cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE; - cqp_info->post_sq = 1; - - cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle; - cqp_info->in.u.manage_push_page.info.free_page = 0; - cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp; - cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; - - status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (!status) - qp->push_idx = cqp_request->compl_info.op_ret_val; - else - i40iw_pr_err("CQP-OP Push page fail"); - i40iw_put_cqp_request(&iwdev->cqp, cqp_request); -} - -/** - * i40iw_dealloc_push_page - free a push page for qp - * @iwdev: iwarp device - * @qp: hardware control qp - */ -static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp) -{ - struct i40iw_cqp_request *cqp_request; - struct cqp_commands_info *cqp_info; - enum i40iw_status_code status; - - if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) - return; - - cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); - if (!cqp_request) - return; - - cqp_info = &cqp_request->info; - cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE; - cqp_info->post_sq = 1; - - cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx; - cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle; - cqp_info->in.u.manage_push_page.info.free_page = 1; - cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp; - cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; - - status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (!status) - qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; - else - i40iw_pr_err("CQP-OP Push page fail"); -} - /** * i40iw_alloc_pd - allocate protection domain * @pd: PD pointer @@ -348,7 +276,6 @@ void i40iw_free_qp_resources(struct i40iw_qp *iwqp) u32 qp_num = iwqp->ibqp.qp_num; i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp); - i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp); if (qp_num) i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num); if (iwpbl->pbl_allocated) @@ -561,8 +488,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, qp = &iwqp->sc_qp; qp->back_qp = (void *)iwqp; - qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; - iwqp->iwdev = iwdev; iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info; @@ -606,8 +531,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, err_code = -EOPNOTSUPP; goto error; } - if (iwdev->push_mode) - i40iw_alloc_push_page(iwdev, qp); if (udata) { err_code = ib_copy_from_udata(&req, udata, sizeof(req)); if (err_code) { @@ -666,13 +589,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, ctx_info->iwarp_info_valid = true; ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id; ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id; - if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) { - ctx_info->push_mode_en = false; - } else { - ctx_info->push_mode_en = true; - ctx_info->push_idx = qp->push_idx; - } - ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, (u64 *)iwqp->host_ctx.va, ctx_info); @@ -712,7 +628,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, uresp.actual_sq_size = sq_size; uresp.actual_rq_size = rq_size; uresp.qp_id = qp_num; - uresp.push_idx = qp->push_idx; + uresp.push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; err_code = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (err_code) { i40iw_pr_err("copy_to_udata failed\n"); From 0fd0175e30e487f8d70ecb2cdd67fbb514fdf50f Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Sat, 28 Nov 2020 18:22:37 +0800 Subject: [PATCH 112/147] RDMA/hns: Fix 0-length sge calculation error One RC SQ WQE can store 2 sges but UD can't, so ignore 2 valid sges of wr.sglist for RC which have been filled in WQE before setting extended sge. Either of RC and UD can not contain 0-length sges, so these 0-length sges should be skipped. Fixes: 54d6638765b0 ("RDMA/hns: Optimize WQE buffer size calculating process") Link: https://lore.kernel.org/r/1606558959-48510-2-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 24 +++++++++------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8575ad7acce237..8d37067a736d6f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -214,25 +214,20 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, return 0; } -static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, - unsigned int *sge_ind, unsigned int valid_num_sge) +static void set_extend_sge(struct hns_roce_qp *qp, struct ib_sge *sge, + unsigned int *sge_ind, unsigned int cnt) { struct hns_roce_v2_wqe_data_seg *dseg; - unsigned int cnt = valid_num_sge; - struct ib_sge *sge = wr->sg_list; unsigned int idx = *sge_ind; - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { - cnt -= HNS_ROCE_SGE_IN_WQE; - sge += HNS_ROCE_SGE_IN_WQE; - } - while (cnt > 0) { dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1)); - set_data_seg_v2(dseg, sge); - idx++; + if (likely(sge->length)) { + set_data_seg_v2(dseg, sge); + idx++; + cnt--; + } sge++; - cnt--; } *sge_ind = idx; @@ -340,7 +335,8 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, } } - set_extend_sge(qp, wr, sge_ind, valid_num_sge); + set_extend_sge(qp, wr->sg_list + i, sge_ind, + valid_num_sge - HNS_ROCE_SGE_IN_WQE); } roce_set_field(rc_sq_wqe->byte_16, @@ -503,7 +499,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, if (ret) return ret; - set_extend_sge(qp, wr, &curr_idx, valid_num_sge); + set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge); /* * The pipeline can sequentially post all valid WQEs into WQ buffer, From d34895c319faa1e0fc1a48c3b06bba6a8a39ba44 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Sat, 28 Nov 2020 18:22:38 +0800 Subject: [PATCH 113/147] RDMA/hns: Bugfix for calculation of extended sge Page alignment is required when setting the number of extended sge according to the hardware's achivement. If the space of needed extended sge is greater than one page, the roundup_pow_of_two() can ensure that. But if the needed extended sge isn't 0 and can not be filled in a whole page, the driver should align it specifically. Fixes: 54d6638765b0 ("RDMA/hns: Optimize WQE buffer size calculating process") Link: https://lore.kernel.org/r/1606558959-48510-3-git-send-email-liweihang@huawei.com Signed-off-by: Yangyang Li Signed-off-by: Weihang Li Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_qp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 62da30a1575aa7..0cfa374194e362 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -493,7 +493,12 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, } hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; - hr_qp->sge.sge_cnt = cnt; + + /* If the number of extended sge is not zero, they MUST use the + * space of HNS_HW_PAGE_SIZE at least. + */ + hr_qp->sge.sge_cnt = cnt ? + max(cnt, (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE) : 0; return 0; } From 05201e01be937be47e4c970c0a9eb6b6fb375b1e Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Sat, 28 Nov 2020 18:22:39 +0800 Subject: [PATCH 114/147] RDMA/hns: Refactor process of setting extended sge The variable 'cnt' is used to represent the max number of sge an SQ WQE can use at first, then it means how many extended sge an SQ has. In addition, this function has no need to return a value. So refactor and encapsulate the parts of getting number of extended sge a WQE can use to make it easier to understand. Link: https://lore.kernel.org/r/1606558959-48510-4-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_qp.c | 60 ++++++++++++------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 0cfa374194e362..34aa086060d3f0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -465,42 +465,43 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, return 0; } -static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, - struct hns_roce_qp *hr_qp, - struct ib_qp_cap *cap) +static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp) { - u32 cnt; + /* GSI/UD QP only has extended sge */ + if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) + return qp->sq.max_gs; - cnt = max(1U, cap->max_send_sge); - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { - hr_qp->sq.max_gs = roundup_pow_of_two(cnt); - hr_qp->sge.sge_cnt = 0; + if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) + return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE; - return 0; - } + return 0; +} - hr_qp->sq.max_gs = cnt; +static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, + struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap) +{ + u32 total_sge_cnt; + u32 wqe_sge_cnt; - /* UD sqwqe's sge use extend sge */ - if (hr_qp->ibqp.qp_type == IB_QPT_GSI || - hr_qp->ibqp.qp_type == IB_QPT_UD) { - cnt = roundup_pow_of_two(sq_wqe_cnt * hr_qp->sq.max_gs); - } else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) { - cnt = roundup_pow_of_two(sq_wqe_cnt * - (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); - } else { - cnt = 0; + hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; + + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { + hr_qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE; + return; } - hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; + hr_qp->sq.max_gs = max(1U, cap->max_send_sge); + + wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp); /* If the number of extended sge is not zero, they MUST use the * space of HNS_HW_PAGE_SIZE at least. */ - hr_qp->sge.sge_cnt = cnt ? - max(cnt, (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE) : 0; - - return 0; + if (wqe_sge_cnt) { + total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt); + hr_qp->sge.sge_cnt = max(total_sge_cnt, + (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE); + } } static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, @@ -545,9 +546,7 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, return ret; } - ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap); - if (ret) - return ret; + set_ext_sge_param(hr_dev, cnt, hr_qp, cap); hr_qp->sq.wqe_shift = ucmd->log_sq_stride; hr_qp->sq.wqe_cnt = cnt; @@ -612,7 +611,6 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, { struct ib_device *ibdev = &hr_dev->ib_dev; u32 cnt; - int ret; if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes || cap->max_send_sge > hr_dev->caps.max_sq_sg) { @@ -632,9 +630,7 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); hr_qp->sq.wqe_cnt = cnt; - ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap); - if (ret) - return ret; + set_ext_sge_param(hr_dev, cnt, hr_qp, cap); /* sync the parameters of kernel QP to user's configuration */ cap->max_send_wr = cnt; From 87524494a7d939e6e120e893e2bdcc35599dfda1 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 1 Dec 2020 11:18:11 +0200 Subject: [PATCH 115/147] RDMA/efa: Use dma_set_mask_and_coherent() to simplify code Use dma_set_mask_and_coherent() instead of pci_set_dma_mask() followed by a pci_set_consistent_dma_mask(). Link: https://lore.kernel.org/r/20201201091811.37984-1-galpress@amazon.com Reviewed-by: Firas JahJah Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_main.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index cb2f2c647ee565..0f578734bddb60 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -384,19 +384,12 @@ static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev) return err; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(dma_width)); if (err) { - dev_err(&pdev->dev, "pci_set_dma_mask failed %d\n", err); + dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", err); return err; } - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); - if (err) { - dev_err(&pdev->dev, - "err_pci_set_consistent_dma_mask failed %d\n", - err); - return err; - } dma_set_max_seg_size(&pdev->dev, UINT_MAX); return 0; } From b9653b31d7767b7dccc8b24b660301be90449036 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 30 Nov 2020 09:58:35 +0200 Subject: [PATCH 116/147] RDMA/uverbs: Tidy input validation of ib_uverbs_rereg_mr() Unknown flags should be EOPNOTSUPP, only zero flags is EINVAL. Flags is actually the rereg action to perform. The checking of the start/hca_va/etc is also redundant and ib_umem_get() does these checks and returns proper error codes. Fixes: 7e6edb9b2e0b ("IB/core: Add user MR re-registration support") Link: https://lore.kernel.org/r/20201130075839.278575-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 402d0b8bf58e72..143a0e33fe5228 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -783,13 +783,15 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) if (ret) return ret; - if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags) + if (!cmd.flags) return -EINVAL; + if (cmd.flags & ~IB_MR_REREG_SUPPORTED) + return -EOPNOTSUPP; + if ((cmd.flags & IB_MR_REREG_TRANS) && - (!cmd.start || !cmd.hca_va || 0 >= cmd.length || - (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) - return -EINVAL; + (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) + return -EINVAL; uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs); if (IS_ERR(uobj)) From adac4cb3c1ff5c47c9f47be5d017a0e054176e3c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 30 Nov 2020 09:58:36 +0200 Subject: [PATCH 117/147] RDMA/uverbs: Check ODP in ib_check_mr_access() as well No reason only one caller checks this. This properly blocks ODP from the rereg flow if the device does not support ODP. Link: https://lore.kernel.org/r/20201130075839.278575-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 19 +++++-------------- drivers/infiniband/core/uverbs_std_types_mr.c | 2 +- drivers/infiniband/hw/mlx5/devx.c | 2 +- include/rdma/ib_verbs.h | 6 +++++- 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 143a0e33fe5228..817b25045acdc0 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -709,29 +709,20 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; - ret = ib_check_mr_access(cmd.access_flags); - if (ret) - return ret; - uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); + ret = ib_check_mr_access(ib_dev, cmd.access_flags); + if (ret) + goto err_free; + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err_free; } - if (cmd.access_flags & IB_ACCESS_ON_DEMAND) { - if (!(pd->device->attrs.device_cap_flags & - IB_DEVICE_ON_DEMAND_PAGING)) { - pr_debug("ODP support not available\n"); - ret = -EINVAL; - goto err_put; - } - } - mr = pd->device->ops.reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, cmd.access_flags, &attrs->driver_udata); @@ -805,7 +796,7 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) } if (cmd.flags & IB_MR_REREG_ACCESS) { - ret = ib_check_mr_access(cmd.access_flags); + ret = ib_check_mr_access(mr->device, cmd.access_flags); if (ret) goto put_uobjs; } diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index dc58564417292a..dd4e76b26c7459 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -115,7 +115,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( if (!(attr.access_flags & IB_ZERO_BASED)) return -EINVAL; - ret = ib_check_mr_access(attr.access_flags); + ret = ib_check_mr_access(ib_dev, attr.access_flags); if (ret) return ret; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index ad0173f62c0e95..819c142857d650 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2068,7 +2068,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - err = ib_check_mr_access(access); + err = ib_check_mr_access(&dev->ib_dev, access); if (err) return err; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7bee8abae35cf4..4fcbc6d3d0e00f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4183,7 +4183,8 @@ struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, struct inode *inode, struct ib_udata *udata); int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata); -static inline int ib_check_mr_access(int flags) +static inline int ib_check_mr_access(struct ib_device *ib_dev, + unsigned int flags) { /* * Local write permission is required if remote write or @@ -4196,6 +4197,9 @@ static inline int ib_check_mr_access(int flags) if (flags & ~IB_ACCESS_SUPPORTED) return -EINVAL; + if (flags & IB_ACCESS_ON_DEMAND && + !(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) + return -EINVAL; return 0; } From 6e0954b11c056570cb29676a84e2f8dc4d1dd05e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 30 Nov 2020 09:58:37 +0200 Subject: [PATCH 118/147] RDMA/uverbs: Allow drivers to create a new HW object during rereg_mr mlx5 has an ugly flow where it tries to allocate a new MR and replace the existing MR in the same memory during rereg. This is very complicated and buggy. Instead of trying to replace in-place inside the driver, provide support from uverbs to change the entire HW object assigned to a handle during rereg_mr. Since destroying a MR is allowed to fail (ie if a MW is pointing at it) and can't be detected in advance, the algorithm creates a completely new uobject to hold the new MR and swaps the IDR entries of the two objects. The old MR in the temporary IDR entry is destroyed, and if it fails rereg_mr succeeds and destruction is deferred to FD release. This complexity is why this cannot live in a driver safely. Link: https://lore.kernel.org/r/20201130075839.278575-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 51 +++++++++++++ drivers/infiniband/core/uverbs_cmd.c | 85 ++++++++++++++++----- drivers/infiniband/hw/hns/hns_roce_device.h | 7 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 15 ++-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 8 +- drivers/infiniband/hw/mlx4/mr.c | 16 ++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 +- drivers/infiniband/hw/mlx5/mr.c | 15 ++-- include/rdma/ib_verbs.h | 7 +- include/rdma/uverbs_types.h | 5 ++ 10 files changed, 160 insertions(+), 55 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index c44079b9158e14..75eafd9208aa34 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -595,6 +595,27 @@ static void alloc_commit_idr_uobject(struct ib_uobject *uobj) WARN_ON(old != NULL); } +static void swap_idr_uobjects(struct ib_uobject *obj_old, + struct ib_uobject *obj_new) +{ + struct ib_uverbs_file *ufile = obj_old->ufile; + void *old; + + /* + * New must be an object that been allocated but not yet committed, this + * moves the pre-committed state to obj_old, new still must be comitted. + */ + old = xa_cmpxchg(&ufile->idr, obj_old->id, obj_old, XA_ZERO_ENTRY, + GFP_KERNEL); + if (WARN_ON(old != obj_old)) + return; + + swap(obj_old->id, obj_new->id); + + old = xa_cmpxchg(&ufile->idr, obj_old->id, NULL, obj_old, GFP_KERNEL); + WARN_ON(old != NULL); +} + static void alloc_commit_fd_uobject(struct ib_uobject *uobj) { int fd = uobj->id; @@ -640,6 +661,35 @@ void rdma_alloc_commit_uobject(struct ib_uobject *uobj, up_read(&ufile->hw_destroy_rwsem); } +/* + * new_uobj will be assigned to the handle currently used by to_uobj, and + * to_uobj will be destroyed. + * + * Upon return the caller must do: + * rdma_alloc_commit_uobject(new_uobj) + * uobj_put_destroy(to_uobj) + * + * to_uobj must have a write get but the put mode switches to destroy once + * this is called. + */ +void rdma_assign_uobject(struct ib_uobject *to_uobj, struct ib_uobject *new_uobj, + struct uverbs_attr_bundle *attrs) +{ + assert_uverbs_usecnt(new_uobj, UVERBS_LOOKUP_WRITE); + + if (WARN_ON(to_uobj->uapi_object != new_uobj->uapi_object || + !to_uobj->uapi_object->type_class->swap_uobjects)) + return; + + to_uobj->uapi_object->type_class->swap_uobjects(to_uobj, new_uobj); + + /* + * If this fails then the uobject is still completely valid (though with + * a new ID) and we leak it until context close. + */ + uverbs_destroy_uobject(to_uobj, RDMA_REMOVE_DESTROY, attrs); +} + /* * This consumes the kref for uobj. It is up to the caller to unwind the HW * object and anything else connected to uobj before calling this. @@ -747,6 +797,7 @@ const struct uverbs_obj_type_class uverbs_idr_class = { .lookup_put = lookup_put_idr_uobject, .destroy_hw = destroy_hw_idr_uobject, .remove_handle = remove_handle_idr_uobject, + .swap_uobjects = swap_idr_uobjects, }; EXPORT_SYMBOL(uverbs_idr_class); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 817b25045acdc0..98a5d36813ff78 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -764,11 +764,14 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_rereg_mr cmd; struct ib_uverbs_rereg_mr_resp resp; - struct ib_pd *pd = NULL; struct ib_mr *mr; - struct ib_pd *old_pd; int ret; struct ib_uobject *uobj; + struct ib_uobject *new_uobj; + struct ib_device *ib_dev; + struct ib_pd *orig_pd; + struct ib_pd *new_pd; + struct ib_mr *new_mr; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) @@ -801,31 +804,69 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) goto put_uobjs; } + orig_pd = mr->pd; if (cmd.flags & IB_MR_REREG_PD) { - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, - attrs); - if (!pd) { + new_pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, + attrs); + if (!new_pd) { ret = -EINVAL; goto put_uobjs; } + } else { + new_pd = mr->pd; } - old_pd = mr->pd; - ret = mr->device->ops.rereg_user_mr(mr, cmd.flags, cmd.start, - cmd.length, cmd.hca_va, - cmd.access_flags, pd, - &attrs->driver_udata); - if (ret) + /* + * The driver might create a new HW object as part of the rereg, we need + * to have a uobject ready to hold it. + */ + new_uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev); + if (IS_ERR(new_uobj)) { + ret = PTR_ERR(new_uobj); goto put_uobj_pd; - - if (cmd.flags & IB_MR_REREG_PD) { - atomic_inc(&pd->usecnt); - mr->pd = pd; - atomic_dec(&old_pd->usecnt); } - if (cmd.flags & IB_MR_REREG_TRANS) - mr->iova = cmd.hca_va; + new_mr = ib_dev->ops.rereg_user_mr(mr, cmd.flags, cmd.start, cmd.length, + cmd.hca_va, cmd.access_flags, new_pd, + &attrs->driver_udata); + if (IS_ERR(new_mr)) { + ret = PTR_ERR(new_mr); + goto put_new_uobj; + } + if (new_mr) { + new_mr->device = new_pd->device; + new_mr->pd = new_pd; + new_mr->type = IB_MR_TYPE_USER; + new_mr->dm = NULL; + new_mr->sig_attrs = NULL; + new_mr->uobject = uobj; + atomic_inc(&new_pd->usecnt); + new_mr->iova = cmd.hca_va; + new_uobj->object = new_mr; + + rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR); + rdma_restrack_set_name(&new_mr->res, NULL); + rdma_restrack_add(&new_mr->res); + + /* + * The new uobj for the new HW object is put into the same spot + * in the IDR and the old uobj & HW object is deleted. + */ + rdma_assign_uobject(uobj, new_uobj, attrs); + rdma_alloc_commit_uobject(new_uobj, attrs); + uobj_put_destroy(uobj); + new_uobj = NULL; + uobj = NULL; + mr = new_mr; + } else { + if (cmd.flags & IB_MR_REREG_PD) { + atomic_dec(&orig_pd->usecnt); + mr->pd = new_pd; + atomic_inc(&new_pd->usecnt); + } + if (cmd.flags & IB_MR_REREG_TRANS) + mr->iova = cmd.hca_va; + } memset(&resp, 0, sizeof(resp)); resp.lkey = mr->lkey; @@ -833,12 +874,16 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) ret = uverbs_response(attrs, &resp, sizeof(resp)); +put_new_uobj: + if (new_uobj) + uobj_alloc_abort(new_uobj, attrs); put_uobj_pd: if (cmd.flags & IB_MR_REREG_PD) - uobj_put_obj_read(pd); + uobj_put_obj_read(new_pd); put_uobjs: - uobj_put_write(uobj); + if (uobj) + uobj_put_write(uobj); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a5c6bb073569e1..70ae37bad77ed2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1223,9 +1223,10 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); -int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata); +struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 87e2e6236c6922..98671925debf70 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -328,9 +328,10 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags, return ret; } -int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata) +struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct ib_device *ib_dev = &hr_dev->ib_dev; @@ -341,11 +342,11 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, int ret; if (!mr->enabled) - return -EINVAL; + return ERR_PTR(-EINVAL); mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); + return ERR_CAST(mailbox); mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1); ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0, @@ -390,12 +391,12 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, hns_roce_free_cmd_mailbox(hr_dev, mailbox); - return 0; + return NULL; free_cmd_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); - return ret; + return ERR_PTR(ret); } int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 58df06492d69e4..78c9bb79ec75f7 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -908,10 +908,10 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn); void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, int is_attach); -int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, - u64 start, u64 length, u64 virt_addr, - int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata); +struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, const struct ib_gid_attr *attr); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 426fed005d538b..50becc0e4b6223 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -456,10 +456,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(err); } -int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, - u64 start, u64 length, u64 virt_addr, - int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata) +struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(mr->device); struct mlx4_ib_mr *mmr = to_mmr(mr); @@ -472,9 +472,8 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, * race exists. */ err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry); - if (err) - return err; + return ERR_PTR(err); if (flags & IB_MR_REREG_PD) { err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry, @@ -542,8 +541,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, release_mpt_entry: mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry); - - return err; + if (err) + return ERR_PTR(err); + return NULL; } static int diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 718e59fce006d4..ab84d4efbda31c 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1254,9 +1254,9 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); -int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, - u64 length, u64 virt_addr, int access_flags, - struct ib_pd *pd, struct ib_udata *udata); +struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 virt_addr, int access_flags, + struct ib_pd *pd, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b6116f6d065de3..778cc08e17ad46 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1620,9 +1620,10 @@ static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, return err; } -int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, - u64 length, u64 virt_addr, int new_access_flags, - struct ib_pd *new_pd, struct ib_udata *udata) +struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 virt_addr, + int new_access_flags, struct ib_pd *new_pd, + struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); struct mlx5_ib_mr *mr = to_mmr(ib_mr); @@ -1638,10 +1639,10 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, start, virt_addr, length, access_flags); if (!mr->umem) - return -EINVAL; + return ERR_PTR(-EINVAL); if (is_odp_mr(mr)) - return -EOPNOTSUPP; + return ERR_PTR(-EOPNOTSUPP); if (flags & IB_MR_REREG_TRANS) { addr = virt_addr; @@ -1717,14 +1718,14 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, set_mr_fields(dev, mr, len, access_flags); - return 0; + return NULL; err: ib_umem_release(mr->umem); mr->umem = NULL; clean_mr(dev, mr); - return err; + return ERR_PTR(err); } static int diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4fcbc6d3d0e00f..3be1d1194a17f7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2433,9 +2433,10 @@ struct ib_device_ops { struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); - int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_pd *pd, struct ib_udata *udata); + struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index a27e9fb4903f3d..ccd11631c167c0 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -71,6 +71,8 @@ struct uverbs_obj_type_class { enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs); void (*remove_handle)(struct ib_uobject *uobj); + void (*swap_uobjects)(struct ib_uobject *obj_old, + struct ib_uobject *obj_new); }; struct uverbs_obj_type { @@ -116,6 +118,9 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj, bool hw_obj_valid); void rdma_alloc_commit_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); +void rdma_assign_uobject(struct ib_uobject *to_uobj, + struct ib_uobject *new_uobj, + struct uverbs_attr_bundle *attrs); /* * uverbs_uobject_get is called in order to increase the reference count on From 38f8ff5b4438876a7d5b2f8b54eb46c7d5154457 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 30 Nov 2020 09:58:38 +0200 Subject: [PATCH 119/147] RDMA/mlx5: Reorganize mlx5_ib_reg_user_mr() This function handles an ODP and regular MR flow all mushed together, even though the two flows are quite different. Split them into two dedicated functions. Link: https://lore.kernel.org/r/20201130075839.278575-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 +- drivers/infiniband/hw/mlx5/mr.c | 249 ++++++++++++++------------- drivers/infiniband/hw/mlx5/odp.c | 16 +- 3 files changed, 140 insertions(+), 129 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index ab84d4efbda31c..fac495e7834e17 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1340,7 +1340,7 @@ void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge); -int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable); +int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1362,7 +1362,7 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { return -EOPNOTSUPP; } -static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable) +static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) { return -EOPNOTSUPP; } diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 778cc08e17ad46..8fb8f8a2c72e9e 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -56,6 +56,10 @@ enum { static void create_mkey_callback(int status, struct mlx5_async_work *context); +static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, + struct ib_umem *umem, u64 iova, + int access_flags, unsigned int page_size, + bool populate); static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, struct ib_pd *pd) @@ -875,32 +879,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev) return MLX5_MAX_UMR_SHIFT; } -static struct ib_umem *mr_umem_get(struct mlx5_ib_dev *dev, u64 start, - u64 length, int access_flags) -{ - struct ib_umem *u; - - if (access_flags & IB_ACCESS_ON_DEMAND) { - struct ib_umem_odp *odp; - - odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, - &mlx5_mn_ops); - if (IS_ERR(odp)) { - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", - PTR_ERR(odp)); - return ERR_CAST(odp); - } - return &odp->umem; - } - - u = ib_umem_get(&dev->ib_dev, start, length, access_flags); - if (IS_ERR(u)) { - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); - return u; - } - return u; -} - static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) { struct mlx5_ib_umr_context *context = @@ -957,9 +935,18 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, return &cache->ent[order]; } -static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd, - struct ib_umem *umem, u64 iova, - int access_flags) +static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, + u64 length, int access_flags) +{ + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; + mr->ibmr.length = length; + mr->access_flags = access_flags; +} + +static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, + struct ib_umem *umem, u64 iova, + int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_cache_ent *ent; @@ -971,16 +958,26 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd, return ERR_PTR(-EINVAL); ent = mr_cache_ent_from_order( dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size))); - if (!ent) - return ERR_PTR(-E2BIG); - - /* Matches access in alloc_cache_mr() */ - if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) - return ERR_PTR(-EOPNOTSUPP); + /* + * Matches access in alloc_cache_mr(). If the MR can't come from the + * cache then synchronously create an uncached one. + */ + if (!ent || ent->limit == 0 || + !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) { + mutex_lock(&dev->slow_path_mutex); + mr = reg_create(NULL, pd, umem, iova, access_flags, page_size, + false); + mutex_unlock(&dev->slow_path_mutex); + return mr; + } mr = get_cache_mr(ent); if (!mr) { mr = create_cache_mr(ent); + /* + * The above already tried to do the same stuff as reg_create(), + * no reason to try it again. + */ if (IS_ERR(mr)) return mr; } @@ -993,6 +990,8 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd, mr->mmkey.size = umem->length; mr->mmkey.pd = to_mpd(pd)->pdn; mr->page_shift = order_base_2(page_size); + mr->umem = umem; + set_mr_fields(dev, mr, umem->length, access_flags); return mr; } @@ -1279,10 +1278,10 @@ static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) */ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, struct ib_umem *umem, u64 iova, - int access_flags, bool populate) + int access_flags, unsigned int page_size, + bool populate) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - unsigned int page_size; struct mlx5_ib_mr *mr; __be64 *pas; void *mkc; @@ -1291,11 +1290,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, int err; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); - page_size = - mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova); - if (WARN_ON(!page_size)) - return ERR_PTR(-EINVAL); - + if (!page_size) { + page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, + 0, iova); + if (!page_size) + return ERR_PTR(-EINVAL); + } mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -1352,6 +1352,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, mr->mmkey.type = MLX5_MKEY_MR; mr->desc_size = sizeof(struct mlx5_mtt); mr->dev = dev; + mr->umem = umem; + set_mr_fields(dev, mr, umem->length, access_flags); kvfree(in); mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); @@ -1368,15 +1370,6 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, return ERR_PTR(err); } -static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - u64 length, int access_flags) -{ - mr->ibmr.lkey = mr->mmkey.key; - mr->ibmr.rkey = mr->mmkey.key; - mr->ibmr.length = length; - mr->access_flags = access_flags; -} - static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, u64 length, int acc, int mode) { @@ -1471,70 +1464,32 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, attr->access_flags, mode); } -struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt_addr, int access_flags, - struct ib_udata *udata) +static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, + u64 iova, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr = NULL; bool xlt_with_umr; - struct ib_umem *umem; int err; - if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) - return ERR_PTR(-EOPNOTSUPP); - - mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", - start, virt_addr, length, access_flags); - - xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length); - /* ODP requires xlt update via umr to work. */ - if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND)) - return ERR_PTR(-EINVAL); - - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && - length == U64_MAX) { - if (virt_addr != start) - return ERR_PTR(-EINVAL); - if (!(access_flags & IB_ACCESS_ON_DEMAND) || - !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) - return ERR_PTR(-EINVAL); - - mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); - if (IS_ERR(mr)) - return ERR_CAST(mr); - return &mr->ibmr; - } - - umem = mr_umem_get(dev, start, length, access_flags); - if (IS_ERR(umem)) - return ERR_CAST(umem); - + xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length); if (xlt_with_umr) { - mr = alloc_mr_from_cache(pd, umem, virt_addr, access_flags); - if (IS_ERR(mr)) - mr = NULL; - } - - if (!mr) { + mr = alloc_cacheable_mr(pd, umem, iova, access_flags); + } else { mutex_lock(&dev->slow_path_mutex); - mr = reg_create(NULL, pd, umem, virt_addr, access_flags, - !xlt_with_umr); + mr = reg_create(NULL, pd, umem, iova, access_flags, 0, true); mutex_unlock(&dev->slow_path_mutex); } - if (IS_ERR(mr)) { - err = PTR_ERR(mr); - goto error; + ib_umem_release(umem); + return ERR_CAST(mr); } mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); - mr->umem = umem; - atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); - set_mr_fields(dev, mr, length, access_flags); + atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); - if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) { + if (xlt_with_umr) { /* * If the MR was created with reg_create then it will be * configured properly but left disabled. It is safe to go ahead @@ -1546,32 +1501,88 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(err); } } + return &mr->ibmr; +} - if (is_odp_mr(mr)) { - to_ib_umem_odp(mr->umem)->private = mr; - init_waitqueue_head(&mr->q_deferred_work); - atomic_set(&mr->num_deferred_work, 0); - err = xa_err(xa_store(&dev->odp_mkeys, - mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, - GFP_KERNEL)); - if (err) { - dereg_mr(dev, mr); - return ERR_PTR(err); - } +static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, + u64 iova, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem_odp *odp; + struct mlx5_ib_mr *mr; + int err; - err = mlx5_ib_init_odp_mr(mr, xlt_with_umr); - if (err) { - dereg_mr(dev, mr); - return ERR_PTR(err); - } + if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + return ERR_PTR(-EOPNOTSUPP); + + if (!start && length == U64_MAX) { + if (iova != 0) + return ERR_PTR(-EINVAL); + if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) + return ERR_PTR(-EINVAL); + + mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); + if (IS_ERR(mr)) + return ERR_CAST(mr); + return &mr->ibmr; } + /* ODP requires xlt update via umr to work. */ + if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + return ERR_PTR(-EINVAL); + + odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, + &mlx5_mn_ops); + if (IS_ERR(odp)) + return ERR_CAST(odp); + + mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags); + if (IS_ERR(mr)) { + ib_umem_release(&odp->umem); + return ERR_CAST(mr); + } + + odp->private = mr; + init_waitqueue_head(&mr->q_deferred_work); + atomic_set(&mr->num_deferred_work, 0); + err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_KERNEL)); + if (err) + goto err_dereg_mr; + + err = mlx5_ib_init_odp_mr(mr); + if (err) + goto err_dereg_mr; return &mr->ibmr; -error: - ib_umem_release(umem); + +err_dereg_mr: + dereg_mr(dev, mr); return ERR_PTR(err); } +struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 iova, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem *umem; + + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) + return ERR_PTR(-EOPNOTSUPP); + + mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, iova, length, access_flags); + + if (access_flags & IB_ACCESS_ON_DEMAND) + return create_user_odp_mr(pd, start, length, iova, access_flags, + udata); + umem = ib_umem_get(&dev->ib_dev, start, length, access_flags); + if (IS_ERR(umem)) + return ERR_CAST(umem); + return create_real_mr(pd, umem, iova, access_flags); +} + /** * mlx5_mr_cache_invalidate - Fence all DMA on the MR * @mr: The MR to fence @@ -1661,7 +1672,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, atomic_sub(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); ib_umem_release(mr->umem); - mr->umem = mr_umem_get(dev, addr, len, access_flags); + mr->umem = ib_umem_get(&dev->ib_dev, addr, len, access_flags); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); mr->umem = NULL; @@ -1685,7 +1696,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (err) goto err; - mr = reg_create(ib_mr, pd, mr->umem, addr, access_flags, true); + mr = reg_create(ib_mr, pd, mr->umem, addr, access_flags, 0, true); if (IS_ERR(mr)) { err = PTR_ERR(mr); mr = to_mmr(ib_mr); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 5c853ec1b0d81e..f4a28a01218741 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -536,6 +536,10 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct mlx5_ib_mr *imr; int err; + if (!mlx5_ib_can_load_pas_with_umr(dev, + MLX5_IMR_MTT_ENTRIES * PAGE_SIZE)) + return ERR_PTR(-EOPNOTSUPP); + umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags); if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); @@ -831,17 +835,13 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, flags); } -int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable) +int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) { - u32 flags = MLX5_PF_FLAGS_SNAPSHOT; int ret; - if (enable) - flags |= MLX5_PF_FLAGS_ENABLE; - - ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), - mr->umem->address, mr->umem->length, NULL, - flags); + ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), mr->umem->address, + mr->umem->length, NULL, + MLX5_PF_FLAGS_SNAPSHOT | MLX5_PF_FLAGS_ENABLE); return ret >= 0 ? 0 : ret; } From ef3642c4f54d3493c92c71faf46139b2473bc532 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 30 Nov 2020 09:58:39 +0200 Subject: [PATCH 120/147] RDMA/mlx5: Fix error unwinds for rereg_mr This is all a giant train wreck of error handling, in many cases the MR is left in some corrupted state where continuing on is going to lead to chaos, or various unwinds/order is missed. rereg had three possible completely different actions, depending on flags and various details about the MR. Split the three actions into three functions, and call the right action from the start. For each action carefully design the error handling to fit the action: - UMR access/PD update is a simple UMR, if it fails the MR isn't changed, so do nothing - PAS update over UMR is multiple UMR operations. To keep everything sane revoke access to the MKey while it is being changed and restore it once the MR is correct. - Recreating the mkey should completely build a parallel MR with a fully loaded PAS then swap and destroy the old one. If it fails the original should be left untouched. This is handled in the core code. Directly call the normal MR creation functions, possibly re-using the existing umem. Add support for working with ODP MRs. The READ/WRITE access flags can be changed by UMR and we can trivially convert to/from ODP MRs using the logic to build a completely new MR. This new logic also fixes various problems with MRs continuing to work while their PAS lists are no longer valid, eg during a page size change. Link: https://lore.kernel.org/r/20201130075839.278575-6-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 316 +++++++++++++++++++------------- 1 file changed, 188 insertions(+), 128 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 8fb8f8a2c72e9e..281032d2827f3b 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -56,10 +56,9 @@ enum { static void create_mkey_callback(int status, struct mlx5_async_work *context); -static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, - struct ib_umem *umem, u64 iova, - int access_flags, unsigned int page_size, - bool populate); +static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, + u64 iova, int access_flags, + unsigned int page_size, bool populate); static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, struct ib_pd *pd) @@ -134,15 +133,6 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start, - u64 length) -{ - if (!mr->cache_ent) - return false; - return ((u64)1 << mr->cache_ent->order) * MLX5_ADAPTER_PAGE_SIZE >= - length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); -} - static void create_mkey_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = @@ -965,8 +955,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, if (!ent || ent->limit == 0 || !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) { mutex_lock(&dev->slow_path_mutex); - mr = reg_create(NULL, pd, umem, iova, access_flags, page_size, - false); + mr = reg_create(pd, umem, iova, access_flags, page_size, false); mutex_unlock(&dev->slow_path_mutex); return mr; } @@ -1276,10 +1265,9 @@ static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) * If ibmr is NULL it will be allocated by reg_create. * Else, the given ibmr will be used. */ -static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, - struct ib_umem *umem, u64 iova, - int access_flags, unsigned int page_size, - bool populate) +static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, + u64 iova, int access_flags, + unsigned int page_size, bool populate) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr; @@ -1290,13 +1278,9 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, int err; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); - if (!page_size) { - page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, - 0, iova); - if (!page_size) - return ERR_PTR(-EINVAL); - } - mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); + if (!page_size) + return ERR_PTR(-EINVAL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -1362,11 +1346,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, err_2: kvfree(in); - err_1: - if (!ibmr) - kfree(mr); - + kfree(mr); return ERR_PTR(err); } @@ -1476,8 +1457,11 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, if (xlt_with_umr) { mr = alloc_cacheable_mr(pd, umem, iova, access_flags); } else { + unsigned int page_size = mlx5_umem_find_best_pgsz( + umem, mkc, log_page_size, 0, iova); + mutex_lock(&dev->slow_path_mutex); - mr = reg_create(NULL, pd, umem, iova, access_flags, 0, true); + mr = reg_create(pd, umem, iova, access_flags, page_size, true); mutex_unlock(&dev->slow_path_mutex); } if (IS_ERR(mr)) { @@ -1608,135 +1592,211 @@ int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr) return mlx5_ib_post_send_wait(mr->dev, &umrwr); } -static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, - int access_flags, int flags) +/* + * True if the change in access flags can be done via UMR, only some access + * flags can be updated. + */ +static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev, + unsigned int current_access_flags, + unsigned int target_access_flags) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_umr_wr umrwr = {}; + unsigned int diffs = current_access_flags ^ target_access_flags; + + if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING)) + return false; + return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags, + target_access_flags); +} + +static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, + int access_flags) +{ + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + struct mlx5_umr_wr umrwr = { + .wr = { + .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS, + .opcode = MLX5_IB_WR_UMR, + }, + .mkey = mr->mmkey.key, + .pd = pd, + .access_flags = access_flags, + }; int err; - umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; + err = mlx5_ib_post_send_wait(dev, &umrwr); + if (err) + return err; - umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.mkey = mr->mmkey.key; + mr->access_flags = access_flags; + mr->mmkey.pd = to_mpd(pd)->pdn; + return 0; +} + +static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, + struct ib_umem *new_umem, + int new_access_flags, u64 iova, + unsigned long *page_size) +{ + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + + /* We only track the allocated sizes of MRs from the cache */ + if (!mr->cache_ent) + return false; + if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length)) + return false; + + *page_size = + mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova); + if (WARN_ON(!*page_size)) + return false; + return (1ULL << mr->cache_ent->order) >= + ib_umem_num_dma_blocks(new_umem, *page_size); +} + +static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, + int access_flags, int flags, struct ib_umem *new_umem, + u64 iova, unsigned long page_size) +{ + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE; + struct ib_umem *old_umem = mr->umem; + int err; + + /* + * To keep everything simple the MR is revoked before we start to mess + * with it. This ensure the change is atomic relative to any use of the + * MR. + */ + err = mlx5_mr_cache_invalidate(mr); + if (err) + return err; - if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) { - umrwr.pd = pd; - umrwr.access_flags = access_flags; - umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; + if (flags & IB_MR_REREG_PD) { + mr->ibmr.pd = pd; + mr->mmkey.pd = to_mpd(pd)->pdn; + upd_flags |= MLX5_IB_UPD_XLT_PD; + } + if (flags & IB_MR_REREG_ACCESS) { + mr->access_flags = access_flags; + upd_flags |= MLX5_IB_UPD_XLT_ACCESS; } - err = mlx5_ib_post_send_wait(dev, &umrwr); + mr->ibmr.length = new_umem->length; + mr->mmkey.iova = iova; + mr->mmkey.size = new_umem->length; + mr->page_shift = order_base_2(page_size); + mr->umem = new_umem; + err = mlx5_ib_update_mr_pas(mr, upd_flags); + if (err) { + /* + * The MR is revoked at this point so there is no issue to free + * new_umem. + */ + mr->umem = old_umem; + return err; + } - return err; + atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages); + ib_umem_release(old_umem); + atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages); + return 0; } struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, - u64 length, u64 virt_addr, - int new_access_flags, struct ib_pd *new_pd, + u64 length, u64 iova, int new_access_flags, + struct ib_pd *new_pd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); struct mlx5_ib_mr *mr = to_mmr(ib_mr); - struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd; - int access_flags = flags & IB_MR_REREG_ACCESS ? - new_access_flags : - mr->access_flags; - int upd_flags = 0; - u64 addr, len; int err; - mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", - start, virt_addr, length, access_flags); + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) + return ERR_PTR(-EOPNOTSUPP); - if (!mr->umem) - return ERR_PTR(-EINVAL); + mlx5_ib_dbg( + dev, + "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, iova, length, new_access_flags); - if (is_odp_mr(mr)) + if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) return ERR_PTR(-EOPNOTSUPP); - if (flags & IB_MR_REREG_TRANS) { - addr = virt_addr; - len = length; - } else { - addr = mr->umem->address; - len = mr->umem->length; - } + if (!(flags & IB_MR_REREG_ACCESS)) + new_access_flags = mr->access_flags; + if (!(flags & IB_MR_REREG_PD)) + new_pd = ib_mr->pd; - if (flags != IB_MR_REREG_PD) { - /* - * Replace umem. This needs to be done whether or not UMR is - * used. - */ - flags |= IB_MR_REREG_TRANS; - atomic_sub(ib_umem_num_pages(mr->umem), - &dev->mdev->priv.reg_pages); - ib_umem_release(mr->umem); - mr->umem = ib_umem_get(&dev->ib_dev, addr, len, access_flags); - if (IS_ERR(mr->umem)) { - err = PTR_ERR(mr->umem); - mr->umem = NULL; - goto err; + if (!(flags & IB_MR_REREG_TRANS)) { + struct ib_umem *umem; + + /* Fast path for PD/access change */ + if (can_use_umr_rereg_access(dev, mr->access_flags, + new_access_flags)) { + err = umr_rereg_pd_access(mr, new_pd, new_access_flags); + if (err) + return ERR_PTR(err); + return NULL; } - atomic_add(ib_umem_num_pages(mr->umem), - &dev->mdev->priv.reg_pages); - } + /* DM or ODP MR's don't have a umem so we can't re-use it */ + if (!mr->umem || is_odp_mr(mr)) + goto recreate; - if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags, - access_flags) || - !mlx5_ib_can_load_pas_with_umr(dev, len) || - (flags & IB_MR_REREG_TRANS && - !mlx5_ib_pas_fits_in_mr(mr, addr, len))) { /* - * UMR can't be used - MKey needs to be replaced. + * Only one active MR can refer to a umem at one time, revoke + * the old MR before assigning the umem to the new one. */ - if (mr->cache_ent) - detach_mr_from_cache(mr); - err = destroy_mkey(dev, mr); + err = mlx5_mr_cache_invalidate(mr); if (err) - goto err; + return ERR_PTR(err); + umem = mr->umem; + mr->umem = NULL; + atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); - mr = reg_create(ib_mr, pd, mr->umem, addr, access_flags, 0, true); - if (IS_ERR(mr)) { - err = PTR_ERR(mr); - mr = to_mmr(ib_mr); - goto err; - } - } else { - /* - * Send a UMR WQE - */ - mr->ibmr.pd = pd; - mr->access_flags = access_flags; - mr->mmkey.iova = addr; - mr->mmkey.size = len; - mr->mmkey.pd = to_mpd(pd)->pdn; + return create_real_mr(new_pd, umem, mr->mmkey.iova, + new_access_flags); + } - if (flags & IB_MR_REREG_TRANS) { - upd_flags = MLX5_IB_UPD_XLT_ADDR; - if (flags & IB_MR_REREG_PD) - upd_flags |= MLX5_IB_UPD_XLT_PD; - if (flags & IB_MR_REREG_ACCESS) - upd_flags |= MLX5_IB_UPD_XLT_ACCESS; - err = mlx5_ib_update_mr_pas(mr, upd_flags); - } else { - err = rereg_umr(pd, mr, access_flags, flags); + /* + * DM doesn't have a PAS list so we can't re-use it, odp does but the + * logic around releasing the umem is different + */ + if (!mr->umem || is_odp_mr(mr)) + goto recreate; + + if (!(new_access_flags & IB_ACCESS_ON_DEMAND) && + can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) { + struct ib_umem *new_umem; + unsigned long page_size; + + new_umem = ib_umem_get(&dev->ib_dev, start, length, + new_access_flags); + if (IS_ERR(new_umem)) + return ERR_CAST(new_umem); + + /* Fast path for PAS change */ + if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova, + &page_size)) { + err = umr_rereg_pas(mr, new_pd, new_access_flags, flags, + new_umem, iova, page_size); + if (err) { + ib_umem_release(new_umem); + return ERR_PTR(err); + } + return NULL; } - - if (err) - goto err; + return create_real_mr(new_pd, new_umem, iova, new_access_flags); } - set_mr_fields(dev, mr, len, access_flags); - - return NULL; - -err: - ib_umem_release(mr->umem); - mr->umem = NULL; - - clean_mr(dev, mr); - return ERR_PTR(err); + /* + * Everything else has no state we can preserve, just create a new MR + * from scratch + */ +recreate: + return mlx5_ib_reg_user_mr(new_pd, start, length, iova, + new_access_flags, udata); } static int From c277f98b3e3e2cc3e28836bf4125a95dc0e1dd54 Mon Sep 17 00:00:00 2001 From: Yejune Deng Date: Mon, 30 Nov 2020 16:52:56 +0800 Subject: [PATCH 121/147] RDMA/i40iw: Replace atomic_add_return(1, ..) atomic_inc_return() is a little neater Link: https://lore.kernel.org/r/1606726376-7675-1-git-send-email-yejune.deng@gmail.com Signed-off-by: Yejune Deng Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 3053c345a5a343..9acc0ecc9a43e4 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2426,7 +2426,7 @@ static void i40iw_handle_rst_pkt(struct i40iw_cm_node *cm_node, } break; case I40IW_CM_STATE_MPAREQ_RCVD: - atomic_add_return(1, &cm_node->passive_state); + atomic_inc(&cm_node->passive_state); break; case I40IW_CM_STATE_ESTABLISHED: case I40IW_CM_STATE_SYN_RCVD: @@ -3020,7 +3020,7 @@ static int i40iw_cm_reject(struct i40iw_cm_node *cm_node, const void *pdata, u8 i40iw_cleanup_retrans_entry(cm_node); if (!loopback) { - passive_state = atomic_add_return(1, &cm_node->passive_state); + passive_state = atomic_inc_return(&cm_node->passive_state); if (passive_state == I40IW_SEND_RESET_EVENT) { cm_node->state = I40IW_CM_STATE_CLOSED; i40iw_rem_ref_cm_node(cm_node); @@ -3678,7 +3678,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -EINVAL; } - passive_state = atomic_add_return(1, &cm_node->passive_state); + passive_state = atomic_inc_return(&cm_node->passive_state); if (passive_state == I40IW_SEND_RESET_EVENT) { i40iw_rem_ref_cm_node(cm_node); return -ECONNRESET; From c63e1c4dfc33d1bdae395ee8fbcbfad4830b12c0 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 30 Nov 2020 05:13:06 -0800 Subject: [PATCH 122/147] RDMA/bnxt_re: Fix max_qp_wrs reported While creating qps, the driver adds one extra entry to the sq size passed by the ULPs in order to avoid queue full condition. When ULPs creates QPs with max_qp_wr reported, driver creates QP with 1 more than the max_wqes supported by HW. Create QP fails in this case. To avoid this error, reduce 1 entry in max_qp_wqes and report it to the stack. Link: https://lore.kernel.org/r/1606741986-16477-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 64d44f51db4b6c..6316179583a6f0 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -118,7 +118,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, * 128 WQEs needs to be reserved for the HW (8916). Prevent * reporting the max number */ - attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS; + attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1; attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx) ? 6 : sb->max_sge; attr->max_cq = le32_to_cpu(sb->max_cq); From 2988ca08ba65848f2705023b054fd8bfc0109c38 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 1 Dec 2020 13:08:55 +0100 Subject: [PATCH 123/147] IB: Fix kernel-doc markups Some functions have different names between their prototypes and the kernel-doc markup. Others need to be fixed, as kernel-doc markups should use this format: identifier - description Link: https://lore.kernel.org/r/78b98c41a5a0f4c0106433d305b143028a4168b0.1606823973.git.mchehab+huawei@kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 5 +++-- drivers/infiniband/core/cq.c | 4 ++-- drivers/infiniband/core/iwpm_util.h | 2 +- drivers/infiniband/core/sa_query.c | 3 ++- drivers/infiniband/core/verbs.c | 4 ++-- drivers/infiniband/sw/rdmavt/ah.c | 2 +- drivers/infiniband/sw/rdmavt/mcast.c | 12 ++++++------ drivers/infiniband/sw/rdmavt/qp.c | 8 ++++---- drivers/infiniband/ulp/iser/iscsi_iser.c | 2 +- drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h | 2 +- .../infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c | 2 +- drivers/infiniband/ulp/srpt/ib_srpt.h | 2 +- include/rdma/ib_verbs.h | 11 +++++++++++ 13 files changed, 36 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 167e436ae11ded..46c48690f909b9 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1251,7 +1251,8 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) EXPORT_SYMBOL(ib_cm_listen); /** - * Create a new listening ib_cm_id and listen on the given service ID. + * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on + * the given service ID. * * If there's an existing ID listening on that same device and service ID, * return it. @@ -1764,7 +1765,7 @@ static u16 cm_get_bth_pkey(struct cm_work *work) } /** - * Convert OPA SGID to IB SGID + * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID * ULPs (such as IPoIB) do not understand OPA GIDs and will * reject them as the local_gid will not match the sgid. Therefore, * change the pathrec's SGID to an IB SGID. diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 12ebacf529587d..d4248bbe74da8a 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -123,7 +123,7 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs, } /** - * ib_process_direct_cq - process a CQ in caller context + * ib_process_cq_direct - process a CQ in caller context * @cq: CQ to process * @budget: number of CQEs to poll for * @@ -197,7 +197,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) } /** - * __ib_alloc_cq allocate a completion queue + * __ib_alloc_cq - allocate a completion queue * @dev: device to allocate the CQ for * @private: driver private data, accessible from cq->cq_context * @nr_cqe: number of CQEs to allocate diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index 1bf87d9fd0bdd8..eeb8e6010907da 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -141,7 +141,7 @@ int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request); int iwpm_get_nlmsg_seq(void); /** - * iwpm_add_reminfo - Add remote address info of the connecting peer + * iwpm_add_remote_info - Add remote address info of the connecting peer * to the remote info hash table * @reminfo: The remote info to be added */ diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8c930bf1df894b..89a831fa1885b8 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1435,7 +1435,8 @@ enum opa_pr_supported { }; /** - * Check if current PR query can be an OPA query. + * opa_pr_query_possible - Check if current PR query can be an OPA query. + * * Retuns PR_NOT_SUPPORTED if a path record query is not * possible, PR_OPA_SUPPORTED if an OPA path record query * is possible and PR_IB_SUPPORTED if an IB path record diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5d4c7c263665cb..54e053b5306043 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -244,7 +244,7 @@ EXPORT_SYMBOL(rdma_port_get_link_layer); /* Protection domains */ /** - * ib_alloc_pd - Allocates an unused protection domain. + * __ib_alloc_pd - Allocates an unused protection domain. * @device: The device on which to allocate the protection domain. * @flags: protection domain flags * @caller: caller's build-time module name @@ -1666,7 +1666,7 @@ static bool is_qp_type_connected(const struct ib_qp *qp) qp->qp_type == IB_QPT_XRC_TGT); } -/** +/* * IB core internal function to perform QP attributes modification. */ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index f9754dcd250b77..a3e5b368c5e7b3 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -126,7 +126,7 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, } /** - * rvt_destory_ah - Destory an address handle + * rvt_destroy_ah - Destroy an address handle * @ibah: address handle * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) * Return: 0 on success diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index dd11c6fcd06045..5233a63d99a653 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -54,7 +54,7 @@ #include "mcast.h" /** - * rvt_driver_mcast - init resources for multicast + * rvt_driver_mcast_init - init resources for multicast * @rdi: rvt dev struct * * This is per device that registers with rdmavt @@ -69,7 +69,7 @@ void rvt_driver_mcast_init(struct rvt_dev_info *rdi) } /** - * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct + * rvt_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct * @qp: the QP to link */ static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp) @@ -98,7 +98,7 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp) } /** - * mcast_alloc - allocate the multicast GID structure + * rvt_mcast_alloc - allocate the multicast GID structure * @mgid: the multicast GID * @lid: the muilticast LID (host order) * @@ -181,7 +181,7 @@ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid, EXPORT_SYMBOL(rvt_mcast_find); /** - * mcast_add - insert mcast GID into table and attach QP struct + * rvt_mcast_add - insert mcast GID into table and attach QP struct * @mcast: the mcast GID table * @mqp: the QP to attach * @@ -426,8 +426,8 @@ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } /** - *rvt_mast_tree_empty - determine if any qps are attached to any mcast group - *@rdi: rvt dev struct + * rvt_mcast_tree_empty - determine if any qps are attached to any mcast group + * @rdi: rvt dev struct * * Return: in use count */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index e9db6bf106186f..22fa9bde541993 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1827,7 +1827,7 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } /** - * rvt_post_receive - post a receive on a QP + * rvt_post_recv - post a receive on a QP * @ibqp: the QP to post the receive on * @wr: the WR to post * @bad_wr: the first bad WR is put here @@ -2249,7 +2249,7 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, } /** - * rvt_post_srq_receive - post a receive on a shared receive queue + * rvt_post_srq_recv - post a receive on a shared receive queue * @ibsrq: the SRQ to post the receive on * @wr: the list of work requests to post * @bad_wr: A pointer to the first WR to cause a problem is put here @@ -2501,7 +2501,7 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only) EXPORT_SYMBOL(rvt_get_rwqe); /** - * qp_comm_est - handle trap with QP established + * rvt_comm_est - handle trap with QP established * @qp: the QP */ void rvt_comm_est(struct rvt_qp *qp) @@ -2947,7 +2947,7 @@ static enum ib_wc_status loopback_qp_drop(struct rvt_ibport *rvp, } /** - * ruc_loopback - handle UC and RC loopback requests + * rvt_ruc_loopback - handle UC and RC loopback requests * @sqp: the sending QP * * This is called from rvt_do_send() to forward a WQE addressed to the same HFI diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 3690e28cc7ea2a..1a459b35000cc1 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -739,7 +739,7 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn, } /** - * iscsi_iser_set_param() - set class connection parameter + * iscsi_iser_conn_get_stats() - get iscsi connection statistics * @cls_conn: iscsi class connection * @stats: iscsi stats to output * diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h index f64519872297e4..012fc27c5c93b2 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h @@ -437,7 +437,7 @@ struct opa_veswport_trap { } __packed; /** - * struct opa_vnic_iface_macs_entry - single entry in the mac list + * struct opa_vnic_iface_mac_entry - single entry in the mac list * @mac_addr: MAC address */ struct opa_vnic_iface_mac_entry { diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c index 868b5aec1537b7..292c037aa239dc 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c @@ -74,7 +74,7 @@ void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event) } /** - * opa_vnic_get_error_counters - get summary counters + * opa_vnic_get_summary_counters - get summary counters * @adapter: vnic port adapter * @cntrs: pointer to destination summary counters structure * diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index bdeb010efee680..76e66f630c17ab 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -347,7 +347,7 @@ struct srpt_nexus { }; /** - * struct srpt_port_attib - attributes for SRPT port + * struct srpt_port_attrib - attributes for SRPT port * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections. * @srp_max_rsp_size: Maximum size of SRP response messages in bytes. * @srp_sq_size: Shared receive queue (SRQ) size. diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3be1d1194a17f7..06a565203cfcd8 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3394,6 +3394,17 @@ enum ib_pd_flags { struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, const char *caller); +/** + * ib_alloc_pd - Allocates an unused protection domain. + * @device: The device on which to allocate the protection domain. + * @flags: protection domain flags + * + * A protection domain object provides an association between QPs, shared + * receive queues, address handles, memory regions, and memory windows. + * + * Every PD has a local_dma_lkey which can be used as the lkey value for local + * memory operations. + */ #define ib_alloc_pd(device, flags) \ __ib_alloc_pd((device), (flags), KBUILD_MODNAME) From 53ef4999f07d9c75cdc8effb0cc8c581dc39b1a1 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 2 Dec 2020 09:29:20 +0800 Subject: [PATCH 124/147] RDMA/hns: Move capability flags of QP and CQ to hns-abi.h These flags will be returned to the userspace through ABI, so they should be defined in hns-abi.h. Furthermore, there is no need to include hns-abi.h in every source files, it just needs to be included in the common header file. Link: https://lore.kernel.org/r/1606872560-17823-1-git-send-email-liweihang@huawei.com Reported-by: kernel test robot Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 1 - drivers/infiniband/hw/hns/hns_roce_device.h | 11 +---------- drivers/infiniband/hw/hns/hns_roce_main.c | 1 - drivers/infiniband/hw/hns/hns_roce_pd.c | 1 - drivers/infiniband/hw/hns/hns_roce_qp.c | 1 - drivers/infiniband/hw/hns/hns_roce_srq.c | 1 - include/uapi/rdma/hns-abi.h | 10 ++++++++++ 7 files changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 68f355fba425b5..5e6d68830fa5a8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -36,7 +36,6 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" -#include #include "hns_roce_common.h" static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 70ae37bad77ed2..60b8349cd2f87b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -34,6 +34,7 @@ #define _HNS_ROCE_DEVICE_H #include +#include #define DRV_NAME "hns_roce" @@ -131,16 +132,6 @@ enum { SERV_TYPE_UD, }; -enum hns_roce_qp_caps { - HNS_ROCE_QP_CAP_RQ_RECORD_DB = BIT(0), - HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1), - HNS_ROCE_QP_CAP_OWNER_DB = BIT(2), -}; - -enum hns_roce_cq_flags { - HNS_ROCE_CQ_FLAG_RECORD_DB = BIT(0), -}; - enum hns_roce_qp_state { HNS_ROCE_QP_STATE_RST, HNS_ROCE_QP_STATE_INIT, diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index f01590d8c3cfe6..e8aa8075ffcd32 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -40,7 +40,6 @@ #include #include "hns_roce_common.h" #include "hns_roce_device.h" -#include #include "hns_roce_hem.h" /** diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 98f69496adb495..45ec91db15539a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -32,7 +32,6 @@ #include #include -#include #include "hns_roce_device.h" static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 34aa086060d3f0..121d3b4c2edbf8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -39,7 +39,6 @@ #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" -#include static void flush_work_handle(struct work_struct *work) { diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 27646b9e35dfd2..36c6bcb8526987 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -4,7 +4,6 @@ */ #include -#include #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 9ec85f76e9ac04..90b739d05adfe7 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -43,6 +43,10 @@ struct hns_roce_ib_create_cq { __u32 reserved; }; +enum hns_roce_cq_cap_flags { + HNS_ROCE_CQ_FLAG_RECORD_DB = 1 << 0, +}; + struct hns_roce_ib_create_cq_resp { __aligned_u64 cqn; /* Only 32 bits used, 64 for compat */ __aligned_u64 cap_flags; @@ -69,6 +73,12 @@ struct hns_roce_ib_create_qp { __aligned_u64 sdb_addr; }; +enum hns_roce_qp_cap_flags { + HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, + HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, + HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, +}; + struct hns_roce_ib_create_qp_resp { __aligned_u64 cap_flags; }; From ca991a7d14d4835b302bcd182fdbf54470f45520 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 3 Dec 2020 21:08:07 +0200 Subject: [PATCH 125/147] RDMA/mlx5: Assign dev to DM MR Currently, DM MR registration flow doesn't set the mlx5_ib_dev pointer and can cause a NULL pointer dereference if userspace dumps the MR via rdma tool. Assign the IB device together with the other fields and remove the redundant reference of mlx5_ib_dev from mlx5_ib_mr. Cc: stable@vger.kernel.org Fixes: 6c29f57ea475 ("IB/mlx5: Device memory mr registration support") Link: https://lore.kernel.org/r/20201203190807.127189-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 +++- drivers/infiniband/hw/mlx5/mr.c | 17 ++++++------ drivers/infiniband/hw/mlx5/odp.c | 40 ++++++++++++++------------- drivers/infiniband/hw/mlx5/restrack.c | 2 +- 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index fac495e7834e17..c33d6fd64fb67e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -669,7 +669,6 @@ struct mlx5_ib_mr { struct mlx5_shared_mr_info *smr_info; struct list_head list; struct mlx5_cache_ent *cache_ent; - struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; void *descs_alloc; @@ -1107,6 +1106,11 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) return container_of(ibdev, struct mlx5_ib_dev, ib_dev); } +static inline struct mlx5_ib_dev *mr_to_mdev(struct mlx5_ib_mr *mr) +{ + return to_mdev(mr->ibmr.device); +} + static inline struct mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata) { struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 281032d2827f3b..6fa869c30e3f75 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -137,8 +137,8 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = container_of(context, struct mlx5_ib_mr, cb_work); - struct mlx5_ib_dev *dev = mr->dev; struct mlx5_cache_ent *ent = mr->cache_ent; + struct mlx5_ib_dev *dev = ent->dev; unsigned long flags; if (status) { @@ -176,7 +176,6 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) if (!mr) return NULL; mr->cache_ent = ent; - mr->dev = ent->dev; set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); MLX5_SET(mkc, mkc, free, 1); @@ -931,6 +930,7 @@ static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; mr->ibmr.length = length; + mr->ibmr.device = &dev->ib_dev; mr->access_flags = access_flags; } @@ -1062,7 +1062,7 @@ static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, size_t nents, size_t ent_size, unsigned int flags) { - struct mlx5_ib_dev *dev = mr->dev; + struct mlx5_ib_dev *dev = mr_to_mdev(mr); struct device *ddev = &dev->mdev->pdev->dev; dma_addr_t dma; void *xlt; @@ -1124,7 +1124,7 @@ static unsigned int xlt_wr_final_send_flags(unsigned int flags) int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags) { - struct mlx5_ib_dev *dev = mr->dev; + struct mlx5_ib_dev *dev = mr_to_mdev(mr); struct device *ddev = &dev->mdev->pdev->dev; void *xlt; struct mlx5_umr_wr wr; @@ -1203,7 +1203,7 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, */ static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) { - struct mlx5_ib_dev *dev = mr->dev; + struct mlx5_ib_dev *dev = mr_to_mdev(mr); struct device *ddev = &dev->mdev->pdev->dev; struct ib_block_iter biter; struct mlx5_mtt *cur_mtt; @@ -1335,7 +1335,6 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, } mr->mmkey.type = MLX5_MKEY_MR; mr->desc_size = sizeof(struct mlx5_mtt); - mr->dev = dev; mr->umem = umem; set_mr_fields(dev, mr, umem->length, access_flags); kvfree(in); @@ -1579,17 +1578,17 @@ int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr) { struct mlx5_umr_wr umrwr = {}; - if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) return 0; umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.pd = mr->dev->umrc.pd; + umrwr.pd = mr_to_mdev(mr)->umrc.pd; umrwr.mkey = mr->mmkey.key; umrwr.ignore_free_state = 1; - return mlx5_ib_post_send_wait(mr->dev, &umrwr); + return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr); } /* diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index f4a28a01218741..aa2413b50adc99 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -102,7 +102,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, if (flags & MLX5_IB_UPD_XLT_ZAP) { for (; pklm != end; pklm++, idx++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - pklm->key = cpu_to_be32(imr->dev->null_mkey); + pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey); pklm->va = 0; } return; @@ -129,7 +129,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * locking around the xarray. */ lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); - lockdep_assert_held(&imr->dev->odp_srcu); + lockdep_assert_held(&mr_to_mdev(imr)->odp_srcu); for (; pklm != end; pklm++, idx++) { struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); @@ -139,7 +139,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, pklm->key = cpu_to_be32(mtt->ibmr.lkey); pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE); } else { - pklm->key = cpu_to_be32(imr->dev->null_mkey); + pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey); pklm->va = 0; } } @@ -199,7 +199,7 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) mutex_unlock(&odp->umem_mutex); if (!mr->cache_ent) { - mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey); + mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey); WARN_ON(mr->descs); } } @@ -222,19 +222,19 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) WARN_ON(atomic_read(&mr->num_deferred_work)); if (need_imr_xlt) { - srcu_key = srcu_read_lock(&mr->dev->odp_srcu); + srcu_key = srcu_read_lock(&mr_to_mdev(mr)->odp_srcu); mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(mr->parent, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); - srcu_read_unlock(&mr->dev->odp_srcu, srcu_key); + srcu_read_unlock(&mr_to_mdev(mr)->odp_srcu, srcu_key); } dma_fence_odp_mr(mr); mr->parent = NULL; - mlx5_mr_cache_free(mr->dev, mr); + mlx5_mr_cache_free(mr_to_mdev(mr), mr); ib_umem_odp_release(odp); if (atomic_dec_and_test(&imr->num_deferred_work)) wake_up(&imr->q_deferred_work); @@ -274,7 +274,7 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) goto out_unlock; atomic_inc(&imr->num_deferred_work); - call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu, + call_srcu(&mr_to_mdev(mr)->odp_srcu, &mr->odp_destroy.rcu, free_implicit_child_mr_rcu); out_unlock: @@ -476,12 +476,13 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(odp)) return ERR_CAST(odp); - ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY, - imr->access_flags); + ret = mr = mlx5_mr_cache_alloc( + mr_to_mdev(imr), MLX5_IMR_MTT_CACHE_ENTRY, imr->access_flags); if (IS_ERR(mr)) goto out_umem; mr->ibmr.pd = imr->ibmr.pd; + mr->ibmr.device = &mr_to_mdev(imr)->ib_dev; mr->umem = &odp->umem; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; @@ -517,11 +518,11 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_mr; } - mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr); + mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr); return mr; out_mr: - mlx5_mr_cache_free(imr->dev, mr); + mlx5_mr_cache_free(mr_to_mdev(imr), mr); out_umem: ib_umem_odp_release(odp); return ret; @@ -555,6 +556,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->umem = &umem_odp->umem; imr->ibmr.lkey = imr->mmkey.key; imr->ibmr.rkey = imr->mmkey.key; + imr->ibmr.device = &dev->ib_dev; imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; atomic_set(&imr->num_deferred_work, 0); @@ -588,7 +590,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); - struct mlx5_ib_dev *dev = imr->dev; + struct mlx5_ib_dev *dev = mr_to_mdev(imr); struct list_head destroy_list; struct mlx5_ib_mr *mtt; struct mlx5_ib_mr *tmp; @@ -658,10 +660,10 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) { /* Prevent new page faults and prefetch requests from succeeding */ - xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&mr->dev->odp_srcu); + synchronize_srcu(&mr_to_mdev(mr)->odp_srcu); wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); @@ -705,7 +707,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, if (ret < 0) { if (ret != -EAGAIN) - mlx5_ib_err(mr->dev, + mlx5_ib_err(mr_to_mdev(mr), "Failed to update mkey page tables\n"); goto out; } @@ -795,7 +797,7 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); if (err) { - mlx5_ib_err(imr->dev, "Failed to update PAS\n"); + mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n"); return err; } return ret; @@ -815,7 +817,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - lockdep_assert_held(&mr->dev->odp_srcu); + lockdep_assert_held(&mr_to_mdev(mr)->odp_srcu); if (unlikely(io_virt < mr->mmkey.iova)) return -EFAULT; @@ -1783,7 +1785,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) /* We rely on IB/core that work is executed if we have num_sge != 0 only. */ WARN_ON(!work->num_sge); - dev = work->frags[0].mr->dev; + dev = mr_to_mdev(work->frags[0].mr); /* SRCU should be held when calling to mlx5_odp_populate_xlt() */ srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < work->num_sge; ++i) { diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 887270dd3ce207..4ac429e72004c3 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -116,7 +116,7 @@ static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr) { struct mlx5_ib_mr *mr = to_mmr(ibmr); - return fill_res_raw(msg, mr->dev, MLX5_SGMT_TYPE_PRM_QUERY_MKEY, + return fill_res_raw(msg, mr_to_mdev(mr), MLX5_SGMT_TYPE_PRM_QUERY_MKEY, mlx5_mkey_to_idx(mr->mmkey.key)); } From 0583531bb9ef30a5c4ce00b4ee10b6707768eead Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Dec 2020 18:42:56 +0100 Subject: [PATCH 126/147] RDMA/iser: Remove in_interrupt() usage iser_initialize_task_headers() uses in_interrupt() to find out if it is safe to acquire a mutex. in_interrupt() is deprecated as it is ill defined and does not provide what it suggests. Aside of that it covers only parts of the contexts in which a mutex may not be acquired. The following callchains exist: iscsi_queuecommand() *locks* iscsi_session::frwd_lock -> iscsi_prep_scsi_cmd_pdu() -> session->tt->init_task() (iscsi_iser_task_init()) -> iser_initialize_task_headers() -> iscsi_iser_task_xmit() (iscsi_transport::xmit_task) -> iscsi_iser_task_xmit_unsol_data() -> iser_send_data_out() -> iser_initialize_task_headers() iscsi_data_xmit() *locks* iscsi_session::frwd_lock -> iscsi_prep_mgmt_task() -> session->tt->init_task() (iscsi_iser_task_init()) -> iser_initialize_task_headers() -> iscsi_prep_scsi_cmd_pdu() -> session->tt->init_task() (iscsi_iser_task_init()) -> iser_initialize_task_headers() __iscsi_conn_send_pdu() caller has iscsi_session::frwd_lock -> iscsi_prep_mgmt_task() -> session->tt->init_task() (iscsi_iser_task_init()) -> iser_initialize_task_headers() -> session->tt->xmit_task() ( The only callchain that is close to be invoked in preemptible context: iscsi_xmitworker() worker -> iscsi_data_xmit() -> iscsi_xmit_task() -> conn->session->tt->xmit_task() (iscsi_iser_task_xmit() In iscsi_iser_task_xmit() there is this check: if (!task->sc) return iscsi_iser_mtask_xmit(conn, task); so it does end up in iser_initialize_task_headers() and iser_initialize_task_headers() relies on iscsi_task::sc == NULL. Remove conditional locking of iser_conn::state_mutex because there is no call chain to do so. Remove the goto label and return early now that there is no clean up needed. Link: https://lore.kernel.org/r/20201204174256.62xfcvudndt7oufl@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Cc: Sagi Grimberg Cc: Max Gurtovoy Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iscsi_iser.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 1a459b35000cc1..4792b9bf400ffd 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -187,23 +187,14 @@ iser_initialize_task_headers(struct iscsi_task *task, struct iser_device *device = iser_conn->ib_conn.device; struct iscsi_iser_task *iser_task = task->dd_data; u64 dma_addr; - const bool mgmt_task = !task->sc && !in_interrupt(); - int ret = 0; - if (unlikely(mgmt_task)) - mutex_lock(&iser_conn->state_mutex); - - if (unlikely(iser_conn->state != ISER_CONN_UP)) { - ret = -ENODEV; - goto out; - } + if (unlikely(iser_conn->state != ISER_CONN_UP)) + return -ENODEV; dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc, ISER_HEADERS_LEN, DMA_TO_DEVICE); - if (ib_dma_mapping_error(device->ib_device, dma_addr)) { - ret = -ENOMEM; - goto out; - } + if (ib_dma_mapping_error(device->ib_device, dma_addr)) + return -ENOMEM; tx_desc->inv_wr.next = NULL; tx_desc->reg_wr.wr.next = NULL; @@ -214,11 +205,8 @@ iser_initialize_task_headers(struct iscsi_task *task, tx_desc->tx_sg[0].lkey = device->pd->local_dma_lkey; iser_task->iser_conn = iser_conn; -out: - if (unlikely(mgmt_task)) - mutex_unlock(&iser_conn->state_mutex); - return ret; + return 0; } /** From 7f1d2dfa307e760af13677895b4e874e9c251a5b Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 31 Oct 2020 06:46:38 -0700 Subject: [PATCH 127/147] RDMA/mlx5: Remove unneeded semicolon A semicolon is not needed after a switch statement. Link: https://lore.kernel.org/r/20201031134638.2135060-1-trix@redhat.com Signed-off-by: Tom Rix Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 9d32f31732316e..0cb7cc642d87d6 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3078,7 +3078,7 @@ static int ib_to_mlx5_rate_map(u8 rate) return 5; default: return rate + MLX5_STAT_RATE_OFFSET; - }; + } return 0; } From e7f870f5fda75fcaf9de09316e6e456f5f035516 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 3 Dec 2020 21:06:59 +0200 Subject: [PATCH 128/147] MAINTAINERS: SOFT-ROCE: Change Zhu Yanjun's email address Change Zhu's working email to his private one. Link: https://lore.kernel.org/r/20201203190659.126932-1-leon@kernel.org Signed-off-by: Zhu Yanjun Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index d9fb83d67055c5..22831c2991ab54 100644 --- a/.mailmap +++ b/.mailmap @@ -341,3 +341,4 @@ Wolfram Sang Wolfram Sang Yakir Yang Yusuke Goda +Zhu Yanjun diff --git a/MAINTAINERS b/MAINTAINERS index 2daa6ee673f7f2..73e53207c92007 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16195,7 +16195,7 @@ F: drivers/infiniband/sw/siw/ F: include/uapi/rdma/siw-abi.h SOFT-ROCE DRIVER (rxe) -M: Zhu Yanjun +M: Zhu Yanjun L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/sw/rxe/ From d1dec0cae5539d678c1e265ba4fcf783c8ec4733 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 7 Dec 2020 18:32:55 +0100 Subject: [PATCH 129/147] RDMA/core: Update kernel documentation for ib_create_named_qp() Commit 66f57b871efc ("RDMA/restrack: Support all QP types") extends ib_create_qp() to a named ib_create_named_qp(), which takes the caller's name as argument, but it did not add the new argument description to the function's kerneldoc. make htmldocs warns: ./drivers/infiniband/core/verbs.c:1206: warning: Function parameter or member 'caller' not described in 'ib_create_named_qp' Add a description for this new argument based on the description of the same argument in other related functions. Fixes: 66f57b871efc ("RDMA/restrack: Support all QP types") Link: https://lore.kernel.org/r/20201207173255.13355-1-lukas.bulwahn@gmail.com Signed-off-by: Lukas Bulwahn Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 54e053b5306043..9137a25bb52105 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1197,6 +1197,7 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, * @qp_init_attr: A list of initial attributes required to create the * QP. If QP creation succeeds, then the attributes are updated to * the actual capabilities of the created QP. + * @caller: caller's build-time module name * * NOTE: for user qp use ib_create_qp_user with valid udata! */ From 286e1d3f9ba89c7db5eecd30f47f9e333843ea13 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 8 Dec 2020 09:35:43 +0200 Subject: [PATCH 130/147] RDMA/core: Clean up cq pool mechanism MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CQ pool mechanism had two problems: 1. The CQ pool lists were uninitialized in the device registration error flow. As a result, all the list pointers remained NULL. This caused the kernel to crash (in procedure ib_cq_pool_destroy) when that error flow was taken (and unregister called). The stack trace snippet: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0×0000) ? not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI . . . RIP: 0010:ib_cq_pool_destroy+0x1b/0×70 [ib_core] . . . Call Trace: disable_device+0x9f/0×130 [ib_core] __ib_unregister_device+0x35/0×90 [ib_core] ib_register_device+0x529/0×610 [ib_core] __mlx5_ib_add+0x3a/0×70 [mlx5_ib] mlx5_add_device+0x87/0×1c0 [mlx5_core] mlx5_register_interface+0x74/0xc0 [mlx5_core] do_one_initcall+0x4b/0×1f4 do_init_module+0x5a/0×223 load_module+0x1938/0×1d40 2. At device unregister, when cleaning up the cq pool, the cq's in the pool lists were freed, but the cq entries were left in the list. The fix for the first issue is to initialize the cq pool lists when the ib_device structure is allocated for a new device (in procedure _ib_alloc_device). The fix for the second problem is to delete cq entries from the pool lists when cleaning up the cq pool. In addition, procedure ib_cq_pool_destroy() is renamed to the more appropriate name ib_cq_pool_cleanup(). Fixes: 4aa1615268a8 ("RDMA/core: Fix ordering of CQ pool destruction") Link: https://lore.kernel.org/r/20201208073545.9723-2-leon@kernel.org Suggested-by: Jason Gunthorpe Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 3 +-- drivers/infiniband/core/cq.c | 12 ++---------- drivers/infiniband/core/device.c | 9 ++++++--- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index baa86c86efad5d..315f7a297eee35 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -402,7 +402,6 @@ void rdma_umap_priv_init(struct rdma_umap_priv *priv, struct vm_area_struct *vma, struct rdma_user_mmap_entry *entry); -void ib_cq_pool_init(struct ib_device *dev); -void ib_cq_pool_destroy(struct ib_device *dev); +void ib_cq_pool_cleanup(struct ib_device *dev); #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index d4248bbe74da8a..433b426729d4ce 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -349,16 +349,7 @@ void ib_free_cq(struct ib_cq *cq) } EXPORT_SYMBOL(ib_free_cq); -void ib_cq_pool_init(struct ib_device *dev) -{ - unsigned int i; - - spin_lock_init(&dev->cq_pools_lock); - for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++) - INIT_LIST_HEAD(&dev->cq_pools[i]); -} - -void ib_cq_pool_destroy(struct ib_device *dev) +void ib_cq_pool_cleanup(struct ib_device *dev) { struct ib_cq *cq, *n; unsigned int i; @@ -367,6 +358,7 @@ void ib_cq_pool_destroy(struct ib_device *dev) list_for_each_entry_safe(cq, n, &dev->cq_pools[i], pool_entry) { WARN_ON(cq->cqe_used); + list_del(&cq->pool_entry); cq->shared = false; ib_free_cq(cq); } diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 3ab1edea6acbe9..11485b8748a2ac 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -570,6 +570,7 @@ static void rdma_init_coredev(struct ib_core_device *coredev, struct ib_device *_ib_alloc_device(size_t size) { struct ib_device *device; + unsigned int i; if (WARN_ON(size < sizeof(struct ib_device))) return NULL; @@ -601,6 +602,10 @@ struct ib_device *_ib_alloc_device(size_t size) init_completion(&device->unreg_completion); INIT_WORK(&device->unregistration_work, ib_unregister_work); + spin_lock_init(&device->cq_pools_lock); + for (i = 0; i < ARRAY_SIZE(device->cq_pools); i++) + INIT_LIST_HEAD(&device->cq_pools[i]); + device->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) | @@ -1262,7 +1267,7 @@ static void disable_device(struct ib_device *device) remove_client_context(device, cid); } - ib_cq_pool_destroy(device); + ib_cq_pool_cleanup(device); /* Pairs with refcount_set in enable_device */ ib_device_put(device); @@ -1307,8 +1312,6 @@ static int enable_device_and_get(struct ib_device *device) goto out; } - ib_cq_pool_init(device); - down_read(&clients_rwsem); xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { ret = add_client_context(device, client); From 779e0bf47632c609c59f527f9711ecd3214dccb0 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 8 Dec 2020 09:35:44 +0200 Subject: [PATCH 131/147] RDMA/core: Do not indicate device ready when device enablement fails In procedure ib_register_device, procedure kobject_uevent is called (advertising that the device is ready for userspace usage) even when device_enable_and_get() returned an error. As a result, various RDMA modules attempted to register for the device even while the device driver was preparing to unregister the device. Fix this by advertising the device availability only after enabling the device succeeds. Fixes: e7a5b4aafd82 ("RDMA/device: Don't fire uevent before device is fully initialized") Link: https://lore.kernel.org/r/20201208073545.9723-3-leon@kernel.org Suggested-by: Leon Romanovsky Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 11485b8748a2ac..e96f979e6d52df 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1397,9 +1397,6 @@ int ib_register_device(struct ib_device *device, const char *name, } ret = enable_device_and_get(device); - dev_set_uevent_suppress(&device->dev, false); - /* Mark for userspace that device is ready */ - kobject_uevent(&device->dev.kobj, KOBJ_ADD); if (ret) { void (*dealloc_fn)(struct ib_device *); @@ -1419,8 +1416,12 @@ int ib_register_device(struct ib_device *device, const char *name, ib_device_put(device); __ib_unregister_device(device); device->ops.dealloc_driver = dealloc_fn; + dev_set_uevent_suppress(&device->dev, false); return ret; } + dev_set_uevent_suppress(&device->dev, false); + /* Mark for userspace that device is ready */ + kobject_uevent(&device->dev.kobj, KOBJ_ADD); ib_device_put(device); return 0; From e0da68994d16b46384cce7b86eb645f1ef7c51ef Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Tue, 8 Dec 2020 09:35:45 +0200 Subject: [PATCH 132/147] RDMA/uverbs: Fix incorrect variable type Fix incorrect type of max_entries in UVERBS_METHOD_QUERY_GID_TABLE - max_entries is of type size_t although it can take negative values. The following static check revealed it: drivers/infiniband/core/uverbs_std_types_device.c:338 ib_uverbs_handler_UVERBS_METHOD_QUERY_GID_TABLE() warn: 'max_entries' unsigned <= 0 Fixes: 9f85cbe50aa0 ("RDMA/uverbs: Expose the new GID query API to user space") Link: https://lore.kernel.org/r/20201208073545.9723-4-leon@kernel.org Reported-by: Dan Carpenter Signed-off-by: Avihai Horon Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types_device.c | 14 +++++--------- include/rdma/uverbs_ioctl.h | 10 ++++++++++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index 302f898c5833fc..9ec6971056fa85 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -317,8 +317,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)( struct ib_device *ib_dev; size_t user_entry_size; ssize_t num_entries; - size_t max_entries; - size_t num_bytes; + int max_entries; u32 flags; int ret; @@ -336,19 +335,16 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)( attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES, user_entry_size); if (max_entries <= 0) - return -EINVAL; + return max_entries ?: -EINVAL; ucontext = ib_uverbs_get_ucontext(attrs); if (IS_ERR(ucontext)) return PTR_ERR(ucontext); ib_dev = ucontext->device; - if (check_mul_overflow(max_entries, sizeof(*entries), &num_bytes)) - return -EINVAL; - - entries = uverbs_zalloc(attrs, num_bytes); - if (!entries) - return -ENOMEM; + entries = uverbs_kcalloc(attrs, max_entries, sizeof(*entries)); + if (IS_ERR(entries)) + return PTR_ERR(entries); num_entries = rdma_query_gid_table(ib_dev, entries, max_entries); if (num_entries < 0) diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index bf167ef6c68891..39ef204753ec16 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -865,6 +865,16 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, { return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO); } + +static inline __malloc void *uverbs_kcalloc(struct uverbs_attr_bundle *bundle, + size_t n, size_t size) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return ERR_PTR(-EOVERFLOW); + return uverbs_zalloc(bundle, bytes); +} int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, s64 lower_bound, u64 upper_bound, s64 *def_val); From 6f320f6990ee2dd13df89707f1a219ecfe2960ad Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 8 Dec 2020 21:39:28 +0200 Subject: [PATCH 133/147] RDMA/mlx4: Remove bogus dev_base_lock usage It is not clear what this lock protects. If the authors wanted to ensure that "dev" does not disappear, that is impossible, given the following code path: mlx4_ib_netdev_event (under RTNL mutex) -> mlx4_ib_scan_netdevs -> mlx4_ib_update_qps Also, the dev_base_lock does not protect dev->dev_addr either. So it serves no purpose here. Remove it. Link: https://lore.kernel.org/r/20201208193928.1500893-1-vladimir.oltean@nxp.com Reviewed-by: Leon Romanovsky Signed-off-by: Vladimir Oltean Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f0864f40ea1ac0..e3cd402c079afc 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2265,10 +2265,7 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, u64 release_mac = MLX4_IB_INVALID_MAC; struct mlx4_ib_qp *qp; - read_lock(&dev_base_lock); new_smac = mlx4_mac_to_u64(dev->dev_addr); - read_unlock(&dev_base_lock); - atomic64_set(&ibdev->iboe.mac[port - 1], new_smac); /* no need for update QP1 and mac registration in non-SRIOV */ From 1c0ca9cd1741687f529498ddb899805fc2c51caa Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Fri, 11 Dec 2020 09:37:27 +0800 Subject: [PATCH 134/147] RDMA/hns: Limit the length of data copied between kernel and userspace For ib_copy_from_user(), the length of udata may not be the same as that of cmd. For ib_copy_to_user(), the length of udata may not be the same as that of resp. So limit the length to prevent out-of-bounds read and write operations from ib_copy_from_user() and ib_copy_to_user(). Fixes: de77503a5940 ("RDMA/hns: RDMA/hns: Assign rq head pointer when enable rq record db") Fixes: 633fb4d9fdaa ("RDMA/hns: Use structs to describe the uABI instead of opencoding") Fixes: ae85bf92effc ("RDMA/hns: Optimize qp param setup flow") Fixes: 6fd610c5733d ("RDMA/hns: Support 0 hop addressing for SRQ buffer") Fixes: 9d9d4ff78884 ("RDMA/hns: Update the kernel header file of hns") Link: https://lore.kernel.org/r/1607650657-35992-2-git-send-email-liweihang@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 5 +++-- drivers/infiniband/hw/hns/hns_roce_main.c | 3 ++- drivers/infiniband/hw/hns/hns_roce_pd.c | 11 ++++++----- drivers/infiniband/hw/hns/hns_roce_qp.c | 9 ++++++--- drivers/infiniband/hw/hns/hns_roce_srq.c | 10 +++++----- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 5e6d68830fa5a8..e67c2b99be01eb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -276,7 +276,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, if (udata) { ret = ib_copy_from_udata(&ucmd, udata, - min(sizeof(ucmd), udata->inlen)); + min(udata->inlen, sizeof(ucmd))); if (ret) { ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n", ret); @@ -315,7 +315,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, if (udata) { resp.cqn = hr_cq->cqn; - ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); if (ret) goto err_cqc; } diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index e8aa8075ffcd32..bc46624261a7b7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -327,7 +327,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, resp.cqe_size = hr_dev->caps.cqe_sz; - ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); if (ret) goto error_fail_copy_to_udata; diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 45ec91db15539a..8dccb6e6970fb3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -69,16 +69,17 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) } if (udata) { - struct hns_roce_ib_alloc_pd_resp uresp = {.pdn = pd->pdn}; + struct hns_roce_ib_alloc_pd_resp resp = {.pdn = pd->pdn}; - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); + if (ret) { hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn); - ibdev_err(ib_dev, "failed to copy to udata\n"); - return -EFAULT; + ibdev_err(ib_dev, "failed to copy to udata, ret = %d\n", ret); } } - return 0; + return ret; } int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 121d3b4c2edbf8..c85513d500b542 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -924,9 +924,12 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, } if (udata) { - if (ib_copy_from_udata(ucmd, udata, sizeof(*ucmd))) { - ibdev_err(ibdev, "Failed to copy QP ucmd\n"); - return -EFAULT; + ret = ib_copy_from_udata(ucmd, udata, + min(udata->inlen, sizeof(*ucmd))); + if (ret) { + ibdev_err(ibdev, + "failed to copy QP ucmd, ret = %d\n", ret); + return ret; } ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 36c6bcb8526987..6a3ebb36b4df11 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -303,7 +303,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, srq->max_gs = init_attr->attr.max_sge; if (udata) { - ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + ret = ib_copy_from_udata(&ucmd, udata, + min(udata->inlen, sizeof(ucmd))); if (ret) { ibdev_err(ibdev, "Failed to copy SRQ udata, err %d\n", ret); @@ -346,11 +347,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, resp.srqn = srq->srqn; if (udata) { - if (ib_copy_to_udata(udata, &resp, - min(udata->outlen, sizeof(resp)))) { - ret = -EFAULT; + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); + if (ret) goto err_srqc_alloc; - } } return 0; From 4ddeacf68a3dd05f346b63f4507e1032a15cc3cc Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Fri, 11 Dec 2020 09:37:28 +0800 Subject: [PATCH 135/147] RDMA/hns: Normalization the judgment of some features Whether to enable the these features should better depend on the enable flags, not the value of related fields. Fixes: 5c1f167af112 ("RDMA/hns: Init SRQ table for hip08") Fixes: 3cb2c996c9dc ("RDMA/hns: Add support for SCCC in size of 64 Bytes") Link: https://lore.kernel.org/r/1607650657-35992-3-git-send-email-liweihang@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hem.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_main.c | 8 ++++---- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 5c302aecf05030..359e5dd6854544 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1028,7 +1028,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) { - if (hr_dev->caps.srqc_entry_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); @@ -1038,7 +1038,7 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) if (hr_dev->caps.cqc_timer_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table); - if (hr_dev->caps.sccc_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); if (hr_dev->caps.trrl_entry_sz) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index bc46624261a7b7..dbb1af1d0cf267 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -606,7 +606,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) goto err_unmap_trrl; } - if (hr_dev->caps.srqc_entry_sz) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table, HEM_TYPE_SRQC, hr_dev->caps.srqc_entry_sz, @@ -618,7 +618,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } } - if (hr_dev->caps.sccc_sz) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) { ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.sccc_table, HEM_TYPE_SCCC, @@ -679,11 +679,11 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table); err_unmap_ctx: - if (hr_dev->caps.sccc_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); err_unmap_srq: - if (hr_dev->caps.srqc_entry_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); err_unmap_cq: diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index c85513d500b542..76a7c95bbbd25c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -334,7 +334,7 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) } } - if (hr_dev->caps.sccc_sz) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) { /* Alloc memory for SCC CTX */ ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table, hr_qp->qpn); From 603bee935f38080a3674c763c50787751e387779 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Fri, 11 Dec 2020 09:37:29 +0800 Subject: [PATCH 136/147] RDMA/hns: Do shift on traffic class when using RoCEv2 The high 6 bits of traffic class in GRH is DSCP (Differentiated Services Codepoint), the driver should shift it before the hardware gets it when using RoCEv2. Fixes: 606bf89e98ef ("RDMA/hns: Refactor for hns_roce_v2_modify_qp function") Fixes: fba429fcf9a5 ("RDMA/hns: Fix missing fields in address vector") Link: https://lore.kernel.org/r/1607650657-35992-4-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_ah.c | 2 +- drivers/infiniband/hw/hns/hns_roce_device.h | 8 ++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 10 +++------- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index b09ef3335e9688..c9a44dbe60d42e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -77,7 +77,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, ah->av.flowlabel = grh->flow_label; ah->av.udp_sport = get_ah_udp_sport(ah_attr); ah->av.sl = rdma_ah_get_sl(ah_attr); - ah->av.tclass = grh->traffic_class; + ah->av.tclass = get_tclass(grh); memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 60b8349cd2f87b..4ba6ff57f68d35 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1148,6 +1148,14 @@ static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift) return ilog2(to_hr_hem_entries_count(count, buf_shift)); } +#define DSCP_SHIFT 2 + +static inline u8 get_tclass(const struct ib_global_route *grh) +{ + return grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP ? + grh->traffic_class >> DSCP_SHIFT : grh->traffic_class; +} + int hns_roce_init_uar_table(struct hns_roce_dev *dev); int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar); void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8d37067a736d6f..7a0c1abde02de3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4596,15 +4596,11 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, V2_QPC_BYTE_24_HOP_LIMIT_S, 0); - if (is_udp) - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2); - else - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, grh->traffic_class); - + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, get_tclass(&attr->ah_attr.grh)); roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, 0); + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, V2_QPC_BYTE_28_FL_S, grh->flow_label); roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, From 94a8c4dfcdb2b4fcb3dfafc39c1033a0b4637c86 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Fri, 11 Dec 2020 09:37:30 +0800 Subject: [PATCH 137/147] RDMA/hns: Avoid filling sl in high 3 bits of vlan_id Only the low 12 bits of vlan_id is valid, and service level has been filled in Address Vector. So there is no need to fill sl in vlan_id in Address Vector. Fixes: 7406c0036f85 ("RDMA/hns: Only record vlan info for HIP08") Link: https://lore.kernel.org/r/1607650657-35992-5-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_ah.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index c9a44dbe60d42e..cc258edec3313c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -36,9 +36,6 @@ #include #include "hns_roce_device.h" -#define VLAN_SL_MASK 7 -#define VLAN_SL_SHIFT 13 - static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) { u32 fl = ah_attr->grh.flow_label; @@ -84,18 +81,12 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, /* HIP08 needs to record vlan info in Address Vector */ if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) { - ah->av.vlan_en = 0; - ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, &ah->av.vlan_id, NULL); if (ret) return ret; - if (ah->av.vlan_id < VLAN_N_VID) { - ah->av.vlan_en = 1; - ah->av.vlan_id |= (rdma_ah_get_sl(ah_attr) & VLAN_SL_MASK) << - VLAN_SL_SHIFT; - } + ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID; } return ret; From f75506833eed65cc537293508b7edd5788d67e23 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Fri, 11 Dec 2020 09:37:31 +0800 Subject: [PATCH 138/147] RDMA/hns: WARN_ON if get a reserved sl from users According to the RoCE v1 specification, the sl (service level) 0-7 are mapped directly to priorities 0-7 respectively, sl 8-15 are reserved. The driver should verify whether the value of sl is larger than 7, if so, an exception should be returned. Link: https://lore.kernel.org/r/1607650657-35992-6-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7a0c1abde02de3..b2b80528c3bbad 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -433,6 +433,10 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); + + if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL)) + return -EINVAL; + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); From 29b52027ac354f2a0e5c4d17ca1b621a1644949d Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Fri, 11 Dec 2020 09:37:32 +0800 Subject: [PATCH 139/147] RDMA/hns: Remove unnecessary access right set during INIT2INIT As the qp access right is checked and setted in common function hns_roce_v2_set_opt_fields(), there is no need to set again for a special case INIT2INIT. Fixes: 926a01dc000d ("RDMA/hns: Add QP operations support for hip08 SoC") Fixes: 7db82697b8bf ("RDMA/hns: Add support for extended atomic in userspace") Link: https://lore.kernel.org/r/1607650657-35992-7-git-send-email-liweihang@huawei.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 46 ---------------------- 1 file changed, 46 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b2b80528c3bbad..6d80cda701dda2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3975,7 +3975,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1); - hr_qp->access_flags = attr->qp_access_flags; roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn); @@ -4004,51 +4003,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, V2_QPC_BYTE_4_TST_S, 0); - if (attr_mask & IB_QP_ACCESS_FLAGS) { - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - !!(attr->qp_access_flags & - IB_ACCESS_REMOTE_WRITE)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - !!(attr->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - 0); - roce_set_bit(context->byte_76_srqn_op_en, - V2_QPC_BYTE_76_EXT_ATE_S, - !!(attr->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, - V2_QPC_BYTE_76_EXT_ATE_S, 0); - } else { - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_WRITE)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - 0); - roce_set_bit(context->byte_76_srqn_op_en, - V2_QPC_BYTE_76_EXT_ATE_S, - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, - V2_QPC_BYTE_76_EXT_ATE_S, 0); - } - roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, From dc93a0d987fcfe93b132871e72d4ea5aff36dd5c Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Fri, 11 Dec 2020 09:37:33 +0800 Subject: [PATCH 140/147] RDMA/hns: Fix coding style issues Just format the code without modifying anything, including fixing some redundant and missing blanks and spaces and changing the variable definition order. Link: https://lore.kernel.org/r/1607650657-35992-8-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cmd.c | 27 +++++++++++----------- drivers/infiniband/hw/hns/hns_roce_cmd.h | 4 ++-- drivers/infiniband/hw/hns/hns_roce_cq.c | 2 +- drivers/infiniband/hw/hns/hns_roce_hem.c | 20 ++++++++-------- drivers/infiniband/hw/hns/hns_roce_hem.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 9 +++----- drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 9 +++----- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 6 ++--- drivers/infiniband/hw/hns/hns_roce_main.c | 6 ++--- drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 1 - 13 files changed, 43 insertions(+), 51 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 455d533dd7c4aa..c493d7644b577f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -36,9 +36,9 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" -#define CMD_POLL_TOKEN 0xffff -#define CMD_MAX_NUM 32 -#define CMD_TOKEN_MASK 0x1f +#define CMD_POLL_TOKEN 0xffff +#define CMD_MAX_NUM 32 +#define CMD_TOKEN_MASK 0x1f static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, u32 in_modifier, @@ -93,8 +93,8 @@ static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, u64 out_param) { - struct hns_roce_cmd_context - *context = &hr_dev->cmd.context[token & hr_dev->cmd.token_mask]; + struct hns_roce_cmd_context *context = + &hr_dev->cmd.context[token % hr_dev->cmd.max_cmds]; if (token != context->token) return; @@ -164,8 +164,8 @@ static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, int ret; down(&hr_dev->cmd.event_sem); - ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, timeout); + ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, in_modifier, + op_modifier, op, timeout); up(&hr_dev->cmd.event_sem); return ret; @@ -231,9 +231,8 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd; int i; - hr_cmd->context = kmalloc_array(hr_cmd->max_cmds, - sizeof(*hr_cmd->context), - GFP_KERNEL); + hr_cmd->context = + kcalloc(hr_cmd->max_cmds, sizeof(*hr_cmd->context), GFP_KERNEL); if (!hr_cmd->context) return -ENOMEM; @@ -262,8 +261,8 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev) hr_cmd->use_events = 0; } -struct hns_roce_cmd_mailbox - *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev) +struct hns_roce_cmd_mailbox * +hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev) { struct hns_roce_cmd_mailbox *mailbox; @@ -271,8 +270,8 @@ struct hns_roce_cmd_mailbox if (!mailbox) return ERR_PTR(-ENOMEM); - mailbox->buf = dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, - &mailbox->dma); + mailbox->buf = + dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, &mailbox->dma); if (!mailbox->buf) { kfree(mailbox); return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 1915bacaded0a3..8e63b827f28cc4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -143,8 +143,8 @@ int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, unsigned long timeout); -struct hns_roce_cmd_mailbox - *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); +struct hns_roce_cmd_mailbox * +hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox); diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index e67c2b99be01eb..f25e535f6e7b6a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -40,9 +40,9 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_cq_table *cq_table; - struct ib_device *ibdev = &hr_dev->ib_dev; u64 mtts[MTT_MIN_COUNT] = { 0 }; dma_addr_t dma_handle; int ret; diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 359e5dd6854544..303c8ddd95c933 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -209,9 +209,9 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, { struct device *dev = hr_dev->dev; u32 chunk_ba_num; + u32 chunk_size; u32 table_idx; u32 bt_num; - u32 chunk_size; if (get_hem_table_config(hr_dev, mhop, table->type)) return -EINVAL; @@ -343,15 +343,15 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, { spinlock_t *lock = &hr_dev->bt_cmd_lock; struct device *dev = hr_dev->dev; - long end; - unsigned long flags; struct hns_roce_hem_iter iter; void __iomem *bt_cmd; __le32 bt_cmd_val[2]; __le32 bt_cmd_h = 0; + unsigned long flags; __le32 bt_cmd_l; - u64 bt_ba; int ret = 0; + u64 bt_ba; + long end; /* Find the HEM(Hardware Entry Memory) entry */ unsigned long i = (obj & (table->num_obj - 1)) / @@ -651,8 +651,8 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj) { struct device *dev = hr_dev->dev; - int ret = 0; unsigned long i; + int ret = 0; if (hns_roce_check_whether_mhop(hr_dev, table->type)) return hns_roce_table_mhop_get(hr_dev, table, obj); @@ -800,14 +800,14 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, struct hns_roce_hem_chunk *chunk; struct hns_roce_hem_mhop mhop; struct hns_roce_hem *hem; - void *addr = NULL; unsigned long mhop_obj = obj; unsigned long obj_per_chunk; unsigned long idx_offset; int offset, dma_offset; + void *addr = NULL; + u32 hem_idx = 0; int length; int i, j; - u32 hem_idx = 0; if (!table->lowmem) return NULL; @@ -977,8 +977,8 @@ static void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev, { struct hns_roce_hem_mhop mhop; u32 buf_chunk_size; - int i; u64 obj; + int i; if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop)) return; @@ -1313,8 +1313,8 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, const struct hns_roce_buf_region *regions, int region_cnt) { - struct roce_hem_item *hem, *temp_hem, *root_hem; struct list_head temp_list[HNS_ROCE_MAX_BT_REGION]; + struct roce_hem_item *hem, *temp_hem, *root_hem; const struct hns_roce_buf_region *r; struct list_head temp_root; struct list_head temp_btm; @@ -1419,8 +1419,8 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, { const struct hns_roce_buf_region *r; int ofs, end; - int ret; int unit; + int ret; int i; if (region_cnt > HNS_ROCE_MAX_BT_REGION) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index c6bd98228a440a..13fdeb3274e780 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -175,4 +175,4 @@ static inline dma_addr_t hns_roce_hem_addr(struct hns_roce_hem_iter *iter) return sg_dma_address(&iter->chunk->mem[iter->page_idx]); } -#endif /*_HNS_ROCE_HEM_H*/ +#endif /* _HNS_ROCE_HEM_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index f18380f827dd87..eb0fd7231a39b2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -239,7 +239,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, break; } - /*Ctrl field, ctrl set type: sig, solic, imm, fence */ + /* Ctrl field, ctrl set type: sig, solic, imm, fence */ /* SO wait for conforming application scenarios */ ctrl->flag |= (wr->send_flags & IB_SEND_SIGNALED ? cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) | @@ -300,7 +300,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, } ctrl->flag |= cpu_to_le32(HNS_ROCE_WQE_INLINE); } else { - /*sqe num is two */ + /* sqe num is two */ for (i = 0; i < wr->num_sge; i++) set_data_seg(dseg + i, wr->sg_list + i); @@ -1165,7 +1165,7 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) } raq->e_raq_buf->map = addr; - /* Configure raq extended address. 48bit 4K align*/ + /* Configure raq extended address. 48bit 4K align */ roce_write(hr_dev, ROCEE_EXT_RAQ_REG, raq->e_raq_buf->map >> 12); /* Configure raq_shift */ @@ -2760,7 +2760,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qpc_bytes_16, QP_CONTEXT_QPC_BYTES_16_QP_NUM_M, QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn); - } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { roce_set_field(context->qpc_bytes_4, QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, @@ -3795,7 +3794,6 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev, int event_type; while ((aeqe = next_aeqe_sw_v1(eq))) { - /* Make sure we read the AEQ entry after we have checked the * ownership bit */ @@ -3900,7 +3898,6 @@ static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev, u32 cqn; while ((ceqe = next_ceqe_sw_v1(eq))) { - /* Make sure we read CEQ entry after we have checked the * ownership bit */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index ffd0156080f526..46ab0a321d2113 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -419,7 +419,7 @@ struct hns_roce_wqe_data_seg { struct hns_roce_wqe_raddr_seg { __le32 rkey; - __le32 len;/* reserved */ + __le32 len; /* reserved */ __le64 raddr; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 6d80cda701dda2..941a70b5ecff17 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1025,8 +1025,8 @@ static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev) struct hns_roce_v2_priv *priv = hr_dev->priv; struct hnae3_handle *handle = priv->handle; const struct hnae3_ae_ops *ops = handle->ae_algo->ops; - unsigned long instance_stage; /* the current instance stage */ - unsigned long reset_stage; /* the current reset stage */ + unsigned long instance_stage; /* the current instance stage */ + unsigned long reset_stage; /* the current reset stage */ unsigned long reset_cnt; bool sw_resetting; bool hw_resetting; @@ -2451,7 +2451,6 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, if (i < (pg_num - 1)) entry[i].blk_ba1_nxt_ptr |= (i + 1) << HNS_ROCE_LINK_TABLE_NXT_PTR_S; - } link_tbl->npages = pg_num; link_tbl->pg_sz = buf_chk_sz; @@ -5619,16 +5618,14 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: hns_roce_cq_event(hr_dev, cqn, event_type); break; - case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: - break; case HNS_ROCE_EVENT_TYPE_MB: hns_roce_cmd_event(hr_dev, le16_to_cpu(aeqe->event.cmd.token), aeqe->event.cmd.status, le64_to_cpu(aeqe->event.cmd.out_param)); break; + case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: - break; case HNS_ROCE_EVENT_TYPE_FLR: break; default: diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index fac85369d84fbd..bdaccf86460ddb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -449,7 +449,7 @@ struct hns_roce_srq_context { #define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1 #define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1) -enum{ +enum { V2_MPT_ST_VALID = 0x1, V2_MPT_ST_FREE = 0x2, }; @@ -1094,9 +1094,9 @@ struct hns_roce_v2_ud_send_wqe { u8 sgid_index; u8 smac_index; u8 dgid[GID_LEN_V2]; - }; -#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 + +#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 #define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) #define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7 diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index dbb1af1d0cf267..3f3de323c4bf82 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -555,8 +555,8 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) { - int ret; struct device *dev = hr_dev->dev; + int ret; ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table, HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz, @@ -713,8 +713,8 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) */ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) { - int ret; struct device *dev = hr_dev->dev; + int ret; spin_lock_init(&hr_dev->sm_lock); spin_lock_init(&hr_dev->bt_cmd_lock); @@ -838,8 +838,8 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev) int hns_roce_init(struct hns_roce_dev *hr_dev) { - int ret; struct device *dev = hr_dev->dev; + int ret; if (hr_dev->hw->reset) { ret = hr_dev->hw->reset(hr_dev, true); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 98671925debf70..b9a7e73209a30c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -167,10 +167,10 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { - int ret; unsigned long mtpt_idx = key_to_hw_index(mr->key); - struct device *dev = hr_dev->dev; struct hns_roce_cmd_mailbox *mailbox; + struct device *dev = hr_dev->dev; + int ret; /* Allocate mailbox memory */ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 76a7c95bbbd25c..f89c52bf01d415 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -113,8 +113,8 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, enum hns_roce_event type) { - struct ib_event event; struct ib_qp *ibqp = &hr_qp->ibqp; + struct ib_event event; if (ibqp->event_handler) { event.device = ibqp->device; diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 6a3ebb36b4df11..e2e778733874a2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -239,7 +239,6 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, err = -ENOMEM; goto err_idx_mtr; } - } return 0; From 62f3b70ed656640ecb63432014f4bb258cb1975a Mon Sep 17 00:00:00 2001 From: Xinhao Liu Date: Fri, 11 Dec 2020 09:37:34 +0800 Subject: [PATCH 141/147] RDMA/hns: Clear redundant variable initialization There is no need to initialize some variable because they will be assigned with a value later. Link: https://lore.kernel.org/r/1607650657-35992-9-git-send-email-liweihang@huawei.com Signed-off-by: Xinhao Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hem.c | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 303c8ddd95c933..f19bbcc292eb6c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -887,7 +887,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, unsigned long buf_chunk_size; unsigned long bt_chunk_size; unsigned long bt_chunk_num; - unsigned long num_bt_l0 = 0; + unsigned long num_bt_l0; u32 hop_num; if (get_hem_table_config(hr_dev, &mhop, type)) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index eb0fd7231a39b2..0f4273d3062c75 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -353,8 +353,8 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, unsigned long flags = 0; unsigned int wqe_idx; int ret = 0; - int nreq = 0; - int i = 0; + int nreq; + int i; u32 reg_val; spin_lock_irqsave(&hr_qp->rq.lock, flags); @@ -2300,7 +2300,7 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) struct hns_roce_qp *cur_qp = NULL; unsigned long flags; int npolled; - int ret = 0; + int ret; spin_lock_irqsave(&hr_cq->lock, flags); @@ -4123,7 +4123,7 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn]; struct device *dev = &hr_dev->pdev->dev; dma_addr_t tmp_dma_addr; - u32 eqcuridx_val = 0; + u32 eqcuridx_val; u32 eqconsindx_val; u32 eqshift_val; __le32 tmp2 = 0; From dcdc366acf8ffc29f091a09e08b4e46caa0a0f21 Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Fri, 11 Dec 2020 09:37:35 +0800 Subject: [PATCH 142/147] RDMA/hns: Fix incorrect symbol types Types of some fields, variables and parameters of some functions should be unsigned. Link: https://lore.kernel.org/r/1607650657-35992-10-git-send-email-liweihang@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cmd.c | 10 ++-- drivers/infiniband/hw/hns/hns_roce_cmd.h | 2 +- drivers/infiniband/hw/hns/hns_roce_common.h | 14 +++--- drivers/infiniband/hw/hns/hns_roce_db.c | 8 ++-- drivers/infiniband/hw/hns/hns_roce_device.h | 53 +++++++++++---------- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 8 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 30 ++++++------ drivers/infiniband/hw/hns/hns_roce_main.c | 2 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 11 ++--- drivers/infiniband/hw/hns/hns_roce_qp.c | 8 ++-- 10 files changed, 73 insertions(+), 73 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index c493d7644b577f..339e3fd98b0b48 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -60,7 +60,7 @@ static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param, static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout) + unsigned int timeout) { struct device *dev = hr_dev->dev; int ret; @@ -78,7 +78,7 @@ static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, unsigned long timeout) + u8 op_modifier, u16 op, unsigned int timeout) { int ret; @@ -108,7 +108,7 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout) + unsigned int timeout) { struct hns_roce_cmdq *cmd = &hr_dev->cmd; struct hns_roce_cmd_context *context; @@ -159,7 +159,7 @@ static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, unsigned long timeout) + u8 op_modifier, u16 op, unsigned int timeout) { int ret; @@ -173,7 +173,7 @@ static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout) + unsigned int timeout) { int ret; diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 8e63b827f28cc4..8025e7f657fa66 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -141,7 +141,7 @@ enum { int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout); + unsigned int timeout); struct hns_roce_cmd_mailbox * hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 29469e15dfd36a..5afee04fb02c0e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -38,19 +38,19 @@ #define roce_raw_write(value, addr) \ __raw_writel((__force u32)cpu_to_le32(value), (addr)) -#define roce_get_field(origin, mask, shift) \ - (((le32_to_cpu(origin)) & (mask)) >> (shift)) +#define roce_get_field(origin, mask, shift) \ + ((le32_to_cpu(origin) & (mask)) >> (u32)(shift)) #define roce_get_bit(origin, shift) \ roce_get_field((origin), (1ul << (shift)), (shift)) -#define roce_set_field(origin, mask, shift, val) \ - do { \ - (origin) &= ~cpu_to_le32(mask); \ - (origin) |= cpu_to_le32(((u32)(val) << (shift)) & (mask)); \ +#define roce_set_field(origin, mask, shift, val) \ + do { \ + (origin) &= ~cpu_to_le32(mask); \ + (origin) |= cpu_to_le32(((u32)(val) << (u32)(shift)) & (mask)); \ } while (0) -#define roce_set_bit(origin, shift, val) \ +#define roce_set_bit(origin, shift, val) \ roce_set_field((origin), (1ul << (shift)), (shift), (val)) #define FIELD_LOC(field_type, field_h, field_l) field_type, field_h, field_l diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index bff6abdccfb0c3..5cb7376ce97897 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -95,8 +95,8 @@ static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir( static int hns_roce_alloc_db_from_pgdir(struct hns_roce_db_pgdir *pgdir, struct hns_roce_db *db, int order) { - int o; - int i; + unsigned long o; + unsigned long i; for (o = order; o <= 1; ++o) { i = find_first_bit(pgdir->bits[o], HNS_ROCE_DB_PER_PAGE >> o); @@ -154,8 +154,8 @@ int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db, void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db) { - int o; - int i; + unsigned long o; + unsigned long i; mutex_lock(&hr_dev->pgdir_mutex); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 4ba6ff57f68d35..89c0c74e233cb2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -311,7 +311,7 @@ struct hns_roce_hem_table { }; struct hns_roce_buf_region { - int offset; /* page offset */ + u32 offset; /* page offset */ u32 count; /* page count */ int hopnum; /* addressing hop num */ }; @@ -331,10 +331,10 @@ struct hns_roce_buf_attr { size_t size; /* region size */ int hopnum; /* multi-hop addressing hop num */ } region[HNS_ROCE_MAX_BT_REGION]; - int region_count; /* valid region count */ + unsigned int region_count; /* valid region count */ unsigned int page_shift; /* buffer page shift */ bool fixed_page; /* decide page shift is fixed-size or maximum size */ - int user_access; /* umem access flag */ + unsigned int user_access; /* umem access flag */ bool mtt_only; /* only alloc buffer-required MTT memory */ }; @@ -345,7 +345,7 @@ struct hns_roce_hem_cfg { unsigned int buf_pg_shift; /* buffer page shift */ unsigned int buf_pg_count; /* buffer page count */ struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; - int region_count; + unsigned int region_count; }; /* memory translate region */ @@ -393,7 +393,7 @@ struct hns_roce_wq { u64 *wrid; /* Work request ID */ spinlock_t lock; u32 wqe_cnt; /* WQE num */ - int max_gs; + u32 max_gs; int offset; int wqe_shift; /* WQE size */ u32 head; @@ -459,8 +459,8 @@ struct hns_roce_db { } u; dma_addr_t dma; void *virt_addr; - int index; - int order; + unsigned long index; + unsigned long order; }; struct hns_roce_cq { @@ -508,8 +508,8 @@ struct hns_roce_srq { u64 *wrid; struct hns_roce_idx_que idx_que; spinlock_t lock; - int head; - int tail; + u16 head; + u16 tail; struct mutex mutex; void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); }; @@ -747,11 +747,11 @@ struct hns_roce_eq { int type_flag; /* Aeq:1 ceq:0 */ int eqn; u32 entries; - int log_entries; + u32 log_entries; int eqe_size; int irq; int log_page_size; - int cons_index; + u32 cons_index; struct hns_roce_buf_list *buf_list; int over_ignore; int coalesce; @@ -759,7 +759,7 @@ struct hns_roce_eq { int hop_num; struct hns_roce_mtr mtr; u16 eq_max_cnt; - int eq_period; + u32 eq_period; int shift; int event_type; int sub_type; @@ -782,8 +782,8 @@ struct hns_roce_caps { u32 max_sq_inline; u32 max_rq_sg; u32 max_extend_sg; - int num_qps; - u32 reserved_qps; + u32 num_qps; + u32 reserved_qps; int num_qpc_timer; int num_cqc_timer; int num_srqs; @@ -795,7 +795,7 @@ struct hns_roce_caps { u32 max_srq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; - int num_cqs; + u32 num_cqs; u32 max_cqes; u32 min_cqes; u32 min_wqes; @@ -804,7 +804,7 @@ struct hns_roce_caps { int num_aeq_vectors; int num_comp_vectors; int num_other_vectors; - int num_mtpts; + u32 num_mtpts; u32 num_mtt_segs; u32 num_cqe_segs; u32 num_srqwqe_segs; @@ -921,7 +921,7 @@ struct hns_roce_hw { int (*post_mbox)(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, u16 token, int event); - int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout); + int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned int timeout); int (*rst_prc_mbox)(struct hns_roce_dev *hr_dev); int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr); @@ -1096,15 +1096,16 @@ static inline struct hns_roce_qp return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1)); } -static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) +static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, + unsigned int offset) { return (char *)(buf->trunk_list[offset >> buf->trunk_shift].buf) + (offset & ((1 << buf->trunk_shift) - 1)); } -static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx) +static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx) { - int offset = idx << buf->page_shift; + unsigned int offset = idx << buf->page_shift; return buf->trunk_list[offset >> buf->trunk_shift].map + (offset & ((1 << buf->trunk_shift) - 1)); @@ -1179,7 +1180,7 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr); int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, int page_cnt); + dma_addr_t *pages, unsigned int page_cnt); int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); @@ -1263,10 +1264,10 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); -void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n); -void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n); -void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n); -bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, +void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n); +void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n); +void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n); +bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq, struct ib_cq *ib_cq); enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state); void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, @@ -1296,7 +1297,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); -int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); +u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 0f4273d3062c75..9c69742fd399a9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -288,7 +288,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, ret = -EINVAL; *bad_wr = wr; dev_err(dev, "inline len(1-%d)=%d, illegal", - ctrl->msg_length, + le32_to_cpu(ctrl->msg_length), hr_dev->caps.max_sq_inline); goto out; } @@ -1639,7 +1639,7 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, } static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, - unsigned long timeout) + unsigned int timeout) { u8 __iomem *hcr = hr_dev->reg_base + ROCEE_MB1_REG; unsigned long end; @@ -3600,10 +3600,10 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) return 0; } -static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not) +static void set_eq_cons_index_v1(struct hns_roce_eq *eq, u32 req_not) { roce_raw_write((eq->cons_index & HNS_ROCE_V1_CONS_IDX_M) | - (req_not << eq->log_entries), eq->doorbell); + (req_not << eq->log_entries), eq->doorbell); } static void hns_roce_v1_wq_catas_err_handle(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 941a70b5ecff17..6bd83f2ade6a21 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -656,7 +656,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, unsigned int sge_idx; unsigned int wqe_idx; void *wqe = NULL; - int nreq; + u32 nreq; int ret; spin_lock_irqsave(&qp->sq.lock, flags); @@ -834,7 +834,7 @@ static void *get_srq_wqe(struct hns_roce_srq *srq, int n) return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); } -static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n) +static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n) { return hns_roce_buf_offset(idx_que->mtr.kmem, n << idx_que->entry_shift); @@ -875,12 +875,12 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_v2_db srq_db; unsigned long flags; + unsigned int ind; __le32 *srq_idx; int ret = 0; int wqe_idx; void *wqe; int nreq; - int ind; int i; spin_lock_irqsave(&srq->lock, flags); @@ -1125,7 +1125,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type) roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG, upper_32_bits(dma)); roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG, - ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); + (u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0); roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, 0); } else { @@ -1133,7 +1133,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type) roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_H_REG, upper_32_bits(dma)); roce_write(hr_dev, ROCEE_RX_CMQ_DEPTH_REG, - ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); + (u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); roce_write(hr_dev, ROCEE_RX_CMQ_HEAD_REG, 0); roce_write(hr_dev, ROCEE_RX_CMQ_TAIL_REG, 0); } @@ -1919,8 +1919,8 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) } } -static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, - int *buf_page_size, int *bt_page_size, u32 hem_type) +static void calc_pg_sz(u32 obj_num, u32 obj_size, u32 hop_num, u32 ctx_bt_num, + u32 *buf_page_size, u32 *bt_page_size, u32 hem_type) { u64 obj_per_chunk; u64 bt_chunk_size = PAGE_SIZE; @@ -2399,10 +2399,10 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, u32 buf_chk_sz; dma_addr_t t; int func_num = 1; - int pg_num_a; - int pg_num_b; - int pg_num; - int size; + u32 pg_num_a; + u32 pg_num_b; + u32 pg_num; + u32 size; int i; switch (type) { @@ -2598,7 +2598,7 @@ static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev) struct hns_roce_cmq_desc desc; struct hns_roce_mbox_status *mb_st = (struct hns_roce_mbox_status *)desc.data; - enum hns_roce_cmd_return_status status; + int status; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true); @@ -2669,7 +2669,7 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, } static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, - unsigned long timeout) + unsigned int timeout) { struct device *dev = hr_dev->dev; unsigned long end; @@ -3067,7 +3067,7 @@ static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size); } -static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) +static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, unsigned int n) { struct hns_roce_v2_cqe *cqe = get_cqe_v2(hr_cq, n & hr_cq->ib_cq.cqe); @@ -3414,7 +3414,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, int is_send; u16 wqe_ctr; u32 opcode; - int qpn; + u32 qpn; int ret; /* Find cqe according to consumer index */ diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 3f3de323c4bf82..d9179bae4989dc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -53,7 +53,7 @@ * GID[0][0], GID[1][0],.....GID[N - 1][0], * And so on */ -int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) +u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) { return gid_index * hr_dev->caps.num_ports + port; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index b9a7e73209a30c..f807ece7bd36b4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -510,7 +510,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret); ret = 0; } else { - mr->pbl_mtr.hem_cfg.buf_pg_shift = ilog2(ibmr->page_size); + mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size); ret = mr->npages; } @@ -829,12 +829,12 @@ static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, } int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, int page_cnt) + dma_addr_t *pages, unsigned int page_cnt) { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_region *r; + unsigned int i; int err; - int i; /* * Only use the first page address as root ba when hopnum is 0, this @@ -871,13 +871,12 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) { struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int mtt_count, left; int start_index; - int mtt_count; int total = 0; __le64 *mtts; - int npage; + u32 npage; u64 addr; - int left; if (!mtt_buf || mtt_max < 1) goto done; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index f89c52bf01d415..9a0851f68233da 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1322,22 +1322,22 @@ static inline void *get_wqe(struct hns_roce_qp *hr_qp, int offset) return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); } -void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n) +void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n) { return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift)); } -void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n) +void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n) { return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift)); } -void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n) +void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n) { return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift)); } -bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, +bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq, struct ib_cq *ib_cq) { struct hns_roce_cq *hr_cq; From 61918e9b008492f48577692428aca3cebf56111a Mon Sep 17 00:00:00 2001 From: Yixing Liu Date: Fri, 11 Dec 2020 09:37:36 +0800 Subject: [PATCH 143/147] RDMA/hns: Fix inaccurate prints Some %d in print format string should be %u, and some prints miss the useful errno or are in nonstandard format. Just fix above issues. Link: https://lore.kernel.org/r/1607650657-35992-11-git-send-email-liweihang@huawei.com Signed-off-by: Yixing Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_alloc.c | 4 +- drivers/infiniband/hw/hns/hns_roce_cq.c | 35 +++++++------ drivers/infiniband/hw/hns/hns_roce_hem.c | 18 +++---- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 30 +++++------ drivers/infiniband/hw/hns/hns_roce_mr.c | 10 ++-- drivers/infiniband/hw/hns/hns_roce_pd.c | 2 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 61 +++++++++++++--------- drivers/infiniband/hw/hns/hns_roce_srq.c | 37 +++++++------ 8 files changed, 107 insertions(+), 90 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index dad2b9ba7b7ac6..4bcaaa0524b127 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -260,7 +260,7 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, end = start + buf_cnt; if (end > buf->npages) { dev_err(hr_dev->dev, - "Failed to check kmem bufs, end %d + %d total %d!\n", + "failed to check kmem bufs, end %d + %d total %u!\n", start, buf_cnt, buf->npages); return -EINVAL; } @@ -282,7 +282,7 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, u64 addr; if (page_shift < HNS_HW_PAGE_SHIFT) { - dev_err(hr_dev->dev, "Failed to check umem page shift %d!\n", + dev_err(hr_dev->dev, "failed to check umem page shift %u!\n", page_shift); return -EINVAL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index f25e535f6e7b6a..8533fc2d8df273 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -49,29 +49,29 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts), &dma_handle); - if (ret < 1) { - ibdev_err(ibdev, "Failed to find CQ mtr\n"); + if (!ret) { + ibdev_err(ibdev, "failed to find CQ mtr, ret = %d.\n", ret); return -EINVAL; } cq_table = &hr_dev->cq_table; ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ bitmap, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc CQ bitmap, ret = %d.\n", ret); return ret; } /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { - ibdev_err(ibdev, "Failed to get CQ(0x%lx) context, err %d\n", + ibdev_err(ibdev, "failed to get CQ(0x%lx) context, ret = %d.\n", hr_cq->cqn, ret); goto err_out; } ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); if (ret) { - ibdev_err(ibdev, "Failed to xa_store CQ\n"); + ibdev_err(ibdev, "failed to xa_store CQ, ret = %d.\n", ret); goto err_put; } @@ -90,7 +90,7 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { ibdev_err(ibdev, - "Failed to send create cmd for CQ(0x%lx), err %d\n", + "failed to send create cmd for CQ(0x%lx), ret = %d.\n", hr_cq->cqn, ret); goto err_xa; } @@ -146,7 +146,7 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_attr buf_attr = {}; - int err; + int ret; buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; @@ -154,13 +154,13 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, buf_attr.region_count = 1; buf_attr.fixed_page = true; - err = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, + ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); - if (err) - ibdev_err(ibdev, "Failed to alloc CQ mtr, err %d\n", err); + if (ret) + ibdev_err(ibdev, "failed to alloc CQ mtr, ret = %d.\n", ret); - return err; + return ret; } static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) @@ -254,13 +254,13 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, return -EOPNOTSUPP; if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { - ibdev_err(ibdev, "Failed to check CQ count %d max=%d\n", + ibdev_err(ibdev, "failed to check CQ count %u, max = %u.\n", cq_entries, hr_dev->caps.max_cqes); return -EINVAL; } if (vector >= hr_dev->caps.num_comp_vectors) { - ibdev_err(ibdev, "Failed to check CQ vector=%d max=%d\n", + ibdev_err(ibdev, "failed to check CQ vector = %d, max = %d.\n", vector, hr_dev->caps.num_comp_vectors); return -EINVAL; } @@ -278,7 +278,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, ret = ib_copy_from_udata(&ucmd, udata, min(udata->inlen, sizeof(ucmd))); if (ret) { - ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n", + ibdev_err(ibdev, "failed to copy CQ udata, ret = %d.\n", ret); return ret; } @@ -288,19 +288,20 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret); return ret; } ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ db, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc CQ db, ret = %d.\n", ret); goto err_cq_buf; } ret = alloc_cqc(hr_dev, hr_cq); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ context, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc CQ context, ret = %d.\n", ret); goto err_cq_db; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index f19bbcc292eb6c..edc9d6b98d9546 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -195,7 +195,7 @@ static int get_hem_table_config(struct hns_roce_dev *hr_dev, mhop->hop_num = hr_dev->caps.gmv_hop_num; break; default: - dev_err(dev, "Table %d not support multi-hop addressing!\n", + dev_err(dev, "table %u not support multi-hop addressing!\n", type); return -EINVAL; } @@ -243,8 +243,8 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, mhop->l0_idx = table_idx; break; default: - dev_err(dev, "Table %d not support hop_num = %d!\n", - table->type, mhop->hop_num); + dev_err(dev, "table %u not support hop_num = %u!\n", + table->type, mhop->hop_num); return -EINVAL; } if (mhop->l0_idx >= mhop->ba_l0_num) @@ -449,13 +449,13 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev, index->buf = l0_idx; break; default: - ibdev_err(ibdev, "Table %d not support mhop.hop_num = %d!\n", + ibdev_err(ibdev, "table %u not support mhop.hop_num = %u!\n", table->type, mhop->hop_num); return -EINVAL; } if (unlikely(index->buf >= table->num_hem)) { - ibdev_err(ibdev, "Table %d exceed hem limt idx %llu,max %lu!\n", + ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n", table->type, index->buf, table->num_hem); return -EINVAL; } @@ -725,15 +725,15 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev, step_idx = hop_num; if (hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx)) - ibdev_warn(ibdev, "Clear hop%d HEM failed.\n", hop_num); + ibdev_warn(ibdev, "failed to clear hop%u HEM.\n", hop_num); if (index->inited & HEM_INDEX_L1) if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1)) - ibdev_warn(ibdev, "Clear HEM step 1 failed.\n"); + ibdev_warn(ibdev, "failed to clear HEM step 1.\n"); if (index->inited & HEM_INDEX_L0) if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) - ibdev_warn(ibdev, "Clear HEM step 0 failed.\n"); + ibdev_warn(ibdev, "failed to clear HEM step 0.\n"); } } @@ -1249,7 +1249,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, } if (offset < r->offset) { - dev_err(hr_dev->dev, "invalid offset %d,min %d!\n", + dev_err(hr_dev->dev, "invalid offset %d, min %u!\n", offset, r->offset); return -EINVAL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 6bd83f2ade6a21..a91c54c343fd1b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -361,7 +361,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, } else if (unlikely(hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_INIT || hr_qp->state == IB_QPS_RTR)) { - ibdev_err(ibdev, "failed to post WQE, QP state %d!\n", + ibdev_err(ibdev, "failed to post WQE, QP state %hhu!\n", hr_qp->state); return -EINVAL; } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { @@ -680,7 +680,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); if (unlikely(wr->num_sge > qp->sq.max_gs)) { - ibdev_err(ibdev, "num_sge=%d > qp->sq.max_gs=%d\n", + ibdev_err(ibdev, "num_sge = %d > qp->sq.max_gs = %u.\n", wr->num_sge, qp->sq.max_gs); ret = -EINVAL; *bad_wr = wr; @@ -765,7 +765,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { - ibdev_err(ibdev, "rq:num_sge=%d >= qp->sq.max_gs=%d\n", + ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n", wr->num_sge, hr_qp->rq.max_gs); ret = -EINVAL; *bad_wr = wr; @@ -1950,8 +1950,8 @@ static void calc_pg_sz(u32 obj_num, u32 obj_size, u32 hop_num, u32 ctx_bt_num, obj_per_chunk = ctx_bt_num * obj_per_chunk_default; break; default: - pr_err("Table %d not support hop_num = %d!\n", hem_type, - hop_num); + pr_err("table %u not support hop_num = %u!\n", hem_type, + hop_num); return; } @@ -3683,7 +3683,7 @@ static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type, break; default: dev_warn(hr_dev->dev, - "Table %d not to be written by mailbox!\n", type); + "table %u not to be written by mailbox!\n", type); return -EINVAL; } @@ -3819,7 +3819,7 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, case HEM_TYPE_GMV: return 0; default: - dev_warn(dev, "Table %d not to be destroyed by mailbox!\n", + dev_warn(dev, "table %u not to be destroyed by mailbox!\n", table->type); return 0; } @@ -4411,7 +4411,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask); if (ret) { - ibdev_err(ibdev, "failed to config sq buf, ret %d\n", ret); + ibdev_err(ibdev, "failed to config sq buf, ret = %d.\n", ret); return ret; } @@ -4902,7 +4902,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, /* SW pass context to HW */ ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp); if (ret) { - ibdev_err(ibdev, "failed to modify QP, ret = %d\n", ret); + ibdev_err(ibdev, "failed to modify QP, ret = %d.\n", ret); goto out; } @@ -4995,7 +4995,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context); if (ret) { - ibdev_err(ibdev, "failed to query QPC, ret = %d\n", ret); + ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret); ret = -EINVAL; goto out; } @@ -5118,7 +5118,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hr_qp->state, IB_QPS_RESET); if (ret) ibdev_err(ibdev, - "failed to modify QP to RST, ret = %d\n", + "failed to modify QP to RST, ret = %d.\n", ret); } @@ -5157,7 +5157,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) ibdev_err(&hr_dev->ib_dev, - "failed to destroy QP 0x%06lx, ret = %d\n", + "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", hr_qp->qpn, ret); hns_roce_qp_destroy(hr_dev, hr_qp, udata); @@ -5180,7 +5180,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { - ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d\n", ret); + ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d.\n", ret); goto out; } @@ -5190,7 +5190,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, clr->qpn = cpu_to_le32(hr_qp->qpn); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { - ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d\n", ret); + ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d.\n", ret); goto out; } @@ -5439,7 +5439,7 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) ibdev_err(&hr_dev->ib_dev, - "failed to process cmd when modifying CQ, ret = %d\n", + "failed to process cmd when modifying CQ, ret = %d.\n", ret); return ret; diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index f807ece7bd36b4..1bcffd93ff3e37 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -185,14 +185,14 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, else ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr); if (ret) { - dev_err(dev, "Write mtpt fail!\n"); + dev_err(dev, "failed to write mtpt, ret = %d.\n", ret); goto err_page; } ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { - dev_err(dev, "CREATE_MPT failed (%d)\n", ret); + dev_err(dev, "failed to create mpt, ret = %d.\n", ret); goto err_page; } @@ -496,7 +496,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); if (ret < 1) { - ibdev_err(ibdev, "failed to store sg pages %d %d, cnt = %d.\n", + ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n", mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret); goto err_page_list; } @@ -850,7 +850,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, if (r->offset + r->count > page_cnt) { err = -EINVAL; ibdev_err(ibdev, - "Failed to check mtr%d end %d + %d, max %d\n", + "failed to check mtr%u end %u + %u, max %u.\n", i, r->offset, r->count, page_cnt); return err; } @@ -858,7 +858,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r); if (err) { ibdev_err(ibdev, - "Failed to map mtr%d offset %d, err %d\n", + "failed to map mtr%u offset %u, ret = %d.\n", i, r->offset, err); return err; } diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 8dccb6e6970fb3..cca818d05a8f76 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -64,7 +64,7 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn); if (ret) { - ibdev_err(ib_dev, "failed to alloc pd, ret = %d\n", ret); + ibdev_err(ib_dev, "failed to alloc pd, ret = %d.\n", ret); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 9a0851f68233da..d8e2fe5558d29d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -513,12 +513,12 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, /* Sanity check SQ size before proceeding */ if (ucmd->log_sq_stride > max_sq_stride || ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { - ibdev_err(&hr_dev->ib_dev, "Failed to check SQ stride size\n"); + ibdev_err(&hr_dev->ib_dev, "failed to check SQ stride size.\n"); return -EINVAL; } if (cap->max_send_sge > hr_dev->caps.max_sq_sg) { - ibdev_err(&hr_dev->ib_dev, "Failed to check SQ SGE size %d\n", + ibdev_err(&hr_dev->ib_dev, "failed to check SQ SGE size %u.\n", cap->max_send_sge); return -EINVAL; } @@ -621,7 +621,7 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes)); if (cnt > hr_dev->caps.max_wqes) { - ibdev_err(ibdev, "failed to check WQE num, WQE num = %d.\n", + ibdev_err(ibdev, "failed to check WQE num, WQE num = %u.\n", cnt); return -EINVAL; } @@ -795,7 +795,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, &hr_qp->sdb); if (ret) { ibdev_err(ibdev, - "Failed to map user SQ doorbell\n"); + "failed to map user SQ doorbell, ret = %d.\n", + ret); goto err_out; } hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB; @@ -807,7 +808,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, &hr_qp->rdb); if (ret) { ibdev_err(ibdev, - "Failed to map user RQ doorbell\n"); + "failed to map user RQ doorbell, ret = %d.\n", + ret); goto err_sdb; } hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB; @@ -824,7 +826,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0); if (ret) { ibdev_err(ibdev, - "Failed to alloc kernel RQ doorbell\n"); + "failed to alloc kernel RQ doorbell, ret = %d.\n", + ret); goto err_out; } *hr_qp->rdb.db_record = 0; @@ -867,14 +870,14 @@ static int alloc_kernel_wrid(struct hns_roce_dev *hr_dev, sq_wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL); if (ZERO_OR_NULL_PTR(sq_wrid)) { - ibdev_err(ibdev, "Failed to alloc SQ wrid\n"); + ibdev_err(ibdev, "failed to alloc SQ wrid.\n"); return -ENOMEM; } if (hr_qp->rq.wqe_cnt) { rq_wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), GFP_KERNEL); if (ZERO_OR_NULL_PTR(rq_wrid)) { - ibdev_err(ibdev, "Failed to alloc RQ wrid\n"); + ibdev_err(ibdev, "failed to alloc RQ wrid.\n"); ret = -ENOMEM; goto err_sq; } @@ -934,11 +937,15 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); if (ret) - ibdev_err(ibdev, "Failed to set user SQ size\n"); + ibdev_err(ibdev, + "failed to set user SQ size, ret = %d.\n", + ret); } else { ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); if (ret) - ibdev_err(ibdev, "Failed to set kernel SQ size\n"); + ibdev_err(ibdev, + "failed to set kernel SQ size, ret = %d.\n", + ret); } return ret; @@ -967,45 +974,48 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); if (ret) { - ibdev_err(ibdev, "Failed to set QP param\n"); + ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret); return ret; } if (!udata) { ret = alloc_kernel_wrid(hr_dev, hr_qp); if (ret) { - ibdev_err(ibdev, "Failed to alloc wrid\n"); + ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n", + ret); return ret; } } ret = alloc_qp_db(hr_dev, hr_qp, init_attr, udata, &ucmd, &resp); if (ret) { - ibdev_err(ibdev, "Failed to alloc QP doorbell\n"); + ibdev_err(ibdev, "failed to alloc QP doorbell, ret = %d.\n", + ret); goto err_wrid; } ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc QP buffer\n"); + ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret); goto err_db; } ret = alloc_qpn(hr_dev, hr_qp); if (ret) { - ibdev_err(ibdev, "Failed to alloc QPN\n"); + ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret); goto err_buf; } ret = alloc_qpc(hr_dev, hr_qp); if (ret) { - ibdev_err(ibdev, "Failed to alloc QP context\n"); + ibdev_err(ibdev, "failed to alloc QP context, ret = %d.\n", + ret); goto err_qpn; } ret = hns_roce_qp_store(hr_dev, hr_qp, init_attr); if (ret) { - ibdev_err(ibdev, "Failed to store QP\n"); + ibdev_err(ibdev, "failed to store QP, ret = %d.\n", ret); goto err_qpc; } @@ -1169,9 +1179,8 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) { - ibdev_err(&hr_dev->ib_dev, - "attr port_num invalid.attr->port_num=%d\n", - attr->port_num); + ibdev_err(&hr_dev->ib_dev, "invalid attr, port_num = %u.\n", + attr->port_num); return -EINVAL; } @@ -1179,8 +1188,8 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port; if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) { ibdev_err(&hr_dev->ib_dev, - "attr pkey_index invalid.attr->pkey_index=%d\n", - attr->pkey_index); + "invalid attr, pkey_index = %u.\n", + attr->pkey_index); return -EINVAL; } } @@ -1188,16 +1197,16 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) { ibdev_err(&hr_dev->ib_dev, - "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n", - attr->max_rd_atomic); + "invalid attr, max_rd_atomic = %u.\n", + attr->max_rd_atomic); return -EINVAL; } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) { ibdev_err(&hr_dev->ib_dev, - "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n", - attr->max_dest_rd_atomic); + "invalid attr, max_dest_rd_atomic = %u.\n", + attr->max_dest_rd_atomic); return -EINVAL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index e2e778733874a2..c4ae57e4173a19 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -92,7 +92,8 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); if (ret < 1) { - ibdev_err(ibdev, "Failed to find mtr for SRQ WQE\n"); + ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", + ret); return -ENOBUFS; } @@ -100,32 +101,34 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx, ARRAY_SIZE(mtts_idx), &dma_handle_idx); if (ret < 1) { - ibdev_err(ibdev, "Failed to find mtr for SRQ idx\n"); + ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", + ret); return -ENOBUFS; } ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ number, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc SRQ number, ret = %d.\n", ret); return -ENOMEM; } ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn); if (ret) { - ibdev_err(ibdev, "Failed to get SRQC table, err %d\n", ret); + ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret); goto err_out; } ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); if (ret) { - ibdev_err(ibdev, "Failed to store SRQC, err %d\n", ret); + ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret); goto err_put; } mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR_OR_NULL(mailbox)) { ret = -ENOMEM; - ibdev_err(ibdev, "Failed to alloc mailbox for SRQC\n"); + ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n"); goto err_xa; } @@ -136,7 +139,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { - ibdev_err(ibdev, "Failed to config SRQC, err %d\n", ret); + ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret); goto err_xa; } @@ -197,7 +200,8 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, hr_dev->caps.srqwqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); if (err) - ibdev_err(ibdev, "Failed to alloc SRQ buf mtr, err %d\n", err); + ibdev_err(ibdev, + "failed to alloc SRQ buf mtr, ret = %d.\n", err); return err; } @@ -228,14 +232,15 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); if (err) { - ibdev_err(ibdev, "Failed to alloc SRQ idx mtr, err %d\n", err); + ibdev_err(ibdev, + "failed to alloc SRQ idx mtr, ret = %d.\n", err); return err; } if (!udata) { idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); if (!idx_que->bitmap) { - ibdev_err(ibdev, "Failed to alloc SRQ idx bitmap\n"); + ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n"); err = -ENOMEM; goto err_idx_mtr; } @@ -305,7 +310,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ret = ib_copy_from_udata(&ucmd, udata, min(udata->inlen, sizeof(ucmd))); if (ret) { - ibdev_err(ibdev, "Failed to copy SRQ udata, err %d\n", + ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n", ret); return ret; } @@ -313,20 +318,21 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ buffer, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc SRQ buffer, ret = %d.\n", ret); return ret; } ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ idx, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc SRQ idx, ret = %d.\n", ret); goto err_buf_alloc; } if (!udata) { ret = alloc_srq_wrid(hr_dev, srq); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ wrid, err %d\n", + ibdev_err(ibdev, "failed to alloc SRQ wrid, ret = %d.\n", ret); goto err_idx_alloc; } @@ -338,7 +344,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ context, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc SRQ context, ret = %d.\n", ret); goto err_wrid_alloc; } From d8cc403b70de61160aaafddd776ee53aa5aa77eb Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Fri, 11 Dec 2020 09:37:37 +0800 Subject: [PATCH 144/147] RDMA/hns: Simplify AEQE process for different types of queue There is no need to get queue number repeatly for different queues from an AEQE entity, as they are the same. Furthermore, redefine the AEQE structure to make the codes more readable. In addition, HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW is removed because the hardware never reports this event. Link: https://lore.kernel.org/r/1607650657-35992-12-git-send-email-liweihang@huawei.com Signed-off-by: Yixian Liu Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 26 ++----------- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 16 +++----- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 43 +++++++-------------- 3 files changed, 23 insertions(+), 62 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 89c0c74e233cb2..55d538625e3613 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -160,7 +160,6 @@ enum hns_roce_event { /* 0x10 and 0x11 is unused in currently application case */ HNS_ROCE_EVENT_TYPE_DB_OVERFLOW = 0x12, HNS_ROCE_EVENT_TYPE_MB = 0x13, - HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW = 0x14, HNS_ROCE_EVENT_TYPE_FLR = 0x15, }; @@ -636,10 +635,9 @@ enum { struct hns_roce_work { struct hns_roce_dev *hr_dev; struct work_struct work; - u32 qpn; - u32 cqn; int event_type; int sub_type; + u32 queue_num; }; struct hns_roce_qp { @@ -707,28 +705,10 @@ struct hns_roce_aeqe { __le32 asyn; union { struct { - __le32 qp; + __le32 num; u32 rsv0; u32 rsv1; - } qp_event; - - struct { - __le32 srq; - u32 rsv0; - u32 rsv1; - } srq_event; - - struct { - __le32 cq; - u32 rsv0; - u32 rsv1; - } cq_event; - - struct { - __le32 ceqe; - u32 rsv0; - u32 rsv1; - } ce_event; + } queue_event; struct { __le64 out_param; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 9c69742fd399a9..f68585ff8e8a51 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3683,10 +3683,10 @@ static void hns_roce_v1_qp_err_handle(struct hns_roce_dev *hr_dev, int phy_port; int qpn; - qpn = roce_get_field(aeqe->event.qp_event.qp, + qpn = roce_get_field(aeqe->event.queue_event.num, HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M, HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S); - phy_port = roce_get_field(aeqe->event.qp_event.qp, + phy_port = roce_get_field(aeqe->event.queue_event.num, HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M, HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S); if (qpn <= 1) @@ -3717,9 +3717,9 @@ static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev, struct device *dev = &hr_dev->pdev->dev; u32 cqn; - cqn = roce_get_field(aeqe->event.cq_event.cq, - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S); + cqn = roce_get_field(aeqe->event.queue_event.num, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: @@ -3848,12 +3848,6 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: hns_roce_v1_db_overflow_handle(hr_dev, aeqe); break; - case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: - dev_warn(dev, "CEQ 0x%lx overflow.\n", - roce_get_field(aeqe->event.ce_event.ceqe, - HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M, - HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S)); - break; default: dev_warn(dev, "Unhandled event %d on EQ %d at idx %u.\n", event_type, eq->eqn, eq->cons_index); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a91c54c343fd1b..833e1f259936f1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5450,8 +5450,6 @@ static void hns_roce_irq_work_handle(struct work_struct *work) struct hns_roce_work *irq_work = container_of(work, struct hns_roce_work, work); struct ib_device *ibdev = &irq_work->hr_dev->ib_dev; - u32 qpn = irq_work->qpn; - u32 cqn = irq_work->cqn; switch (irq_work->event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -5467,15 +5465,15 @@ static void hns_roce_irq_work_handle(struct work_struct *work) break; case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: ibdev_err(ibdev, "Local work queue 0x%x catast error, sub_event type is: %d\n", - qpn, irq_work->sub_type); + irq_work->queue_num, irq_work->sub_type); break; case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: ibdev_err(ibdev, "Invalid request local work queue 0x%x error.\n", - qpn); + irq_work->queue_num); break; case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: ibdev_err(ibdev, "Local access violation work queue 0x%x error, sub_event type is: %d\n", - qpn, irq_work->sub_type); + irq_work->queue_num, irq_work->sub_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: ibdev_warn(ibdev, "SRQ limit reach.\n"); @@ -5487,10 +5485,10 @@ static void hns_roce_irq_work_handle(struct work_struct *work) ibdev_err(ibdev, "SRQ catas error.\n"); break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: - ibdev_err(ibdev, "CQ 0x%x access err.\n", cqn); + ibdev_err(ibdev, "CQ 0x%x access err.\n", irq_work->queue_num); break; case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - ibdev_warn(ibdev, "CQ 0x%x overflow\n", cqn); + ibdev_warn(ibdev, "CQ 0x%x overflow\n", irq_work->queue_num); break; case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: ibdev_warn(ibdev, "DB overflow.\n"); @@ -5506,8 +5504,7 @@ static void hns_roce_irq_work_handle(struct work_struct *work) } static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq, - u32 qpn, u32 cqn) + struct hns_roce_eq *eq, u32 queue_num) { struct hns_roce_work *irq_work; @@ -5517,10 +5514,9 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle); irq_work->hr_dev = hr_dev; - irq_work->qpn = qpn; - irq_work->cqn = cqn; irq_work->event_type = eq->event_type; irq_work->sub_type = eq->sub_type; + irq_work->queue_num = queue_num; queue_work(hr_dev->irq_workq, &(irq_work->work)); } @@ -5572,10 +5568,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq); int aeqe_found = 0; int event_type; + u32 queue_num; int sub_type; - u32 srqn; - u32 qpn; - u32 cqn; while (aeqe) { /* Make sure we read AEQ entry after we have checked the @@ -5589,15 +5583,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M, HNS_ROCE_V2_AEQE_SUB_TYPE_S); - qpn = roce_get_field(aeqe->event.qp_event.qp, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); - cqn = roce_get_field(aeqe->event.cq_event.cq, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); - srqn = roce_get_field(aeqe->event.srq_event.srq, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); + queue_num = roce_get_field(aeqe->event.queue_event.num, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -5608,15 +5596,15 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - hns_roce_qp_event(hr_dev, qpn, event_type); + hns_roce_qp_event(hr_dev, queue_num, event_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: - hns_roce_srq_event(hr_dev, srqn, event_type); + hns_roce_srq_event(hr_dev, queue_num, event_type); break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - hns_roce_cq_event(hr_dev, cqn, event_type); + hns_roce_cq_event(hr_dev, queue_num, event_type); break; case HNS_ROCE_EVENT_TYPE_MB: hns_roce_cmd_event(hr_dev, @@ -5625,7 +5613,6 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, le64_to_cpu(aeqe->event.cmd.out_param)); break; case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: - case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: case HNS_ROCE_EVENT_TYPE_FLR: break; default: @@ -5642,7 +5629,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, if (eq->cons_index > (2 * eq->entries - 1)) eq->cons_index = 0; - hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn); + hns_roce_v2_init_irq_work(hr_dev, eq, queue_num); aeqe = next_aeqe_sw_v2(eq); } From d21a1240f5169a07a230d72e0e6d3773b2a088b4 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 10 Dec 2020 11:42:59 -0600 Subject: [PATCH 145/147] RDMA/rxe: Use acquire/release for memory ordering Change work and completion queues to use smp_load_acquire() and smp_store_release() to synchronize between driver and users. This commit goes with a matching series of commits in the rxe user space provider. Link: https://lore.kernel.org/r/20201210174258.5234-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_cq.c | 5 -- drivers/infiniband/sw/rxe/rxe_queue.h | 94 +++++++++++++++++---------- drivers/infiniband/sw/rxe/rxe_verbs.c | 11 ---- include/uapi/rdma/rdma_user_rxe.h | 21 ++++++ 4 files changed, 81 insertions(+), 50 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index 43394c3f29d411..b315ebf041ac8b 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -123,11 +123,6 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe)); - /* make sure all changes to the CQ are written before we update the - * producer pointer - */ - smp_wmb(); - advance_producer(cq->queue); spin_unlock_irqrestore(&cq->cq_lock, flags); diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index 7d434a6837a7f4..2902ca7b288c15 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -7,9 +7,11 @@ #ifndef RXE_QUEUE_H #define RXE_QUEUE_H +/* for definition of shared struct rxe_queue_buf */ +#include + /* implements a simple circular buffer that can optionally be * shared between user space and the kernel and can be resized - * the requested element size is rounded up to a power of 2 * and the number of elements in the buffer is also rounded * up to a power of 2. Since the queue is empty when the @@ -17,28 +19,6 @@ * of the queue is one less than the number of element slots */ -/* this data structure is shared between user space and kernel - * space for those cases where the queue is shared. It contains - * the producer and consumer indices. Is also contains a copy - * of the queue size parameters for user space to use but the - * kernel must use the parameters in the rxe_queue struct - * this MUST MATCH the corresponding librxe struct - * for performance reasons arrange to have producer and consumer - * pointers in separate cache lines - * the kernel should always mask the indices to avoid accessing - * memory outside of the data area - */ -struct rxe_queue_buf { - __u32 log2_elem_size; - __u32 index_mask; - __u32 pad_1[30]; - __u32 producer_index; - __u32 pad_2[31]; - __u32 consumer_index; - __u32 pad_3[31]; - __u8 data[]; -}; - struct rxe_queue { struct rxe_dev *rxe; struct rxe_queue_buf *buf; @@ -46,7 +26,7 @@ struct rxe_queue { size_t buf_size; size_t elem_size; unsigned int log2_elem_size; - unsigned int index_mask; + u32 index_mask; }; int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf, @@ -76,26 +56,56 @@ static inline int next_index(struct rxe_queue *q, int index) static inline int queue_empty(struct rxe_queue *q) { - return ((q->buf->producer_index - q->buf->consumer_index) - & q->index_mask) == 0; + u32 prod; + u32 cons; + + /* make sure all changes to queue complete before + * testing queue empty + */ + prod = smp_load_acquire(&q->buf->producer_index); + /* same */ + cons = smp_load_acquire(&q->buf->consumer_index); + + return ((prod - cons) & q->index_mask) == 0; } static inline int queue_full(struct rxe_queue *q) { - return ((q->buf->producer_index + 1 - q->buf->consumer_index) - & q->index_mask) == 0; + u32 prod; + u32 cons; + + /* make sure all changes to queue complete before + * testing queue full + */ + prod = smp_load_acquire(&q->buf->producer_index); + /* same */ + cons = smp_load_acquire(&q->buf->consumer_index); + + return ((prod + 1 - cons) & q->index_mask) == 0; } static inline void advance_producer(struct rxe_queue *q) { - q->buf->producer_index = (q->buf->producer_index + 1) - & q->index_mask; + u32 prod; + + prod = (q->buf->producer_index + 1) & q->index_mask; + + /* make sure all changes to queue complete before + * changing producer index + */ + smp_store_release(&q->buf->producer_index, prod); } static inline void advance_consumer(struct rxe_queue *q) { - q->buf->consumer_index = (q->buf->consumer_index + 1) - & q->index_mask; + u32 cons; + + cons = (q->buf->consumer_index + 1) & q->index_mask; + + /* make sure all changes to queue complete before + * changing consumer index + */ + smp_store_release(&q->buf->consumer_index, cons); } static inline void *producer_addr(struct rxe_queue *q) @@ -112,12 +122,28 @@ static inline void *consumer_addr(struct rxe_queue *q) static inline unsigned int producer_index(struct rxe_queue *q) { - return q->buf->producer_index; + u32 index; + + /* make sure all changes to queue + * complete before getting producer index + */ + index = smp_load_acquire(&q->buf->producer_index); + index &= q->index_mask; + + return index; } static inline unsigned int consumer_index(struct rxe_queue *q) { - return q->buf->consumer_index; + u32 index; + + /* make sure all changes to queue + * complete before getting consumer index + */ + index = smp_load_acquire(&q->buf->consumer_index); + index &= q->index_mask; + + return index; } static inline void *addr_from_index(struct rxe_queue *q, unsigned int index) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 2fbea2b2d72a17..a031514e2f41a7 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -244,11 +244,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) recv_wqe->dma.cur_sge = 0; recv_wqe->dma.sge_offset = 0; - /* make sure all changes to the work queue are written before we - * update the producer pointer - */ - smp_wmb(); - advance_producer(rq->queue); return 0; @@ -633,12 +628,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, if (unlikely(err)) goto err1; - /* - * make sure all changes to the work queue are - * written before we update the producer pointer - */ - smp_wmb(); - advance_producer(sq->queue); spin_unlock_irqrestore(&qp->sq.sq_lock, flags); diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index e591d8c1f3cf10..068433e2229dea 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -181,4 +181,25 @@ struct rxe_modify_srq_cmd { __aligned_u64 mmap_info_addr; }; +/* This data structure is stored at the base of work and + * completion queues shared between user space and kernel space. + * It contains the producer and consumer indices. Is also + * contains a copy of the queue size parameters for user space + * to use but the kernel must use the parameters in the + * rxe_queue struct. For performance reasons arrange to have + * producer and consumer indices in separate cache lines + * the kernel should always mask the indices to avoid accessing + * memory outside of the data area + */ +struct rxe_queue_buf { + __u32 log2_elem_size; + __u32 index_mask; + __u32 pad_1[30]; + __u32 producer_index; + __u32 pad_2[31]; + __u32 consumer_index; + __u32 pad_3[31]; + __u8 data[]; +}; + #endif /* RDMA_USER_RXE_H */ From e89938902927a54abebccc9537991aca5237dfaf Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 13 Dec 2020 15:29:36 +0200 Subject: [PATCH 146/147] RDMA/mlx5: Fix MR cache memory leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the MR cache entry invalidation failed, then we detach this entry from the cache, therefore we must to free the memory as well. Allcation backtrace for the leaker: [<00000000d8e423b0>] alloc_cache_mr+0x23/0xc0 [mlx5_ib] [<000000001f21304c>] create_cache_mr+0x3f/0xf0 [mlx5_ib] [<000000009d6b45dc>] mlx5_ib_alloc_implicit_mr+0x41/0×210 [mlx5_ib] [<00000000879d0d68>] mlx5_ib_reg_user_mr+0x9e/0×6e0 [mlx5_ib] [<00000000be74bf89>] create_qp+0x2fc/0xf00 [ib_uverbs] [<000000001a532d22>] ib_uverbs_handler_UVERBS_METHOD_COUNTERS_READ+0x1d9/0×230 [ib_uverbs] [<0000000070f46001>] rdma_alloc_commit_uobject+0xb5/0×120 [ib_uverbs] [<000000006d8a0b38>] uverbs_alloc+0x2b/0xf0 [ib_uverbs] [<00000000075217c9>] ksysioctl+0x234/0×7d0 [<00000000eb5c120b>] __x64_sys_ioctl+0x16/0×20 [<00000000db135b48>] do_syscall_64+0x59/0×2e0 Fixes: 1769c4c57548 ("RDMA/mlx5: Always remove MRs from the cache before destroying them") Link: https://lore.kernel.org/r/20201213132940.345554-2-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 6fa869c30e3f75..24f8d59a42eae6 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -643,6 +643,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) if (mlx5_mr_cache_invalidate(mr)) { detach_mr_from_cache(mr); destroy_mkey(dev, mr); + kfree(mr); return; } From e246b7c035d74abfb3507fa10082d0c42cc016c3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 13 Dec 2020 15:29:39 +0200 Subject: [PATCH 147/147] RDMA/cma: Don't overwrite sgid_attr after device is released As part of the cma_dev release, that pointer will be set to NULL. In case it happens in rdma_bind_addr() (part of an error flow), the next call to addr_handler() will have a call to cma_acquire_dev_by_src_ip() which will overwrite sgid_attr without releasing it. WARNING: CPU: 2 PID: 108 at drivers/infiniband/core/cma.c:606 cma_bind_sgid_attr drivers/infiniband/core/cma.c:606 [inline] WARNING: CPU: 2 PID: 108 at drivers/infiniband/core/cma.c:606 cma_acquire_dev_by_src_ip+0x470/0x4b0 drivers/infiniband/core/cma.c:649 CPU: 2 PID: 108 Comm: kworker/u8:1 Not tainted 5.10.0-rc6+ #257 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: ib_addr process_one_req RIP: 0010:cma_bind_sgid_attr drivers/infiniband/core/cma.c:606 [inline] RIP: 0010:cma_acquire_dev_by_src_ip+0x470/0x4b0 drivers/infiniband/core/cma.c:649 Code: 66 d9 4a ff 4d 8b 6e 10 49 8d bd 1c 08 00 00 e8 b6 d6 4a ff 45 0f b6 bd 1c 08 00 00 41 83 e7 01 e9 49 fd ff ff e8 90 c5 29 ff <0f> 0b e9 80 fe ff ff e8 84 c5 29 ff 4c 89 f7 e8 2c d9 4a ff 4d 8b RSP: 0018:ffff8881047c7b40 EFLAGS: 00010293 RAX: ffff888104789c80 RBX: 0000000000000001 RCX: ffffffff820b8ef8 RDX: 0000000000000000 RSI: ffffffff820b9080 RDI: ffff88810cd4c998 RBP: ffff8881047c7c08 R08: ffff888104789c80 R09: ffffed10209f4036 R10: ffff888104fa01ab R11: ffffed10209f4035 R12: ffff88810cd4c800 R13: ffff888105750e28 R14: ffff888108f0a100 R15: ffff88810cd4c998 FS: 0000000000000000(0000) GS:ffff888119c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000104e60005 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: addr_handler+0x266/0x350 drivers/infiniband/core/cma.c:3190 process_one_req+0xa3/0x300 drivers/infiniband/core/addr.c:645 process_one_work+0x54c/0x930 kernel/workqueue.c:2272 worker_thread+0x82/0x830 kernel/workqueue.c:2418 kthread+0x1ca/0x220 kernel/kthread.c:292 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:296 Fixes: ff11c6cd521f ("RDMA/cma: Introduce and use cma_acquire_dev_by_src_ip()") Link: https://lore.kernel.org/r/20201213132940.345554-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c06c87a4dc5e7d..c51b84b2d2f375 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -477,6 +477,10 @@ static void cma_release_dev(struct rdma_id_private *id_priv) list_del(&id_priv->list); cma_dev_put(id_priv->cma_dev); id_priv->cma_dev = NULL; + if (id_priv->id.route.addr.dev_addr.sgid_attr) { + rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); + id_priv->id.route.addr.dev_addr.sgid_attr = NULL; + } mutex_unlock(&lock); } @@ -1861,9 +1865,6 @@ static void _destroy_id(struct rdma_id_private *id_priv, kfree(id_priv->id.route.path_rec); - if (id_priv->id.route.addr.dev_addr.sgid_attr) - rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); - put_net(id_priv->id.route.addr.dev_addr.net); rdma_restrack_del(&id_priv->res); kfree(id_priv);