diff --git a/SOURCES/10000-add-sxe2-rdma-core.patch b/SOURCES/10000-add-sxe2-rdma-core.patch index 2fac59ac309565e9dd8458fd23ce33c7d25d5eb3..bdd1f51e0cbea57985a916b25c61d5aba528da12 100755 --- a/SOURCES/10000-add-sxe2-rdma-core.patch +++ b/SOURCES/10000-add-sxe2-rdma-core.patch @@ -1,9668 +1,9668 @@ -diff -Naur rdma-core-48.0/CMakeLists.txt rdma-core-48.0.bak/CMakeLists.txt ---- rdma-core-48.0/CMakeLists.txt 2026-05-26 10:44:41.360128441 +0800 -+++ rdma-core-48.0.bak/CMakeLists.txt 2026-05-26 10:31:41.449978895 +0800 -@@ -733,7 +733,6 @@ +diff -Naur rdma-core-48.0.bak/CMakeLists.txt rdma-core-48.0/CMakeLists.txt +--- rdma-core-48.0.bak/CMakeLists.txt 2026-05-27 16:55:02.755758364 +0800 ++++ rdma-core-48.0/CMakeLists.txt 2026-05-27 17:10:06.650938527 +0800 +@@ -733,6 +733,7 @@ #add_subdirectory(providers/ocrdma) add_subdirectory(providers/qedr) add_subdirectory(providers/vmw_pvrdma) --add_subdirectory(providers/sxe2) ++add_subdirectory(providers/sxe2) endif() add_subdirectory(providers/hfi1verbs) -diff -Naur rdma-core-48.0/kernel-headers/CMakeLists.txt rdma-core-48.0.bak/kernel-headers/CMakeLists.txt ---- rdma-core-48.0/kernel-headers/CMakeLists.txt 2026-05-26 10:39:16.261981850 +0800 -+++ rdma-core-48.0.bak/kernel-headers/CMakeLists.txt 2026-05-26 10:31:41.421978710 +0800 -@@ -26,7 +26,6 @@ +diff -Naur rdma-core-48.0.bak/kernel-headers/CMakeLists.txt rdma-core-48.0/kernel-headers/CMakeLists.txt +--- rdma-core-48.0.bak/kernel-headers/CMakeLists.txt 2026-05-27 16:55:02.750758330 +0800 ++++ rdma-core-48.0/kernel-headers/CMakeLists.txt 2026-05-27 17:28:11.530756597 +0800 +@@ -26,6 +26,7 @@ rdma/rvt-abi.h rdma/siw-abi.h rdma/vmw_pvrdma-abi.h -- rdma/sxe2-abi.h ++ rdma/sxe2-abi.h ) publish_internal_headers(rdma/hfi -@@ -81,7 +80,6 @@ +@@ -80,6 +81,7 @@ rdma/rdma_user_rxe.h rdma/siw-abi.h rdma/vmw_pvrdma-abi.h -- rdma/sxe2-abi.h ++ rdma/sxe2-abi.h ) publish_headers(infiniband -diff -Naur rdma-core-48.0/kernel-headers/rdma/ib_user_ioctl_verbs.h 
rdma-core-48.0.bak/kernel-headers/rdma/ib_user_ioctl_verbs.h ---- rdma-core-48.0/kernel-headers/rdma/ib_user_ioctl_verbs.h 2026-05-26 10:35:17.353404424 +0800 -+++ rdma-core-48.0.bak/kernel-headers/rdma/ib_user_ioctl_verbs.h 2026-05-26 10:31:41.422978717 +0800 -@@ -254,7 +254,6 @@ +diff -Naur rdma-core-48.0.bak/kernel-headers/rdma/ib_user_ioctl_verbs.h rdma-core-48.0/kernel-headers/rdma/ib_user_ioctl_verbs.h +--- rdma-core-48.0.bak/kernel-headers/rdma/ib_user_ioctl_verbs.h 2026-05-27 16:55:02.748758317 +0800 ++++ rdma-core-48.0/kernel-headers/rdma/ib_user_ioctl_verbs.h 2026-05-27 16:58:50.304296342 +0800 +@@ -254,6 +254,7 @@ RDMA_DRIVER_SIW, RDMA_DRIVER_ERDMA, RDMA_DRIVER_MANA, -- RDMA_DRIVER_SXE2, ++ RDMA_DRIVER_SXE2, }; enum ib_uverbs_gid_type { -diff -Naur rdma-core-48.0/kernel-headers/rdma/sxe2-abi.h rdma-core-48.0.bak/kernel-headers/rdma/sxe2-abi.h ---- rdma-core-48.0/kernel-headers/rdma/sxe2-abi.h 2026-05-26 10:35:55.120653790 +0800 -+++ rdma-core-48.0.bak/kernel-headers/rdma/sxe2-abi.h 1970-01-01 08:00:00.000000000 +0800 -@@ -1,117 +0,0 @@ --/** -- * Copyright (C) -- * -- * @file sxe2_abi.h -- * @author zhaozy -- * @date 2024年6月6日 -- * @brief verbs扩展命令结构定义 -- * @note 创建文件 -- */ --#ifndef __SXE2_ABI_STRUCT_H__ --#define __SXE2_ABI_STRUCT_H__ -- --#include -- --#define SXE2_RDMA_ABI_VER (5) -- --struct sxe2_create_ah_resp { -- __u32 ah_id; -- __u8 rsvd[4]; --}; -- --struct sxe2_modify_qp_req { -- __u8 sq_flush; -- __u8 rq_flush; -- __u8 llwqe_enable; -- __u8 new_page_alloc; -- __u32 llwqe_page_index; --}; -- --struct sxe2_modify_qp_resp { -- __u8 rd_fence_rate; -- __u8 rsvd[3]; -- __u32 db_mmap_size; -- __u32 db_page_id; -- __u32 rsvd1; -- __u64 db_mmap_offset; --}; -- --struct sxe2_create_qp_req { -- __u32 sq_depth; -- __u32 rq_depth; -- __u8 sq_shift; -- __u8 rq_shift; -- __u8 rsvd[6]; -- __u64 user_wqe_bufs; -- __u64 doorbell_note; -- __u64 user_compl_ctx; --}; -- --struct sxe2_create_cq_req { -- __aligned_u64 user_cq_buf; -- __aligned_u64 user_cq_db_note; 
--}; -- --struct sxe2_create_cq_resp { -- __u32 cq_id; -- __u32 ncqe; --}; -- --struct sxe2_alloc_pd_resp { -- __u32 pd_id; -- __u8 rsvd[4]; --}; -- --struct sxe2_create_qp_resp { -- __u32 qpn; -- __u32 qp_caps; --}; -- --struct sxe2_alloc_ucontext_req { -- __u32 rsvd32; -- __u8 userspace_ver; -- __u8 rsvd8[3]; -- __aligned_u64 comp_mask; --}; -- --struct sxe2_alloc_ucontext_resp { -- __u32 max_pds; -- __u32 max_qps; -- __u32 wq_size; /* size of the WQs (SQ+RQ) in the mmaped area */ -- __u8 kernel_ver; -- __u8 rsvd[3]; -- __aligned_u64 feature_flags; -- __aligned_u64 db_mmap_key; -- __u32 max_hw_wq_frags; -- __u32 max_hw_read_sges; -- __u32 max_hw_inline; -- __u32 max_hw_rq_quanta; -- __u32 max_hw_wq_quanta; -- __u32 min_hw_cq_size; -- __u32 max_hw_cq_size; -- __u16 max_hw_sq_chunk; -- __u8 hw_rev; -- __u8 is_pf; -- __aligned_u64 comp_mask; -- __u16 min_hw_wq_size; -- __u32 max_db; -- __u8 rsvd3[2]; -- __u8 bdf[16]; -- __u32 max_hw_srq_quanta; -- __u32 max_hw_srq_wr; --}; -- --struct sxe2_create_srq_req { -- __aligned_u64 user_srq_buf; /* SRQ Buf虚拟地址 */ -- __aligned_u64 user_srq_db_note; /* SRQ DB Note虚拟地址 */ -- __aligned_u64 srq_cmpl_ctx; /* SRQ Completion Context指针地址 */ -- __u32 srq_buf_size; /* SRQ Buf大小 */ -- __u32 srq_size; /* SRQ深度(包含SRQE数量) */ -- __u32 max_wr_cal; /* 用户态计算后的max_wr,已减去预留wr,Query SRQ使用 */ --}; -- --struct sxe2_create_srq_resp { -- __u32 srq_id; /* SRQN */ --}; -- --#endif -diff -Naur rdma-core-48.0/libibverbs/verbs.h rdma-core-48.0.bak/libibverbs/verbs.h ---- rdma-core-48.0/libibverbs/verbs.h 2026-05-26 10:41:09.999732819 +0800 -+++ rdma-core-48.0.bak/libibverbs/verbs.h 2026-05-26 10:31:41.521979371 +0800 -@@ -2273,7 +2273,6 @@ +diff -Naur rdma-core-48.0.bak/kernel-headers/rdma/sxe2-abi.h rdma-core-48.0/kernel-headers/rdma/sxe2-abi.h +--- rdma-core-48.0.bak/kernel-headers/rdma/sxe2-abi.h 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/kernel-headers/rdma/sxe2-abi.h 2026-05-27 16:59:51.797711971 +0800 +@@ -0,0 +1,117 @@ ++/** ++ * 
Copyright (C) ++ * ++ * @file sxe2_abi.h ++ * @author zhaozy ++ * @date 2024年6月6日 ++ * @brief verbs扩展命令结构定义 ++ * @note 创建文件 ++ */ ++#ifndef __SXE2_ABI_STRUCT_H__ ++#define __SXE2_ABI_STRUCT_H__ ++ ++#include ++ ++#define SXE2_RDMA_ABI_VER (5) ++ ++struct sxe2_create_ah_resp { ++ __u32 ah_id; ++ __u8 rsvd[4]; ++}; ++ ++struct sxe2_modify_qp_req { ++ __u8 sq_flush; ++ __u8 rq_flush; ++ __u8 llwqe_enable; ++ __u8 new_page_alloc; ++ __u32 llwqe_page_index; ++}; ++ ++struct sxe2_modify_qp_resp { ++ __u8 rd_fence_rate; ++ __u8 rsvd[3]; ++ __u32 db_mmap_size; ++ __u32 db_page_id; ++ __u32 rsvd1; ++ __u64 db_mmap_offset; ++}; ++ ++struct sxe2_create_qp_req { ++ __u32 sq_depth; ++ __u32 rq_depth; ++ __u8 sq_shift; ++ __u8 rq_shift; ++ __u8 rsvd[6]; ++ __u64 user_wqe_bufs; ++ __u64 doorbell_note; ++ __u64 user_compl_ctx; ++}; ++ ++struct sxe2_create_cq_req { ++ __aligned_u64 user_cq_buf; ++ __aligned_u64 user_cq_db_note; ++}; ++ ++struct sxe2_create_cq_resp { ++ __u32 cq_id; ++ __u32 ncqe; ++}; ++ ++struct sxe2_alloc_pd_resp { ++ __u32 pd_id; ++ __u8 rsvd[4]; ++}; ++ ++struct sxe2_create_qp_resp { ++ __u32 qpn; ++ __u32 qp_caps; ++}; ++ ++struct sxe2_alloc_ucontext_req { ++ __u32 rsvd32; ++ __u8 userspace_ver; ++ __u8 rsvd8[3]; ++ __aligned_u64 comp_mask; ++}; ++ ++struct sxe2_alloc_ucontext_resp { ++ __u32 max_pds; ++ __u32 max_qps; ++ __u32 wq_size; /* size of the WQs (SQ+RQ) in the mmaped area */ ++ __u8 kernel_ver; ++ __u8 rsvd[3]; ++ __aligned_u64 feature_flags; ++ __aligned_u64 db_mmap_key; ++ __u32 max_hw_wq_frags; ++ __u32 max_hw_read_sges; ++ __u32 max_hw_inline; ++ __u32 max_hw_rq_quanta; ++ __u32 max_hw_wq_quanta; ++ __u32 min_hw_cq_size; ++ __u32 max_hw_cq_size; ++ __u16 max_hw_sq_chunk; ++ __u8 hw_rev; ++ __u8 is_pf; ++ __aligned_u64 comp_mask; ++ __u16 min_hw_wq_size; ++ __u32 max_db; ++ __u8 rsvd3[2]; ++ __u8 bdf[16]; ++ __u32 max_hw_srq_quanta; ++ __u32 max_hw_srq_wr; ++}; ++ ++struct sxe2_create_srq_req { ++ __aligned_u64 user_srq_buf; /* SRQ Buf虚拟地址 */ ++ 
__aligned_u64 user_srq_db_note; /* SRQ DB Note虚拟地址 */ ++ __aligned_u64 srq_cmpl_ctx; /* SRQ Completion Context指针地址 */ ++ __u32 srq_buf_size; /* SRQ Buf大小 */ ++ __u32 srq_size; /* SRQ深度(包含SRQE数量) */ ++ __u32 max_wr_cal; /* 用户态计算后的max_wr,已减去预留wr,Query SRQ使用 */ ++}; ++ ++struct sxe2_create_srq_resp { ++ __u32 srq_id; /* SRQN */ ++}; ++ ++#endif +diff -Naur rdma-core-48.0.bak/libibverbs/verbs.h rdma-core-48.0/libibverbs/verbs.h +--- rdma-core-48.0.bak/libibverbs/verbs.h 2026-05-27 16:55:02.732758209 +0800 ++++ rdma-core-48.0/libibverbs/verbs.h 2026-05-27 17:06:01.496244840 +0800 +@@ -2273,6 +2273,7 @@ extern const struct verbs_device_ops verbs_provider_vmw_pvrdma; extern const struct verbs_device_ops verbs_provider_all; extern const struct verbs_device_ops verbs_provider_none; --extern const struct verbs_device_ops verbs_provider_sxe2; ++extern const struct verbs_device_ops verbs_provider_sxe2; void ibv_static_providers(void *unused, ...); static inline struct ibv_device **__ibv_get_device_list(int *num_devices) -diff -Naur rdma-core-48.0/providers/sxe2/ah.c rdma-core-48.0.bak/providers/sxe2/ah.c ---- rdma-core-48.0/providers/sxe2/ah.c 2026-05-26 10:42:01.855075215 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/ah.c 1970-01-01 08:00:00.000000000 +0800 -@@ -1,62 +0,0 @@ -- --#include --#include -- --#include "sxe2_common.h" --#include "sxe2_abi.h" --#include "log.h" -- --struct ibv_ah *sxe2_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr) --{ -- struct sxe2_uah *ah; -- union ibv_gid sgid; -- struct sxe2_ucreate_ah_resp resp; -- int err; -- struct sxe2_rdma_ucontext *sctx = to_sctx(ibpd->context); -- -- memset(&resp, 0, sizeof(resp)); -- err = ibv_query_gid(ibpd->context, attr->port_num, attr->grh.sgid_index, -- &sgid); -- if (err) { -- SXE2_VERBS_LOG_ERROR_BDF("ibv query gid failed, ret %d.\n", err); -- errno = err; -- return NULL; -- } -- -- ah = calloc(1, sizeof(*ah)); -- if (!ah) { -- SXE2_VERBS_LOG_ERROR_BDF("calloc ah buf failed.\n"); -- return NULL; -- } -- 
-- err = ibv_cmd_create_ah(ibpd, &ah->ibv_ah, attr, &resp.ibv_resp, -- sizeof(resp)); -- if (err) { -- free(ah); -- SXE2_VERBS_LOG_ERROR_BDF("ibv create ah failed, ret %d.\n", err); -- errno = err; -- return NULL; -- } -- -- ah->ah_id = resp.ah_id; -- -- return &ah->ibv_ah; --} -- --int sxe2_udestroy_ah(struct ibv_ah *ibah) --{ -- struct sxe2_uah *ah; -- int ret; -- -- ah = container_of(ibah, struct sxe2_uah, ibv_ah); -- -- ret = ibv_cmd_destroy_ah(ibah); -- if (ret) { -- SXE2_VERBS_LOG_ERROR("ibv destroy ah failed, ret %d.\n", ret); -- return ret; -- } -- -- free(ah); -- -- return 0; --} -diff -Naur rdma-core-48.0/providers/sxe2/buf.c rdma-core-48.0.bak/providers/sxe2/buf.c ---- rdma-core-48.0/providers/sxe2/buf.c 2026-05-26 10:42:01.854075208 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/buf.c 1970-01-01 08:00:00.000000000 +0800 -@@ -1,313 +0,0 @@ -- --#include --#include --#include --#include --#include --#include --#include --#include "util/bitmap.h" -- --#include "buf.h" --#include "sxe2_common.h" --#include "sxe2_abi.h" --#include "log.h" -- -- void sxe2_huge_mem_free(struct sxe2_rdma_ucontext *ctx, sxe2_hugetlb_mem_s *hmem) --{ -- struct sxe2_rdma_ucontext *sctx = ctx; -- -- if (hmem->bitmap) { -- free(hmem->bitmap); -- } -- -- if (shmdt(hmem->shmaddr) == -1) { -- SXE2_VERBS_LOG_ERROR_BDF("shmdt fail, err(%s)", strerror(errno)); -- } -- shmctl(hmem->shmid, IPC_RMID, NULL); -- free(hmem); -- -- return; --} -- -- sxe2_hugetlb_mem_s *sxe2_huge_mem_alloc(struct sxe2_rdma_ucontext *ctx, size_t size) --{ -- sxe2_hugetlb_mem_s *hmem; -- size_t shm_len; -- struct sxe2_rdma_ucontext *sctx = ctx; -- -- hmem = malloc(sizeof(*hmem)); -- if (hmem == NULL) { -- SXE2_VERBS_LOG_ERROR_BDF("hmem malloc error"); -- goto end; -- } -- memset(hmem, 0, sizeof(*hmem)); -- -- shm_len = align(size, SXE2_SHM_LENGTH); -- hmem->shmid = shmget(IPC_PRIVATE, shm_len, SHM_HUGETLB | SHM_R | SHM_W); -- if (hmem->shmid == -1) { -- SXE2_VERBS_LOG_ERROR_BDF("shmget fail, err(%s)", 
strerror(errno)); -- goto out_free; -- } -- -- hmem->shmaddr = shmat(hmem->shmid, SXE2_SHM_ADDR, SXE2_SHMAT_FLAGS); -- if (hmem->shmaddr == (void *)-1) { -- SXE2_VERBS_LOG_ERROR_BDF("shmat fail, err(%s)", strerror(errno)); -- goto out_rmid; -- } -- -- hmem->bitmap = bitmap_alloc0(shm_len / SXE2_SHM_SINGLE_CHUNK_SIZE); -- if (!hmem->bitmap) { -- SXE2_VERBS_LOG_ERROR_BDF("bitmap_alloc0 fail, err(%s)", strerror(errno)); -- goto out_shmdt; -- } -- -- hmem->bmp_size = shm_len / SXE2_SHM_SINGLE_CHUNK_SIZE; -- -- shmctl(hmem->shmid, IPC_RMID, NULL); -- -- goto end; -- --out_shmdt: -- if (shmdt(hmem->shmaddr) == -1) { -- SXE2_VERBS_LOG_ERROR_BDF("shmdt fail, err(%s)", strerror(errno)); -- } --out_rmid: -- shmctl(hmem->shmid, IPC_RMID, NULL); --out_free: -- free(hmem); -- hmem = NULL; --end: -- return hmem; --} -- -- void sxe2_huge_buf_free(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf, bool dofork_range) --{ -- size_t nchunk; -- -- nchunk = buf->length / SXE2_SHM_SINGLE_CHUNK_SIZE; -- -- if (buf->hmem == NULL) { -- goto end; -- } -- -- if (dofork_range == true) { -- ibv_dofork_range(buf->buf, buf->length); -- } -- -- sxe2_spin_lock(&ctx->hugetlb_lock); -- bitmap_zero_region(buf->hmem->bitmap, buf->base, buf->base + nchunk); -- if (bitmap_empty(buf->hmem->bitmap, buf->hmem->bmp_size)) { -- list_del(&buf->hmem->entry); -- sxe2_spin_unlock(&ctx->hugetlb_lock); -- sxe2_huge_mem_free(ctx, buf->hmem); -- buf->hmem = NULL; -- goto free; -- } -- -- sxe2_spin_unlock(&ctx->hugetlb_lock); --free: -- buf->buf = NULL; --end: -- return; --} -- -- int sxe2_huge_buf_alloc(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf, size_t size) --{ -- int found = 0; -- size_t nchunk; -- sxe2_hugetlb_mem_s *hmem = NULL; -- int ret; -- struct sxe2_rdma_ucontext *sctx = ctx; -- -- buf->length = align(size, SXE2_SHM_SINGLE_CHUNK_SIZE); -- nchunk = buf->length / SXE2_SHM_SINGLE_CHUNK_SIZE; -- -- if (!nchunk) { -- ret = 0; -- SXE2_VERBS_LOG_WARN_BDF("not need huge"); -- goto end; -- } -- -- 
sxe2_spin_lock(&ctx->hugetlb_lock); -- list_for_each(&ctx->hugetlb_list, hmem, entry) { -- if (!bitmap_full(hmem->bitmap, hmem->bmp_size)) { -- buf->base = bitmap_find_free_region(hmem->bitmap, hmem->bmp_size, nchunk); -- if (buf->base != hmem->bmp_size) { -- bitmap_fill_region(hmem->bitmap, buf->base, buf->base + nchunk); -- buf->hmem = hmem; -- found = true; -- break; -- } -- } -- } -- sxe2_spin_unlock(&ctx->hugetlb_lock); -- -- if (!found) { -- hmem = sxe2_huge_mem_alloc(ctx, buf->length); -- if (NULL == hmem) { -- SXE2_VERBS_LOG_ERROR_BDF("sxe2_huge_mem_alloc error"); -- ret = ENOMEM; -- goto end; -- } -- -- buf->base = 0; -- assert(nchunk <= hmem->bmp_size); -- bitmap_fill_region(hmem->bitmap, 0, nchunk); -- -- buf->hmem = hmem; -- -- sxe2_spin_lock(&ctx->hugetlb_lock); -- if (nchunk != hmem->bmp_size) { -- list_add(&ctx->hugetlb_list, &hmem->entry); -- } else { -- list_add_tail(&ctx->hugetlb_list, &hmem->entry); -- } -- sxe2_spin_unlock(&ctx->hugetlb_lock); -- } -- -- buf->buf = hmem->shmaddr + buf->base * SXE2_SHM_SINGLE_CHUNK_SIZE; -- -- ret = ibv_dontfork_range(buf->buf, buf->length); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("dontfork_range error,buf:%p,length:%zu,ret:%d", buf->buf, buf->length, ret); -- ret = EPERM; -- goto out_fork; -- } -- buf->type = SXE2_ALLOC_TYPE_HUGE; -- -- goto end; -- --out_fork: -- sxe2_huge_buf_free(ctx, buf, false); --end: -- return ret; --} -- --int sxe2_prefered_buf_alloc(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf, size_t size, size_t page_size, -- sxe2_alloc_type_e type) --{ -- int ret; -- struct sxe2_rdma_ucontext *sctx = ctx; -- -- if (type == SXE2_ALLOC_TYPE_HUGE || -- type == SXE2_ALLOC_TYPE_PREFER_HUGE ) { -- ret = sxe2_huge_buf_alloc(ctx, buf, size); -- if (!ret) { -- goto end; -- } -- -- if (type == SXE2_ALLOC_TYPE_HUGE) { -- SXE2_VERBS_LOG_ERROR_BDF("Huge mode allocation fail, page_size:%zu, size:%zu", -- page_size, size); -- goto end; -- } -- -- SXE2_VERBS_LOG_ERROR_BDF("Huge mode allocation fail, fallback to 
default mode, type:%d, \ -- page_size:%zu, size:%zu", type, page_size, size); -- } -- -- ret = sxe2_buf_alloc(ctx, buf, size, page_size); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("cmr_buf_alloc error, page_size:%zu, size:%zu,ret:%d", -- page_size, size, ret); -- } -- --end: -- return ret; -- --} -- --void sxe2_actual_buf_free(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf) --{ -- struct sxe2_rdma_ucontext *sctx = ctx; -- -- if ((buf == NULL) || -- (buf->buf == NULL) || -- (ctx == NULL)) { -- SXE2_VERBS_LOG_ERROR_BDF( "context or buf or buf->buf is NULL"); -- errno = EINVAL; -- goto end; -- } -- -- switch (buf->type) { -- case SXE2_ALLOC_TYPE_ANON: -- sxe2_buf_free(buf); -- break; -- -- case SXE2_ALLOC_TYPE_HUGE: -- sxe2_huge_buf_free(ctx, buf, true); -- break; -- -- default: -- SXE2_VERBS_LOG_ERROR_BDF("Bad allocation type:%d", buf->type); -- } --end: -- return; --} -- --void sxe2_alloc_type_get(const char *component, sxe2_alloc_type_e *alloc_type, sxe2_alloc_type_e default_type) --{ -- char *env_value; -- char name[SXE2_ALLOC_ENV_NAME]; -- -- snprintf(name, sizeof(name), "%s_ALLOC_TYPE", component); -- -- *alloc_type = default_type; -- -- env_value = getenv(name); -- if (env_value) { -- if (!strcasecmp(env_value, "ANON")) { -- *alloc_type = SXE2_ALLOC_TYPE_ANON; -- } else if (!strcasecmp(env_value, "HUGE")){ -- *alloc_type = SXE2_ALLOC_TYPE_HUGE; -- } else if (!strcasecmp(env_value, "PREFER_HUGE")){ -- *alloc_type = SXE2_ALLOC_TYPE_PREFER_HUGE; -- } -- } --} -- --int sxe2_buf_alloc(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf, size_t size, size_t page_size) --{ -- int ret; -- size_t al_size; -- struct sxe2_rdma_ucontext *sctx = ctx; -- -- if ((buf == NULL) || (ctx == NULL)) { -- SXE2_VERBS_LOG_ERROR_BDF("context of buf is NULL"); -- ret = EPERM; -- goto end; -- } -- -- al_size = align(size, page_size); -- ret = posix_memalign(&buf->buf, page_size, al_size); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("posix_memalign error,page_size:%zu,al_size:%zu, ret:%d", 
page_size, al_size, ret); -- goto end; -- } -- -- ret = ibv_dontfork_range(buf->buf, al_size); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("dontfork_range error,buf:%p,al_size:%zu, ret:%d", buf->buf, al_size, ret); -- free(buf->buf); -- buf->buf = NULL; -- ret = EPERM; -- goto end; -- } -- -- buf->length = al_size; -- buf->type = SXE2_ALLOC_TYPE_ANON; -- --end: -- return ret; --} -- --void sxe2_buf_free(sxe2_buf_s *buf) --{ -- ibv_dofork_range(buf->buf, buf->length); -- free(buf->buf); -- buf->buf = NULL; -- -- return; --} -- -diff -Naur rdma-core-48.0/providers/sxe2/buf.h rdma-core-48.0.bak/providers/sxe2/buf.h ---- rdma-core-48.0/providers/sxe2/buf.h 2026-05-26 10:42:01.853075201 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/buf.h 1970-01-01 08:00:00.000000000 +0800 -@@ -1,42 +0,0 @@ -- --#ifndef __BUF_H__ --#define __BUF_H__ -- --#include "sxe2_common.h" --#include --#include --#include -- --#define SXE2_SHM_ADDR NULL --#define SXE2_SHMAT_FLAGS 0 --#define SXE2_ALLOC_ENV_NAME (128) -- --#ifndef HPAGE_SIZE --#define HPAGE_SIZE (2UL * 1024 * 1024) --#endif -- --#define SXE2_SHM_LENGTH HPAGE_SIZE --#define SXE2_SHM_SINGLE_CHUNK_SIZE 32768 -- --int sxe2_prefered_buf_alloc(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf, size_t size, size_t page_size, -- sxe2_alloc_type_e type); -- --void sxe2_actual_buf_free(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf); -- --void sxe2_alloc_type_get(const char *component, sxe2_alloc_type_e *alloc_type, -- sxe2_alloc_type_e default_type); -- --int sxe2_buf_alloc(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf, size_t size, size_t page_size); -- --void sxe2_buf_free(sxe2_buf_s *buf); -- -- sxe2_hugetlb_mem_s *sxe2_huge_mem_alloc(struct sxe2_rdma_ucontext *ctx, size_t size); -- -- void sxe2_huge_buf_free(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf, bool dofork_range); -- -- void sxe2_huge_mem_free(struct sxe2_rdma_ucontext *ctx, sxe2_hugetlb_mem_s *hmem); -- -- int sxe2_huge_buf_alloc(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s 
*buf, size_t size); -- --#endif -- -diff -Naur rdma-core-48.0/providers/sxe2/CMakeLists.txt rdma-core-48.0.bak/providers/sxe2/CMakeLists.txt ---- rdma-core-48.0/providers/sxe2/CMakeLists.txt 2026-05-26 10:42:01.854075208 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 -@@ -1,51 +0,0 @@ --# SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) --if(DEFINED ENV{MD_MAKEMODE}) -- set(makemode $ENV{MD_MAKEMODE}) --else() -- set(makemode release) --endif() --if(makemode STREQUAL "release") -- add_definitions(-DSXE2_CFG_RELEASE) --else() -- add_definitions(-DSXE2_CFG_DEBUG) --endif() --if(DEFINED ENV{ASAN}) -- set(asan $ENV{ASAN}) --else() -- set(asan no) --endif() --if(asan STREQUAL "yes") -- add_definitions(-DSXE2_SUPPORT_ASAN) --endif() --if(DEFINED ENV{INJECT}) -- set(inject $ENV{INJECT}) --else() -- set(inject no) --endif() --if(inject STREQUAL "yes") -- add_definitions(-DSXE2_SUPPORT_INJECT) --endif() --if(DEFINED ENV{IO_STAT}) -- set(iostat $ENV{IO_STAT}) --else() -- set(iostat no) --endif() --if(iostat STREQUAL "yes") -- add_definitions(-DSXE2_SUPPORT_IO_STAT) --endif() --add_compile_options(-Werror) --rdma_provider(sxe2 -- sxe2_common.c -- mr.c -- io.c -- ah.c -- cq.c -- pd.c -- qp.c -- db.c -- device_port.c -- log.c -- srq.c -- mc.c -- buf.c --) -diff -Naur rdma-core-48.0/providers/sxe2/cq.c rdma-core-48.0.bak/providers/sxe2/cq.c ---- rdma-core-48.0/providers/sxe2/cq.c 2026-05-26 10:42:01.853075201 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/cq.c 1970-01-01 08:00:00.000000000 +0800 -@@ -1,405 +0,0 @@ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#include "sxe2_common.h" --#include "sxe2_abi.h" --#include "sxe2-abi.h" --#include "log.h" --#include "io.h" 
--#include "buf.h" -- --#define SXE2_HCA_CORE_CLOCK_800_MHZ (800) -- --#define SXE2_VERBS_MIN_CQ_SIZE 4 --#define SXE2_VERBS_MAX_CQ_SIZE 2097152 -- --#define SXE2_UCQ_DB_NOTE_CMD_SN_SHIFT 29 --#define SXE2_UCQ_DB_NOTE_CMD_SN 3 -- --enum { -- UCREATE_CQ_SUPPORTED_FLAGS = -- IBV_CREATE_CQ_ATTR_SINGLE_THREADED | -- IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN --}; -- --enum { -- UCREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS | -- IBV_CQ_INIT_ATTR_MASK_PD --}; -- --static inline __u32 get_cqe_count(__u32 ncqe) --{ -- ncqe++; -- -- ncqe = (__u32)roundup_pow_of_two(ncqe); -- -- if (ncqe < SXE2_U_MINCQ_SIZE) { -- ncqe = SXE2_U_MINCQ_SIZE; -- } -- return ncqe; --} -- --static inline size_t get_cq_total_bytes(unsigned int ncqe) --{ -- const long page_size = sysconf(_SC_PAGE_SIZE); -- return roundup(ncqe * sizeof(struct sxe2_cqe), page_size); --} -- --static void sxe2_uk_cq_init(struct sxe2_cq_uk *cq, struct sxe2_cq_uk_init_info *info) --{ -- cq->cq_base = info->cq_base; -- cq->cqe_alloc_db = info->cqe_alloc_db; -- cq->cq_id = info->cq_id; -- cq->ncqe = info->ncqe; -- cq->doorbell_note = info->doorbell_note; -- SXE2_RING_INIT(cq->cq_ring, cq->ncqe); -- cq->polarity = 1; -- cq->arm_sn = 0; -- return; --} -- --static void sxe2_ibvcq_ex_fill_priv_funcs(struct sxe2_ucq *cq, struct ibv_cq_init_attr_ex *cq_attr) --{ -- struct ibv_cq_ex *ibvcq_ex = &cq->verbs_cq.cq_ex; -- -- ibvcq_ex->start_poll = sxe2_start_poll; -- ibvcq_ex->end_poll = sxe2_end_poll; -- ibvcq_ex->next_poll = sxe2_next_poll; -- -- if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) { -- ibvcq_ex->read_completion_ts = sxe2_wc_read_completion_ts; -- cq->report_rtt = true; -- } -- if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) { -- ibvcq_ex->read_completion_wallclock_ns = sxe2_wc_read_completion_wallclock_ns; -- cq->report_rtt = true; -- } -- -- ibvcq_ex->read_opcode = sxe2_wc_read_opcode; -- ibvcq_ex->read_vendor_err = sxe2_wc_read_vendor_err; -- ibvcq_ex->read_wc_flags = 
sxe2_wc_read_wc_flags; -- -- if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) { -- ibvcq_ex->read_byte_len = sxe2_wc_read_byte_len; -- } -- if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM) { -- ibvcq_ex->read_imm_data = sxe2_wc_read_imm_data; -- } -- if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM) { -- ibvcq_ex->read_qp_num = sxe2_wc_read_qp_num; -- } -- if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP) { -- ibvcq_ex->read_src_qp = sxe2_wc_read_src_qp; -- } -- if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID) { -- ibvcq_ex->read_slid = sxe2_wc_read_slid; -- } -- if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL) { -- ibvcq_ex->read_sl = sxe2_wc_read_sl; -- } -- if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS) { -- ibvcq_ex->read_dlid_path_bits = sxe2_wc_read_dlid_path_bits; -- } --} -- --static bool ucreate_cq_attr_check(struct sxe2_rdma_ucontext *sctx, -- const struct ibv_cq_init_attr_ex *cq_attr) --{ -- bool isFail = true; -- -- if (cq_attr->comp_mask & (~UCREATE_CQ_SUPPORTED_COMP_MASK)) { -- SXE2_VERBS_LOG_ERROR_BDF("Unsupported comp_mask(%u) for create cq\n", cq_attr->comp_mask); -- goto end; -- } -- -- if ((cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS) && -- (cq_attr->flags & (~UCREATE_CQ_SUPPORTED_FLAGS))) { -- SXE2_VERBS_LOG_ERROR_BDF("Unsupported creation flags(%u) requested for create cq\n", cq_attr->flags); -- goto end; -- } -- -- isFail = false; --end: -- return isFail; --} --static int sxe2_alloc_cq_buf(struct sxe2_rdma_ucontext *sctx, sxe2_buf_s *buf, size_t size) --{ -- int ret; -- sxe2_alloc_type_e type; -- sxe2_alloc_type_e default_type = SXE2_ALLOC_TYPE_ANON; -- long page_size = 0; -- -- page_size = sysconf(_SC_PAGE_SIZE); -- if (page_size < 0) { -- SXE2_VERBS_LOG_ERROR_BDF("get system page size failed."); -- ret = EPERM; -- goto end; -- } -- -- sxe2_alloc_type_get(SXE2_CQ_PREFIX, &type, default_type); -- -- ret = sxe2_prefered_buf_alloc( -- sctx, buf, size, page_size, type); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("cq buf alloc err ret (%d)", ret); -- goto 
end; -- } -- -- memset(buf->buf, 0, buf->length); -- --end: -- return ret; --} --static void sxe2_free_cq_buf(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf) --{ -- sxe2_actual_buf_free(ctx, buf); --} -- --static struct ibv_cq_ex *ucreate_cq(struct ibv_context *context, -- struct ibv_cq_init_attr_ex *cq_attr, -- bool ext_cq) --{ -- int ret; -- unsigned int ncqe; -- struct sxe2_ucreate_cq_ex cmd_ex; -- struct sxe2_ucreate_cq_ex_resp resp_ex; -- struct sxe2_rdma_ucontext *sctx; -- struct sxe2_ucq *ucq; -- struct sxe2_cq_uk_init_info info; -- size_t total_size = 0; -- -- sctx = container_of(context, struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- SXE2_VERBS_LOG_INFO_BDF("ucreate_cq start \n"); -- -- if ((cq_attr->cqe <= 0) || (cq_attr->cqe > sctx->uk_attrs.max_hw_cq_size - 1)) { -- errno = EINVAL; -- SXE2_VERBS_LOG_ERROR_BDF("inv cqe:%d or vector:%d is invalid\n", cq_attr->cqe, cq_attr->comp_vector); -- goto null; -- } -- -- if (ucreate_cq_attr_check(sctx, cq_attr)) { -- errno = EINVAL; -- SXE2_VERBS_LOG_ERROR_BDF("invalid param\n"); -- goto null; -- } -- -- ncqe = cq_attr->cqe; -- ucq = calloc(1, sizeof(*ucq)); -- if (!ucq) { -- errno = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("cq user mem alloc failed, mem size:%ld\n", sizeof(*ucq)); -- goto null; -- } -- -- if (pthread_spin_init(&ucq->lock, PTHREAD_PROCESS_SHARED)) { -- errno = EFAULT; -- SXE2_VERBS_LOG_ERROR_BDF("cq lock init failed\n"); -- goto err_lock_init; -- } -- -- memset(&cmd_ex, 0, sizeof(cmd_ex)); -- memset(&resp_ex, 0, sizeof(resp_ex)); -- memset(&info, 0, sizeof(info)); -- -- ucq->comp_vector = cq_attr->comp_vector; -- info.ncqe = get_cqe_count(cq_attr->cqe); -- if ((info.ncqe < SXE2_VERBS_MIN_CQ_SIZE) || (info.ncqe > SXE2_VERBS_MAX_CQ_SIZE)) { -- errno = EINVAL; -- SXE2_VERBS_LOG_ERROR_BDF("cq size(%d)", info.ncqe); -- goto err_cq_base; -- } -- -- total_size = get_cq_total_bytes(info.ncqe); -- -- ucq->buf_size = total_size; -- if (sxe2_alloc_cq_buf(sctx, &ucq->buf, total_size)) { -- errno = ENOMEM; -- 
SXE2_VERBS_LOG_ERROR_BDF("ncqe %#x total_size %zu", ncqe, total_size); -- goto err_cq_base; -- } -- -- info.cq_base = ucq->buf.buf; -- if (!info.cq_base) { -- errno = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("cq buf alloc failed, ncqe:%d size:%ld\n", info.ncqe, total_size); -- goto err_cq_base; -- } -- memset(info.cq_base, 0, total_size); -- -- info.doorbell_note = sxe2_alloc_hw_buf(SXE2_DB_NOTE_SIZE); -- if (!info.doorbell_note) { -- errno = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("cq db note buf alloc failed, ncqe:%d size:%d\n", info.ncqe, SXE2_DB_NOTE_SIZE); -- goto err_alloc_db; -- } -- memset(info.doorbell_note, 0, SXE2_DB_NOTE_SIZE); -- set_32bit_val(info.doorbell_note, 4, (__u32)(SXE2_UCQ_DB_NOTE_CMD_SN) << SXE2_UCQ_DB_NOTE_CMD_SN_SHIFT); -- -- cq_attr->cqe = info.ncqe; -- cmd_ex.user_cq_buf = (__u64)((uintptr_t)info.cq_base); -- cmd_ex.user_cq_db_note = (__u64)((uintptr_t)info.doorbell_note); -- -- SXE2_VERBS_LOG_INFO_BDF("create cq comp_vector%d cqe:%d \n", cq_attr->comp_vector, cq_attr->cqe); -- -- ret = ibv_cmd_create_cq_ex(context, cq_attr, &ucq->verbs_cq, &cmd_ex.ibv_cmd, -- sizeof(cmd_ex), &resp_ex.ibv_resp, sizeof(resp_ex), 0); -- if (ret != 0) { -- errno = ret; -- SXE2_VERBS_LOG_ERROR_BDF("ibv cmd create cq err(%d)\n", ret); -- goto err_create_cq; -- } -- -- if (ext_cq) { -- sxe2_ibvcq_ex_fill_priv_funcs(ucq, cq_attr); -- } -- -- info.cq_id = resp_ex.cq_id; -- ucq->verbs_cq.cq.cqe = (int)ncqe; -- SXE2_VERBS_LOG_INFO_BDF("create cq cqn(%#x) cqsize %d\n", info.cq_id, info.ncqe); -- -- info.cqe_alloc_db = (__le64 *)sctx->cq_db_arm; -- sxe2_uk_cq_init(&ucq->cq, &info); -- -- return &ucq->verbs_cq.cq_ex; -- --err_create_cq: -- sxe2_free_hw_buf(info.doorbell_note, SXE2_DB_NOTE_SIZE); -- --err_alloc_db: -- sxe2_free_cq_buf(sctx, &ucq->buf); -- --err_cq_base: -- pthread_spin_destroy(&ucq->lock); -- --err_lock_init: -- free(ucq); -- --null: -- return NULL; --} -- --struct ibv_cq *sxe2_ucreate_cq(struct ibv_context *context, int cqe, -- struct ibv_comp_channel 
*channel, int comp_vector) --{ -- struct ibv_cq_ex *cq; -- struct ibv_cq_init_attr_ex cq_attr; -- struct sxe2_rdma_ucontext *sctx; -- -- sctx = container_of(context, struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- SXE2_VERBS_LOG_INFO_BDF("sxe2_ucreate_cq comp_vector%d cqe:%d \n", comp_vector, cqe); -- SXE2_VERBS_LOG_INFO_BDF("sxe2_ucreate_cq ctx num_comp_vectors:%d\n", context->num_comp_vectors); -- memset(&cq_attr, 0, sizeof(cq_attr)); -- cq_attr.cqe = cqe; -- cq_attr.channel = channel; -- cq_attr.comp_vector = comp_vector; -- -- cq = ucreate_cq(context, &cq_attr, false); -- if (!cq) { -- SXE2_VERBS_LOG_ERROR_BDF("user create cq failed\n"); -- } -- -- return cq ? ibv_cq_ex_to_cq(cq) : NULL; --} -- --struct ibv_cq_ex *sxe2_ucreate_cq_ex(struct ibv_context *context, -- struct ibv_cq_init_attr_ex *cq_attr) --{ -- struct ibv_cq_ex *cq; -- struct sxe2_rdma_ucontext *sctx; -- -- sctx = container_of(context, struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- if (cq_attr->wc_flags & ~SXE2_CQ_SUPPORTED_WC_FLAGS) { -- SXE2_VERBS_LOG_ERROR_BDF("user create cq ex flags err\n"); -- errno = EOPNOTSUPP; -- cq = NULL; -- goto end; -- } -- -- cq = ucreate_cq(context, cq_attr, true); -- --end: -- return cq; --} -- --int sxe2_umodify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr) --{ -- int ret; -- struct ibv_modify_cq cmd; -- struct sxe2_rdma_ucontext *sctx; -- -- if (!cq || !attr) { -- SXE2_VERBS_LOG_ERROR("user modify cq para err\n"); -- ret = EINVAL; -- goto end; -- } -- sctx = container_of(cq->context, struct sxe2_rdma_ucontext, ibv_ctx.context); -- memset(&cmd, 0, sizeof(cmd)); -- -- ret = ibv_cmd_modify_cq(cq, attr, &cmd, sizeof(cmd)); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("ibv cmd modify cq err(%d)\n", ret); -- } -- --end: -- return ret; --} -- --int sxe2_udestroy_cq(struct ibv_cq *cq) --{ -- struct sxe2_ucq *ucq; -- int ret; -- struct sxe2_rdma_ucontext *sctx; -- -- sctx = container_of(cq->context, struct sxe2_rdma_ucontext, ibv_ctx.context); -- ucq = 
container_of(cq, struct sxe2_ucq, verbs_cq.cq); -- -- ret = pthread_spin_destroy(&ucq->lock); -- if (ret != 0) { -- SXE2_VERBS_LOG_ERROR("user destroy cq lock failed\n"); -- goto end; -- } -- -- ret = ibv_cmd_destroy_cq(cq); -- if (ret != 0) { -- SXE2_VERBS_LOG_ERROR("ibv cmd destory cq err(%d)\n", ret); -- goto end; -- } -- -- sxe2_free_cq_buf(sctx, &ucq->buf); -- sxe2_free_hw_buf(ucq->cq.doorbell_note, SXE2_DB_NOTE_SIZE); -- ucq->cq.doorbell_note = NULL; -- free(ucq); --end: -- return ret; --} -- -diff -Naur rdma-core-48.0/providers/sxe2/db.c rdma-core-48.0.bak/providers/sxe2/db.c ---- rdma-core-48.0/providers/sxe2/db.c 2026-05-26 10:42:01.853075201 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/db.c 1970-01-01 08:00:00.000000000 +0800 -@@ -1,622 +0,0 @@ -- --#include --#include --#include --#include --#include --#include --#include --#include "log.h" --#include "sxe2_common.h" -- --int gsingle_threaded = 0; -- --#define SXE2_VERBS_STRTOL_BASE 0 --#define SXE2_VERBS_LLWQE_PER_DB 15 --#define SXE2_VERBS_PF_DB_PAGE 2 --#define SXE2_VERBS_VF_DB_PAGE 1 --#define SXE2_VERBS_DED_LLWQE_PERCENT \ -- 3 --#define SXE2_VERBS_LLWQE_OFFSET 0x100 --#define SXE2_VERBS_LLWQE_DB_OFFSET \ -- 0x10 --#define SXE2_VERBS_LLWQE_SIZE 256 --#define SXE2_VERBS_LLWQE_DB_SIZE 16 --#define SXE2_VERBS_CQ_ARM_OFFSET 0x8 --#define SXE2_VERBS_CQ_INFO_OFFSET 0xC -- --enum sxe2_verbs_cmd_attr_num { -- SXE2_VERBS_ONE_CMD_ATTR = 1, -- SXE2_VERBS_TWO_CMD_ATTRS = 2, -- SXE2_VERBS_THREE_CMD_ATTRS = 3, -- SXE2_VERBS_FOUR_CMD_ATTRS = 4, -- SXE2_VERBS_FIVE_CMD_ATTRS = 5, --}; -- -- --static struct sxe2_verbs_llwqe *db_ualloc_page(struct ibv_context *context, -- __u32 flags, -- __u32 db_mmap_size, -- __u32 db_page_id, -- __u64 db_mmap_offset) --{ -- struct sxe2_verbs_llwqe *llwqe = NULL; -- struct sxe2_rdma_ucontext *sctx = to_sctx(context); -- struct sxe2_db_mmap_db_page_addr *page_addr_entry = NULL; -- struct sxe2_db_mmap_db_page_addr *mmap_addr_entry = NULL; -- struct sxe2_db_mmap_db_page_addr 
*tmp_mmap_addr_entry = NULL; -- __u32 db_page_id_allign; -- __u32 page_id_mod; -- long page_size = sysconf(_SC_PAGE_SIZE); -- __u32 db_page_multiplier = page_size/SXE2_DB_PAGE_SIZE; -- -- llwqe = calloc(1, sizeof(*llwqe)); -- if (!llwqe) { -- SXE2_VERBS_LOG_ERROR_BDF("llwqe calloc fail"); -- goto end; -- } -- llwqe->db_mmap_size = db_mmap_size; -- llwqe->db_page_id = db_page_id; -- llwqe->db_mmap_offset = db_mmap_offset; -- llwqe->db_page_addr = NULL; -- -- page_id_mod = llwqe->db_page_id % db_page_multiplier; -- if (page_id_mod != 0) { -- list_for_each_safe(&sctx->mmap_page_addr_list, mmap_addr_entry, tmp_mmap_addr_entry, -- list_entry) -- { -- db_page_id_allign = llwqe->db_page_id - page_id_mod; -- if (mmap_addr_entry->db_page_id == db_page_id_allign) { -- llwqe->db_page_addr = (void*)((__u64)mmap_addr_entry->db_page_addr + page_id_mod * SXE2_DB_PAGE_SIZE); -- llwqe->db_page_id = mmap_addr_entry->db_page_id + page_id_mod; -- break; -- } -- } -- } -- if (!llwqe->db_page_addr) { -- llwqe->db_page_addr = -- mmap(NULL, db_mmap_size, PROT_WRITE | PROT_READ, MAP_SHARED, -- context->cmd_fd, db_mmap_offset); -- if (llwqe->db_page_addr == MAP_FAILED) { -- SXE2_VERBS_LOG_ERROR_BDF("mmap db fail"); -- goto free_llwqe; -- } -- page_addr_entry = calloc(1, sizeof(*page_addr_entry)); -- if (!page_addr_entry) { -- SXE2_VERBS_LOG_ERROR_BDF("page addr entry alloc fail"); -- goto unmap_page; -- } -- page_addr_entry->db_page_id = llwqe->db_page_id - page_id_mod; -- page_addr_entry->db_page_addr = llwqe->db_page_addr; -- page_addr_entry->mmap_size = db_mmap_size; -- list_add_tail(&sctx->mmap_page_addr_list, &page_addr_entry->list_entry); -- -- if (page_id_mod != 0) { -- llwqe->db_page_addr = (void*)((__u64)llwqe->db_page_addr + page_id_mod * SXE2_DB_PAGE_SIZE); -- } -- } -- llwqe->mmaped_entry = true; -- -- SXE2_VERBS_LOG_DEBUG_BDF( -- "DB ALLOC:llwqe->db_handle:%#x, db_mmap_offset:%#llx, length:%#x, " -- "pageid:%#x, over", -- llwqe->db_handle, (__u64)llwqe->db_mmap_offset, 
llwqe->db_mmap_size, -- llwqe->db_page_id); -- --end: -- return llwqe; --unmap_page: -- munmap(llwqe->db_page_addr, db_mmap_size); --free_llwqe: -- free(llwqe); -- llwqe = NULL; -- goto end; --} -- --static void db_uinsert_llwqe(struct sxe2_rdma_ucontext *sctx, -- struct sxe2_verbs_llwqe *llwqe_first) --{ -- struct list_head *head; -- struct sxe2_verbs_llwqe *llwqe = llwqe_first; -- __u64 idx; -- -- if (llwqe_first->qp_dedicated) { -- head = &sctx->dedicated_llwqe_list; -- } else { -- head = &sctx->shared_llwqe_list; -- } -- -- for (idx = 0; idx < SXE2_VERBS_LLWQE_PER_DB; idx++) { -- if (idx != 0) { -- llwqe = calloc(1, sizeof(*llwqe)); -- } -- if (!llwqe) { -- SXE2_VERBS_LOG_ERROR_BDF("llwqe is NULL"); -- goto end; -- } -- -- llwqe->wqe_addr = llwqe_first->db_page_addr + SXE2_VERBS_LLWQE_OFFSET + -- (idx * SXE2_VERBS_LLWQE_SIZE); -- llwqe->db_addr = llwqe_first->db_page_addr + -- SXE2_VERBS_LLWQE_DB_OFFSET + -- (idx * SXE2_VERBS_LLWQE_DB_SIZE); -- llwqe->wqe_buf_size = -- llwqe_first->no_ll_mode ? 0 : SXE2_VERBS_LLWQE_SIZE; -- llwqe->num_llwqe = llwqe_first->no_ll_mode ? 0 : 1; -- list_node_init(&llwqe->list_entry); -- list_add_tail(head, &llwqe->list_entry); -- pthread_spin_init(&llwqe->lock, PTHREAD_PROCESS_SHARED); -- if (idx != 0) { -- llwqe->db_page_addr = llwqe_first->db_page_addr; -- llwqe->db_page_id = llwqe_first->db_page_id; -- llwqe->db_handle = llwqe_first->db_handle; -- llwqe->no_ll_mode = llwqe_first->no_ll_mode; -- llwqe->db_mmap_offset = llwqe_first->db_mmap_offset; -- } -- if (llwqe_first->qp_dedicated) { -- sctx->alloc_dedicated_llwqes++; -- llwqe->qp_dedicated = true; -- } else { -- sctx->alloc_shared_llwqes++; -- llwqe->qp_shared = true; -- } -- llwqe->need_lock = llwqe_first->qp_shared && (!gsingle_threaded); -- } -- --end: -- return; --} -- --int sxe2_uget_single_threaded_env(void) --{ -- char *env; -- int ret_code = 0; -- -- env = getenv(SXE2_SINGLE_THREADED); -- if (env) { -- ret_code = strncmp(env, "1", 1) ? 
0 : 1; -- } -- return ret_code; --} -- --int sxe2_uget_ll_mode(void) --{ -- char *env; -- int ret_code = 0; -- -- env = getenv(SXE2_LL_MODE); -- if (env) { -- ret_code = strncmp(env, "0", 1) ? 1 : 0; -- } -- return ret_code; --} -- --int sxe2_uget_tot_llwqe(struct sxe2_rdma_ucontext *sctx, -- struct sxe2_get_context_resp *resp) --{ -- errno = 0; -- char *env; -- int total_llwqe = 0; -- int config_tot_llwqe = 0; -- int max_tot_llwqe = 0; -- int total_db_page = 0; -- int cal_db_page = 0; -- -- max_tot_llwqe = (int)(resp->max_db * SXE2_VERBS_LLWQE_PER_DB); -- -- if (resp->is_pf) { -- cal_db_page = (SXE2_VERBS_PF_DB_PAGE < resp->max_db) -- ? SXE2_VERBS_PF_DB_PAGE -- : (int)resp->max_db; -- } else { -- cal_db_page = (SXE2_VERBS_VF_DB_PAGE < resp->max_db) -- ? SXE2_VERBS_VF_DB_PAGE -- : (int)resp->max_db; -- } -- -- env = getenv(SXE2_TOTAL_LL_WQE); -- if (env) { -- config_tot_llwqe = (int)strtol(env, NULL, SXE2_VERBS_STRTOL_BASE); -- if (errno == ERANGE || config_tot_llwqe < 1) { -- total_db_page = cal_db_page; -- SXE2_VERBS_LOG_WARN_BDF( -- "Config total_llwqe %d invalid, modify total_db_page %d", -- config_tot_llwqe, total_db_page); -- } else if (config_tot_llwqe > max_tot_llwqe) { -- total_db_page = (int)resp->max_db; -- SXE2_VERBS_LOG_WARN_BDF("Config total_llwqe %d > max_tot_llwqe " -- "%d, modify total_db_page %d", -- config_tot_llwqe, max_tot_llwqe, -- total_db_page); -- } else { -- total_db_page = (config_tot_llwqe + SXE2_VERBS_LLWQE_PER_DB - 1) / -- SXE2_VERBS_LLWQE_PER_DB; -- SXE2_VERBS_LOG_INFO_BDF( -- "Config and align total_llwqe %d, total_db_page %d", -- config_tot_llwqe, total_db_page); -- } -- } else { -- total_db_page = cal_db_page; -- SXE2_VERBS_LOG_WARN_BDF( -- "No config total_llwqe, modify total_db_page %d", total_db_page); -- } -- -- total_llwqe = total_db_page * SXE2_VERBS_LLWQE_PER_DB; -- return total_llwqe; --} -- --int sxe2_uget_ded_llwqe(struct sxe2_rdma_ucontext *sctx, int total_llwqe) --{ -- errno = 0; -- char *env; -- int cal_ded_llwqe = 0; 
-- int config_ded_llwqe = 0; -- int ded_llwqe = 0; -- -- cal_ded_llwqe = ((total_llwqe / SXE2_VERBS_LLWQE_PER_DB) / -- SXE2_VERBS_DED_LLWQE_PERCENT) * -- SXE2_VERBS_LLWQE_PER_DB; -- -- env = getenv(SXE2_DEDICATED_LL_WQE); -- if (env) { -- config_ded_llwqe = (int)strtol(env, NULL, SXE2_VERBS_STRTOL_BASE); -- if (errno == ERANGE || config_ded_llwqe < 0) { -- ded_llwqe = cal_ded_llwqe; -- SXE2_VERBS_LOG_WARN_BDF( -- "Config ded_llwqe %d invalid, modify ded_llwqe %d", -- config_ded_llwqe, ded_llwqe); -- } else if (config_ded_llwqe > total_llwqe) { -- ded_llwqe = total_llwqe; -- SXE2_VERBS_LOG_WARN_BDF( -- "Config ded_llwqe %d > total_llwqe %d, modify ded_llwqe %d", -- config_ded_llwqe, total_llwqe, total_llwqe); -- } else { -- ded_llwqe = ((config_ded_llwqe + SXE2_VERBS_LLWQE_PER_DB - 1) / -- SXE2_VERBS_LLWQE_PER_DB) * -- SXE2_VERBS_LLWQE_PER_DB; -- SXE2_VERBS_LOG_INFO_BDF("Config and align ded_llwqe %d", -- config_ded_llwqe); -- } -- } else { -- ded_llwqe = cal_ded_llwqe; -- SXE2_VERBS_LOG_WARN_BDF("No config ded_llwqe, modify ded_llwqe %d", -- ded_llwqe); -- } -- -- return ded_llwqe; --} -- --struct sxe2_verbs_llwqe * --db_ualloc_page_and_llwqes(struct ibv_context *context, bool dedicated, -- __u32 db_mmap_size, -- __u32 db_page_id, -- __u64 db_mmap_offset) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_verbs_llwqe *llwqe = NULL; -- -- sctx = to_sctx(context); -- if (db_page_id == 0) { -- SXE2_VERBS_LOG_ERROR_BDF("have no llwqe page"); -- goto end; -- } -- llwqe = db_ualloc_page(context, SXE2_VERBS_DB_PAGE_TYPE_LLWQE, db_mmap_size, db_page_id, db_mmap_offset); -- if (!llwqe) { -- SXE2_VERBS_LOG_ERROR_BDF("alloc db page fail"); -- goto end; -- } -- -- if (dedicated) { -- llwqe->qp_dedicated = true; -- } else { -- llwqe->qp_shared = true; -- } -- -- db_uinsert_llwqe(sctx, llwqe); -- --end: -- return llwqe; --} -- --static void *db_ummap(int fd, off_t offset) --{ -- void *map = NULL; -- long page_size = sysconf(_SC_PAGE_SIZE); -- -- if (page_size < 0) { 
-- map = MAP_FAILED; -- goto end; -- } -- map = mmap(NULL, (size_t)page_size, PROT_WRITE | PROT_READ, MAP_SHARED, fd, -- offset); -- if (map == MAP_FAILED) { -- return map; -- } -- -- if (ibv_dontfork_range(map, page_size)) { -- munmap(map, page_size); -- return MAP_FAILED; -- } --end: -- return map; --} -- --static void db_umunmap(void *map) --{ -- long page_size = sysconf(_SC_PAGE_SIZE); -- if (page_size < 0) { -- goto end; -- } -- -- ibv_dofork_range(map, (size_t)page_size); -- munmap(map, page_size); --end: -- return; --} -- --int sxe2_uinit_doorbell(struct sxe2_rdma_ucontext *sctx, -- struct sxe2_get_context_resp *resp) --{ -- int ret_code = 0; -- int config_ll_mode = 0; -- int total_llwqe = 0; -- int ded_llwqe = 0; -- __u64 mmap_key; -- long page_size = sysconf(_SC_PAGE_SIZE); -- -- gsingle_threaded = sxe2_uget_single_threaded_env( ); -- -- config_ll_mode = sxe2_uget_ll_mode( ); -- -- if (config_ll_mode) { -- total_llwqe = sxe2_uget_tot_llwqe(sctx, resp); -- if (total_llwqe) { -- ded_llwqe = sxe2_uget_ded_llwqe(sctx, total_llwqe); -- } else { -- ret_code = EINVAL; -- SXE2_VERBS_LOG_ERROR_BDF( -- "config ll_mode but total_llwqe=0, ret_code %d", ret_code); -- goto end; -- } -- } -- -- sctx->ll_mode = config_ll_mode; -- sctx->tot_ll_wqes = (__u32)total_llwqe; -- sctx->ded_ll_wqes = (__u32)ded_llwqe; -- sctx->shared_ll_wqes = (__u32)(total_llwqe - ded_llwqe); -- list_head_init(&sctx->shared_llwqe_list); -- list_head_init(&sctx->dedicated_llwqe_list); -- list_head_init(&sctx->mmap_page_addr_list); -- pthread_mutex_init(&sctx->alloc_llwqe_mutex, NULL); -- sctx->alloc_dedicated_llwqes = 0; -- sctx->alloc_shared_llwqes = 0; -- -- mmap_key = resp->db_mmap_key; -- sctx->qp_db_no_llwqe = db_ummap(sctx->ibv_ctx.context.cmd_fd, (off_t)mmap_key); -- if (sctx->qp_db_no_llwqe == MAP_FAILED) { -- ret_code = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("mmap db page fail, ret_code %d", ret_code); -- goto end; -- } -- if (page_size > SXE2_DB_PAGE_SIZE) -- sctx->qp_db_no_llwqe = 
(void*)((__u64)sctx->qp_db_no_llwqe + SXE2_DB_PAGE_SIZE); -- -- sctx->cq_db_arm = sctx->qp_db_no_llwqe + SXE2_VERBS_CQ_ARM_OFFSET; -- sctx->cq_db_info = sctx->qp_db_no_llwqe + SXE2_VERBS_CQ_INFO_OFFSET; -- -- SXE2_VERBS_LOG_INFO_BDF( -- "init doorbell over: single_thread %d, ll_mode %d, " -- "total_llwqes %#x, ded_llwqes %#x, shared_llwqes %#x, " -- "qp_db_no_llwqe addr %p, cq_db_arm addr %p", -- gsingle_threaded, sctx->ll_mode, sctx->tot_ll_wqes, sctx->ded_ll_wqes, -- sctx->shared_ll_wqes, sctx->qp_db_no_llwqe, sctx->cq_db_arm); -- --end: -- return ret_code; --} -- --void sxe2_ufree_doorbell(struct ibv_context *context) --{ -- struct sxe2_verbs_llwqe *llwqe = NULL; -- struct sxe2_verbs_llwqe *tmp_llwqe = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_db_mmap_db_page_addr *mmap_addr_entry = NULL; -- struct sxe2_db_mmap_db_page_addr *tmp_mmap_addr_entry = NULL; -- long page_size = sysconf(_SC_PAGE_SIZE); -- -- if (context == NULL) { -- SXE2_VERBS_LOG_ERROR_BDF("ibv context is NULL"); -- goto end; -- } -- sctx = to_sctx(context); -- -- list_for_each_safe(&sctx->dedicated_llwqe_list, llwqe, tmp_llwqe, -- list_entry) -- { -- SXE2_VERBS_LOG_DEBUG_BDF( -- "DB FREE:llwqe->db_handle:%#x, db_mmap_offset:%#llx, " -- "db_mmap_size:%#x, db_page_id:%#x, start(dedicated)", -- llwqe->db_handle, (__u64)llwqe->db_mmap_offset, llwqe->db_mmap_size, -- llwqe->db_page_id); -- list_del(&llwqe->list_entry); -- free(llwqe); -- llwqe = NULL; -- } -- -- list_for_each_safe(&sctx->shared_llwqe_list, llwqe, tmp_llwqe, list_entry) -- { -- SXE2_VERBS_LOG_DEBUG_BDF( -- "DB FREE:llwqe->db_handle:%#x, db_mmap_offset:%#llx, " -- "db_mmap_size:%#x, db_page_id:%#x, start(shared)", -- llwqe->db_handle, (__u64)llwqe->db_mmap_offset, llwqe->db_mmap_size, -- llwqe->db_page_id); -- list_del(&llwqe->list_entry); -- free(llwqe); -- llwqe = NULL; -- } -- -- list_for_each_safe(&sctx->mmap_page_addr_list, mmap_addr_entry, tmp_mmap_addr_entry, -- list_entry) -- { -- if 
(mmap_addr_entry->db_page_addr && -- munmap(mmap_addr_entry->db_page_addr, mmap_addr_entry->mmap_size)) { -- SXE2_VERBS_LOG_ERROR_BDF("munmap db fail"); -- } -- SXE2_VERBS_LOG_DEBUG_BDF("DB FREE: munmap page_addr(%p) page_size(%u)", -- mmap_addr_entry->db_page_addr, mmap_addr_entry->mmap_size); -- list_del(&mmap_addr_entry->list_entry); -- free(mmap_addr_entry); -- mmap_addr_entry = NULL; -- } -- -- if (page_size > SXE2_DB_PAGE_SIZE) -- sctx->qp_db_no_llwqe = (void*)((__u64)sctx->qp_db_no_llwqe - SXE2_DB_PAGE_SIZE); -- db_umunmap(sctx->qp_db_no_llwqe); -- sctx->qp_db_no_llwqe = NULL; -- -- SXE2_VERBS_LOG_INFO_BDF("free doorbell over"); -- --end: -- return; --} -- --static struct sxe2_verbs_llwqe *get_idle_shared_llwqe(struct sxe2_rdma_ucontext *sctx) --{ -- struct sxe2_verbs_llwqe *llwqe = NULL; -- struct sxe2_verbs_llwqe *llwqe_entry = NULL; -- -- list_for_each(&sctx->shared_llwqe_list, llwqe_entry, list_entry) -- { -- if (!llwqe) { -- llwqe = llwqe_entry; -- } else { -- if (llwqe_entry->count < llwqe->count) { -- llwqe = llwqe_entry; -- } -- } -- } -- if (llwqe) { -- llwqe->count++; -- } -- -- return llwqe; --} -- --struct sxe2_verbs_llwqe *alloc_db_page_and_get_qp_llwqe(struct ibv_context *context, -- __u32 db_mmap_size, -- __u32 db_page_id, -- __u64 db_mmap_offset, -- __u8 alloc_page_type) --{ -- struct sxe2_verbs_llwqe *llwqe = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- if (context == NULL) { -- SXE2_VERBS_LOG_ERROR_BDF("ibv context is NULL"); -- goto end; -- } -- sctx = to_sctx(context); -- -- pthread_mutex_lock(&sctx->alloc_llwqe_mutex); -- -- if (alloc_page_type == SXE2_VERBS_DB_PAGE_TYPE_DEDICATED) { -- if (!db_ualloc_page_and_llwqes(context, true, db_mmap_size, db_page_id, db_mmap_offset)) { -- SXE2_VERBS_LOG_ERROR_BDF( -- "LL_WQE:Alloc new ded db page failed"); -- goto end; -- } -- llwqe = list_pop(&sctx->dedicated_llwqe_list, struct sxe2_verbs_llwqe, -- list_entry); -- if (llwqe) { -- llwqe->count++; -- } -- } else { -- if 
(!db_ualloc_page_and_llwqes(context, false, db_mmap_size, db_page_id, db_mmap_offset)) { -- SXE2_VERBS_LOG_ERROR_BDF("LLWQE:Alloc new shared db page failed"); -- goto end; -- } -- llwqe = get_idle_shared_llwqe(sctx); -- } -- --end: -- pthread_mutex_unlock(&sctx->alloc_llwqe_mutex); -- if (llwqe) { -- SXE2_VERBS_LOG_DEBUG_BDF( -- "DB:Get LL_WQE:db_page_id %#x, ll_wqe_count %#x, dedicated %#x, " -- "shared %#x, wqe_addr %p, db_addr %p", -- llwqe->db_page_id, llwqe->count, llwqe->qp_dedicated, -- llwqe->qp_shared, llwqe->wqe_addr, llwqe->db_addr); -- } else { -- SXE2_VERBS_LOG_ERROR_BDF("DB:Get LL_WQE failed"); -- } -- return llwqe; --} -- --struct sxe2_verbs_llwqe *db_uget_qp_llwqe(struct ibv_context *context, -- bool *need_alloc_page, -- __u8 *alloc_page_type) --{ -- struct sxe2_verbs_llwqe *llwqe = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- if (context == NULL) { -- SXE2_VERBS_LOG_ERROR_BDF("ibv context is NULL"); -- goto end; -- } -- sctx = to_sctx(context); -- -- pthread_mutex_lock(&sctx->alloc_llwqe_mutex); -- -- llwqe = list_pop(&sctx->dedicated_llwqe_list, struct sxe2_verbs_llwqe, -- list_entry); -- if (llwqe) { -- llwqe->count++; -- *need_alloc_page = false; -- goto end; -- } -- if (sctx->alloc_dedicated_llwqes < sctx->ded_ll_wqes) { -- *need_alloc_page = true; -- *alloc_page_type = SXE2_VERBS_DB_PAGE_TYPE_DEDICATED; -- goto end; -- } -- -- llwqe = get_idle_shared_llwqe(sctx); -- -- if (sctx->alloc_shared_llwqes < sctx->shared_ll_wqes) { -- *need_alloc_page = true; -- *alloc_page_type = SXE2_VERBS_DB_PAGE_TYPE_SHARED; -- } -- --end: -- pthread_mutex_unlock(&sctx->alloc_llwqe_mutex); -- if (llwqe) { -- SXE2_VERBS_LOG_DEBUG_BDF( -- "DB:Get LL_WQE:db_page_id %#x, ll_wqe_count %#x, dedicated %#x, " -- "shared %#x, wqe_addr %p, db_addr %p", -- llwqe->db_page_id, llwqe->count, llwqe->qp_dedicated, -- llwqe->qp_shared, llwqe->wqe_addr, llwqe->db_addr); -- } else { -- SXE2_VERBS_LOG_INFO_BDF("DB:No LL_WQE left, will alloc new db page."); -- } -- return 
llwqe; --} -- --void db_uput_qp_llwqe(struct sxe2_rdma_ucontext *sctx, -- struct sxe2_verbs_llwqe *llwqe) --{ -- if (!llwqe || (!llwqe->qp_dedicated && !llwqe->qp_shared)) { -- SXE2_VERBS_LOG_ERROR_BDF("llwqe:invalid argument"); -- goto end; -- } -- -- pthread_mutex_lock(&sctx->alloc_llwqe_mutex); -- if (llwqe->qp_dedicated) { -- list_add_tail(&sctx->dedicated_llwqe_list, &llwqe->list_entry); -- } else { -- llwqe->count--; -- } -- pthread_mutex_unlock(&sctx->alloc_llwqe_mutex); -- --end: -- return; --} -diff -Naur rdma-core-48.0/providers/sxe2/device_port.c rdma-core-48.0.bak/providers/sxe2/device_port.c ---- rdma-core-48.0/providers/sxe2/device_port.c 2026-05-26 10:42:01.852075195 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/device_port.c 1970-01-01 08:00:00.000000000 +0800 -@@ -1,277 +0,0 @@ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include "sxe2_common.h" --#include "device_port.h" --#include "log.h" --#include "verbs.h" --#include -- --#define INTEL_HCA(v, d) VERBS_PCI_MATCH(v, d, NULL) --static const struct verbs_match_ent sxe2_hca_table[] = { -- VERBS_DRIVER_ID(RDMA_DRIVER_SXE2), -- INTEL_HCA(SXE2_PCI_VENDOR_ID, SXE2_RDMA_DEVICE_ID_PF), -- INTEL_HCA(SXE2_PCI_VENDOR_ID, SXE2_RDMA_DEVICE_ID_VF), -- INTEL_HCA(SXE2_PCI_VENDOR_ID, SXE2_RDMA_DEVICE_ID_PF_PCIE_2_10G), -- INTEL_HCA(SXE2_PCI_VENDOR_ID, SXE2_RDMA_DEVICE_ID_VF_PCIE_2_10G), -- INTEL_HCA(SXE2_PCI_VENDOR_ID_FOR_TL, SXE2_RDMA_DEVICE_ID_PF), -- INTEL_HCA(SXE2_PCI_VENDOR_ID_FOR_TL, SXE2_RDMA_DEVICE_ID_VF), -- INTEL_HCA(SXE2_PCI_VENDOR_ID_FOR_TL, SXE2_RDMA_DEVICE_ID_PF_PCIE_2_10G), -- INTEL_HCA(SXE2_PCI_VENDOR_ID_FOR_TL, SXE2_RDMA_DEVICE_ID_VF_PCIE_2_10G), -- INTEL_HCA(SXE2_PCI_VENDOR_ID_FOR_POP, SXE2_RDMA_DEVICE_ID_PF_POP), -- INTEL_HCA(SXE2_PCI_VENDOR_ID_FOR_POP, SXE2_RDMA_DEVICE_ID_VF_POP), -- {} --}; -- --void sxe2_ufree_context(struct ibv_context *context) --{ -- struct sxe2_rdma_ucontext *sctx; -- -- sctx = container_of(context, 
struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- sxe2_ufree_pd(&sctx->sxe2_upd->ibv_pd); -- sctx->sxe2_upd = NULL; -- sxe2_ufree_doorbell(context); -- -- verbs_uninit_context(&sctx->ibv_ctx); -- if(sctx != NULL){ -- free(sctx); -- } --} -- --int sxe2_uquery_device_ex(struct ibv_context *ibctx, -- const struct ibv_query_device_ex_input *input, -- struct ibv_device_attr_ex *attr, size_t attr_size) --{ -- int ret = SXE2_OK; -- struct ib_uverbs_ex_query_device_resp resp = {}; -- size_t resp_size = sizeof(resp); -- struct sxe2_rdma_ucontext *sctx; -- uint16_t major_ver_id; -- uint16_t sub_ver_id; -- uint16_t fix_ver_id; -- uint16_t build_id; -- -- sctx = to_sctx(ibctx); -- -- ret = ibv_cmd_query_device_any(ibctx, input, attr, attr_size, -- &resp, &resp_size); -- if (ret != SXE2_OK) { -- SXE2_VERBS_LOG_ERROR_BDF("device:uquery device err ret=%d\n", ret); -- goto end; -- } -- -- major_ver_id = FIELD_GET(SXE2_RDMA_FW_MAIN_VERSION_BITS, resp.base.fw_ver); -- sub_ver_id = FIELD_GET(SXE2_RDMA_FW_SUB_VERSION_BITS, resp.base.fw_ver); -- fix_ver_id = FIELD_GET(SXE2_RDMA_FW_FIX_VERSION_BITS, resp.base.fw_ver); -- build_id = FIELD_GET(SXE2_RDMA_FW_BUILD_NUMBER_BITS, resp.base.fw_ver); -- -- snprintf(attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver), -- "%u.%u.%u.%u", major_ver_id, sub_ver_id, fix_ver_id, build_id); --end: -- return ret; --} -- --int sxe2_uquery_port(struct ibv_context *ibctx, uint8_t port, -- struct ibv_port_attr *attr) --{ -- int ret = SXE2_OK; -- struct ibv_query_port cmd; -- struct sxe2_rdma_ucontext *sctx; -- sctx = to_sctx(ibctx); -- ret = ibv_cmd_query_port(ibctx, port, attr, &cmd, sizeof(cmd)); -- if (ret != SXE2_OK) { -- SXE2_VERBS_LOG_ERROR_BDF("device:uquery port err ret=%d\n", ret); -- goto end; -- } -- --end: -- return ret; --} -- --static const struct verbs_context_ops sxe2_uctx_ops = { -- .alloc_pd = sxe2_ualloc_pd, -- .create_ah = sxe2_ucreate_ah, -- .create_cq = sxe2_ucreate_cq, -- .create_cq_ex = sxe2_ucreate_cq_ex, -- .create_qp = 
sxe2_ucreate_qp, -- .create_qp_ex = sxe2_ucreate_qp_ex, -- .dealloc_pd = sxe2_ufree_pd, -- .dereg_mr = sxe2_udereg_mr, -- .destroy_ah = sxe2_udestroy_ah, -- .destroy_cq = sxe2_udestroy_cq, -- .destroy_qp = sxe2_udestroy_qp, -- .modify_qp = sxe2_umodify_qp, -- .modify_cq = sxe2_umodify_cq, -- .poll_cq = sxe2_upoll_cq, -- .post_recv = sxe2_upost_recv, -- .post_send = sxe2_upost_send, -- .post_srq_recv = sxe2_upost_srq_recv, -- .query_device_ex = sxe2_uquery_device_ex, -- .query_port = sxe2_uquery_port, -- .query_qp = sxe2_uquery_qp, -- .reg_mr = sxe2_ureg_mr, -- .rereg_mr = sxe2_urereg_mr, -- .free_context = sxe2_ufree_context, -- .cq_event = sxe2_ucq_event, -- .reg_dmabuf_mr = sxe2_ureg_dmabuf_mr, -- .req_notify_cq = sxe2_uarm_cq, -- .create_srq = sxe2_ucreate_srq, -- .modify_srq = sxe2_umodify_srq, -- .query_srq = sxe2_uquery_srq, -- .destroy_srq = sxe2_udestroy_srq, -- .create_srq_ex = sxe2_ucreate_srq_ex, -- .get_srq_num = sxe2_uget_srq_num, -- .attach_mcast = sxe2_uattach_mcast, -- .detach_mcast = sxe2_udetach_mcast, --#if 0 -- .resize_cq = sxe2_uresize_cq, --#endif --}; -- --struct verbs_context *sxe2_ualloc_context(struct ibv_device *ib_dev, -- int cmd_fd, void *private_data) --{ -- int ret = SXE2_OK; -- struct ibv_pd *ibv_pd; -- struct sxe2_rdma_ucontext *sctx; -- struct sxe2_get_context cmd = {}; -- struct sxe2_get_context_resp resp = {}; -- __u8 user_ver = SXE2_RDMA_USER_ABI_VER; -- char *env; -- -- sctx = verbs_init_and_alloc_context(ib_dev, cmd_fd, sctx, ibv_ctx, -- RDMA_DRIVER_SXE2); -- if (!sctx) -- { -- ret = -ENOMEM; -- goto end; -- } -- sctx->enable_io_log = 1; -- env = getenv(SXE2_LOG_IOLOG_ON); -- if ((env) && (0 == strncmp(env, "OFF", 3))){ -- SXE2_VERBS_LOG_INFO_BDF("device:disable user io log.\n"); -- sctx->enable_io_log = 0; -- } --#if defined SXE2_CFG_DEBUG -- sctx->log_level = LOG_LEVEL_DEBUG; --#else -- sctx->log_level = LOG_LEVEL_INVALID; --#endif -- env = getenv(SXE2_VERBS_LOG_LEVEL); -- if ((env) && (0 == strncmp(env, "OFF", 3))){ -- 
SXE2_VERBS_LOG_INFO_BDF("device:disable user log.\n"); -- sctx->log_level = LOG_LEVEL_INVALID; -- } else if ((env) && (0 == strncmp(env, "FATAL", 5))) { -- sctx->log_level = LOG_LEVEL_FATAL; -- } else if ((env) && (0 == strncmp(env, "ERROR", 5))) { -- sctx->log_level = LOG_LEVEL_ERROR; -- } else if ((env) && (0 == strncmp(env, "WARN", 4))) { -- sctx->log_level = LOG_LEVEL_WARN; -- } else if ((env) && (0 == strncmp(env, "INFO", 4))) { -- sctx->log_level = LOG_LEVEL_INFO; -- } else if ((env) && (0 == strncmp(env, "DEBUG", 5))) { -- sctx->log_level = LOG_LEVEL_DEBUG; -- } else if ((env) && (0 == strncmp(env, "TRACE", 5))) { -- sctx->log_level = LOG_LEVEL_TRACE; -- } -- cmd.userspace_ver = user_ver; -- ret = ibv_cmd_get_context(&sctx->ibv_ctx, (struct ibv_get_context *)&cmd, sizeof(cmd), -- &resp.ibv_resp, sizeof(resp)); -- if (ret != SXE2_OK) { -- SXE2_VERBS_LOG_ERROR_BDF("device:cmd get context err ret=%d\n", ret); -- goto free_ctx; -- } -- verbs_set_ops(&sctx->ibv_ctx, &sxe2_uctx_ops); -- sctx->uk_attrs.feature_flags = resp.feature_flags; -- sctx->uk_attrs.hw_rev = resp.hw_rev; -- sctx->uk_attrs.max_hw_wq_frags = resp.max_hw_wq_frags; -- sctx->uk_attrs.max_hw_read_sges = resp.max_hw_read_sges; -- sctx->uk_attrs.max_hw_inline = resp.max_hw_inline; -- sctx->uk_attrs.max_hw_rq_quanta = resp.max_hw_rq_quanta; -- sctx->uk_attrs.max_hw_wq_quanta = resp.max_hw_wq_quanta; -- sctx->uk_attrs.max_hw_sq_chunk = resp.max_hw_sq_chunk; -- sctx->uk_attrs.max_hw_cq_size = resp.max_hw_cq_size; -- sctx->uk_attrs.min_hw_cq_size = resp.min_hw_cq_size; -- sctx->uk_attrs.max_hw_srq_quanta = resp.max_hw_srq_quanta; -- sctx->uk_attrs.max_hw_srq_wr = resp.max_hw_srq_wr; -- sctx->abi_ver = user_ver; -- if (resp.comp_mask & SXE2_ALLOC_UCTX_MIN_HW_WQ_SIZE) { -- sctx->uk_attrs.min_hw_wq_size = resp.min_hw_wq_size; -- } else { -- sctx->uk_attrs.min_hw_wq_size = SXE2_QP_SW_MIN_WQSIZE; -- } -- memcpy(sctx->bdf, (const char *)resp.bdf, sizeof(sctx->bdf) - 1); -- -- ret = sxe2_uinit_doorbell(sctx, 
&resp); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("device:init door bell err ret=%d\n", ret); -- goto free_ctx; -- } -- -- ibv_pd = sxe2_ualloc_pd(&sctx->ibv_ctx.context); -- if (!ibv_pd) { -- SXE2_VERBS_LOG_ERROR_BDF("device:alloc pd err\n"); -- goto free_db; -- } -- -- ibv_pd->context = &sctx->ibv_ctx.context; -- sctx->sxe2_upd = container_of(ibv_pd, struct sxe2_upd, ibv_pd); -- -- sxe2_spinlock_init(&sctx->hugetlb_lock, true); -- list_head_init(&sctx->hugetlb_list); -- -- return &sctx->ibv_ctx; -- --free_db: -- sxe2_ufree_doorbell(&sctx->ibv_ctx.context); --free_ctx: -- free(sctx); -- sctx = NULL; --end: -- return NULL; --} -- --void sxe2_uninit_device(struct verbs_device *verbs_device) --{ -- struct sxe2_rdma_udevice *dev; -- -- log_uninit0(); -- -- dev = container_of(&verbs_device->device, struct sxe2_rdma_udevice, -- ibv_dev.device); -- free(dev); --} -- --struct verbs_device *sxe2_ualloc_device(struct verbs_sysfs_dev *sysfs_dev) --{ -- struct sxe2_rdma_udevice *dev; -- -- log_init0(false); -- -- dev = calloc(1, sizeof(*dev)); -- if (!dev){ -- return NULL; -- } -- return &dev->ibv_dev; --} -- --static const struct verbs_device_ops sxe2_udev_ops = { -- .alloc_context = sxe2_ualloc_context, -- .alloc_device = sxe2_ualloc_device, -- .match_max_abi_version = SXE2_RDMA_MAX_ABI_VERSION, -- .match_min_abi_version = SXE2_RDMA_MIN_ABI_VERSION, -- .match_table = sxe2_hca_table, -- .name = "sxe2_rdma", -- .uninit_device = sxe2_uninit_device, --}; -- --PROVIDER_DRIVER(sxe2, sxe2_udev_ops); -diff -Naur rdma-core-48.0/providers/sxe2/device_port.h rdma-core-48.0.bak/providers/sxe2/device_port.h ---- rdma-core-48.0/providers/sxe2/device_port.h 2026-05-26 10:42:01.854075208 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/device_port.h 1970-01-01 08:00:00.000000000 +0800 -@@ -1,48 +0,0 @@ --#ifndef DEVICE_H --#define DEVICE_H --#define SXE2_PCI_VENDOR_ID 0x1FF2 --#define SXE2_PCI_VENDOR_ID_FOR_TL 0x206F --#define SXE2_PCI_VENDOR_ID_FOR_POP 0x1D94 -- --#define 
SXE2_RDMA_DEVICE_ID_PF 0x10B1 --#define SXE2_RDMA_DEVICE_ID_VF 0x10B2 --#define SXE2_RDMA_DEVICE_ID_PF_PCIE_2_10G 0x10B3 --#define SXE2_RDMA_DEVICE_ID_VF_PCIE_2_10G 0x10B4 --#define SXE2_RDMA_DEVICE_ID_PF_POP 0x1260 --#define SXE2_RDMA_DEVICE_ID_VF_POP 0x126F -- --#define SXE2_RDMA_USER_ABI_VER 1 --#define SXE2_RDMA_MIN_ABI_VERSION 1 --#define SXE2_RDMA_MAX_ABI_VERSION 1 --#define SXE2_OK 0 --#define SXE2_QP_SW_MIN_WQSIZE 8 --#define SXE2_RDMA_FW_BUILD_NUMBER_BITS GENMASK(7, 0) --#define SXE2_RDMA_FW_FIX_VERSION_BITS GENMASK(15, 8) --#define SXE2_RDMA_FW_SUB_VERSION_BITS GENMASK(23, 16) --#define SXE2_RDMA_FW_MAIN_VERSION_BITS GENMASK(31, 24) -- --enum { -- SXE2_ALLOC_UCTX_MIN_HW_WQ_SIZE = 1 << 1, --}; -- --struct sxe2_rdma_udevice { -- struct verbs_device ibv_dev; --}; -- --void sxe2_ufree_context(struct ibv_context *context); -- --int sxe2_uquery_device_ex(struct ibv_context *ibctx, -- const struct ibv_query_device_ex_input *input, -- struct ibv_device_attr_ex *attr, size_t attr_size); -- --int sxe2_uquery_port(struct ibv_context *ibctx, uint8_t port, -- struct ibv_port_attr *attr); -- --struct verbs_context *sxe2_ualloc_context(struct ibv_device *ib_dev, -- int cmd_fd, void *private_data); -- --void sxe2_uninit_device(struct verbs_device *verbs_device); -- --struct verbs_device *sxe2_ualloc_device(struct verbs_sysfs_dev *sysfs_dev); -- --#endif -diff -Naur rdma-core-48.0/providers/sxe2/io.c rdma-core-48.0.bak/providers/sxe2/io.c ---- rdma-core-48.0/providers/sxe2/io.c 2026-05-26 10:42:01.854075208 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/io.c 1970-01-01 08:00:00.000000000 +0800 -@@ -1,3829 +0,0 @@ -- --#include --#include --#include --#include --#include -- --#include --#include --#include --#include --#include --#include --#include -- --#include "sxe2_common.h" --#include "io.h" --#include "sxe2_abi.h" --#include "ring.h" --#include "log.h" --#include -- --static const rdma_disp_func g_rdma_op[SXE2_RDMA_MAX_ID] = { -- sxe2_hw_send, -- sxe2_hw_inline_send, 
-- sxe2_hw_rdma_write, -- sxe2_hw_inline_rdma_write, -- sxe2_hw_rdma_read, -- sxe2_hw_mw_bind, -- sxe2_hw_local_invalidate, --}; -- --static const int g_frag2quanta[SXE2_MAX_FRAGCNT] = { 1, 1, 2, 2, 3, 3, 4, 4, -- 5, 5, 6, 6, 7, 7, 8, 8 }; --#define DUMP_RDMA_SEND_HDR(psnd) \ -- do { \ -- SXE2_VERBS_LOG_DEBUG_BDF("SND_WQE_HDR: (%#llx)\n" \ -- "remote_inv_key: %#x \n" \ -- "op : %#x \n" \ -- "addfragcnt : %d \n" \ -- "report_rtt : %d \n" \ -- "imme_data_flag: %d \n" \ -- "push_wqe : %d \n" \ -- "read_fence : %d \n" \ -- "local_fence : %d \n" \ -- "signaled : %d \n" \ -- "wqe_valid : %d\n", \ -- (unsigned long long)htole64((psnd)->val), \ -- (psnd)->field.remote_inv_rkey, \ -- (psnd)->field.op, \ -- (psnd)->field.addfragcnt, \ -- (psnd)->field.report_rtt, \ -- (psnd)->field.imme_data_flag, \ -- (psnd)->field.push_wqe, \ -- (psnd)->field.read_fence, \ -- (psnd)->field.local_fence, \ -- (psnd)->field.signaled_completion, \ -- (psnd)->field.wqe_valid); \ -- } while (0) --#define DUMP_RDMA_SEND_INLINE_HDR(pinsnd) \ -- do { \ -- SXE2_VERBS_LOG_DEBUG_BDF("INSND_WQE_HDR: (%#llx)\n" \ -- "remote_inv_key: %#x \n" \ -- "op : %#x \n" \ -- "report_rtt : %d \n" \ -- "imme_data_flag: %d \n" \ -- "inline_datalen: %d \n" \ -- "push_wqe : %d \n" \ -- "inline_dataflg: %d \n" \ -- "read_fence : %d \n" \ -- "local_fence : %d \n" \ -- "signaled : %d \n" \ -- "wqe_valid : %d\n", \ -- (unsigned long long)htole64((pinsnd)->val), \ -- (pinsnd)->field.remote_inv_rkey, \ -- (pinsnd)->field.op, \ -- (pinsnd)->field.report_rtt, \ -- (pinsnd)->field.imme_data_flag, \ -- (pinsnd)->field.inline_data_len, \ -- (pinsnd)->field.push_wqe, \ -- (pinsnd)->field.inline_data_flag, \ -- (pinsnd)->field.read_fence, \ -- (pinsnd)->field.local_fence, \ -- (pinsnd)->field.signaled_completion, \ -- (pinsnd)->field.wqe_valid); \ -- } while (0) --#define DUMP_RDMA_WRITE_HDR(pwrite) \ -- do { \ -- SXE2_VERBS_LOG_DEBUG_BDF("WRITE_WQE_HDR: (%#llx)\n" \ -- "remote_key : %#x \n" \ -- "op : %#x \n" \ -- "addfragcnt : %d 
\n" \ -- "report_rtt : %d \n" \ -- "imme_data_flag: %d \n" \ -- "push_wqe : %d \n" \ -- "read_fence : %d \n" \ -- "local_fence : %d \n" \ -- "signaled : %d \n" \ -- "wqe_valid : %d\n", \ -- (unsigned long long)htole64(pwrite->val), \ -- pwrite->field.remote_key, \ -- pwrite->field.op, \ -- pwrite->field.addfragcnt, \ -- pwrite->field.report_rtt, \ -- pwrite->field.imme_data_flag, \ -- pwrite->field.push_wqe, \ -- pwrite->field.read_fence, \ -- pwrite->field.local_fence, \ -- pwrite->field.signaled_completion, \ -- pwrite->field.wqe_valid); \ -- } while (0) --#define DUMP_RDMA_WRITE_INLINE_HDR(pinwrite) \ -- do { \ -- SXE2_VERBS_LOG_DEBUG_BDF("INWRITE_WQE_HDR: (%#llx)\n" \ -- "remote_key : %#x \n" \ -- "op : %#x \n" \ -- "report_rtt : %d \n" \ -- "imme_data_flag: %d \n" \ -- "inline_datalen: %d \n" \ -- "push_wqe : %d \n" \ -- "inline_dataflg: %d \n" \ -- "read_fence : %d \n" \ -- "local_fence : %d \n" \ -- "signaled : %d \n" \ -- "wqe_valid : %d\n", \ -- (unsigned long long)htole64((pinwrite)->val), \ -- (pinwrite)->field.remote_key, \ -- (pinwrite)->field.op, \ -- (pinwrite)->field.report_rtt, \ -- (pinwrite)->field.imme_data_flag, \ -- (pinwrite)->field.inline_data_len, \ -- (pinwrite)->field.push_wqe, \ -- (pinwrite)->field.inline_data_flag, \ -- (pinwrite)->field.read_fence, \ -- (pinwrite)->field.local_fence, \ -- (pinwrite)->field.signaled_completion, \ -- (pinwrite)->field.wqe_valid); \ -- } while (0) --#define DUMP_RDMA_READ_HDR(pread) \ -- do { \ -- SXE2_VERBS_LOG_DEBUG_BDF("READ_WQE_HDR: (%#llx)\n" \ -- "remote_key : %#x \n" \ -- "op : %#x \n" \ -- "addfragcnt : %d \n" \ -- "report_rtt : %d \n" \ -- "push_wqe : %d \n" \ -- "read_fence : %d \n" \ -- "local_fence : %d \n" \ -- "signaled : %d \n" \ -- "wqe_valid : %d\n", \ -- (unsigned long long)htole64((pread)->val), \ -- (pread)->field.remote_key, \ -- (pread)->field.op, \ -- (pread)->field.addfragcnt, \ -- (pread)->field.report_rtt, \ -- (pread)->field.push_wqe, \ -- (pread)->field.read_fence, \ -- 
(pread)->field.local_fence, \ -- (pread)->field.signaled_completion, \ -- (pread)->field.wqe_valid); \ -- } while (0) --#define DUMP_RDMA_LOCAL_INV_HDR(pinv) \ -- do { \ -- SXE2_VERBS_LOG_DEBUG_BDF("LOCALINVKEY_WQE_HDR: (%#llx)\n" \ -- "op : %#x \n" \ -- "push_wqe : %d \n" \ -- "read_fence : %d \n" \ -- "local_fence : %d \n" \ -- "signaled : %d \n" \ -- "wqe_valid : %d\n", \ -- (unsigned long long)htole64((pinv)->val), \ -- (pinv)->field.op, \ -- (pinv)->field.push_wqe, \ -- (pinv)->field.read_fence, \ -- (pinv)->field.local_fence, \ -- (pinv)->field.signaled_completion, \ -- (pinv)->field.wqe_valid); \ -- } while (0) --static void sxe2_dump_wqe_hdr(struct sxe2_uqp *uqp, __u64 *hdr, -- enum sxe2_disp_id func) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- union sxe2_send_hdr *psnd; -- union sxe2_send_inline_hdr *pinsnd; -- union sxe2_write_hdr *pwrite; -- union sxe2_write_inline_hdr *pinwrite; -- union sxe2_read_hdr *pread; -- union sxe2_inval_hdr *pinv; -- -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- if (sctx->enable_io_log == false) { -- return; -- } -- -- switch (func) { -- case SXE2_RDMA_SEND: -- psnd = (union sxe2_send_hdr *)hdr; -- DUMP_RDMA_SEND_HDR(psnd); -- break; -- case SXE2_RDMA_SEND_INLINE: -- pinsnd = (union sxe2_send_inline_hdr *)hdr; -- DUMP_RDMA_SEND_INLINE_HDR(pinsnd); -- break; -- case SXE2_RDMA_WRITE: -- pwrite = (union sxe2_write_hdr *)hdr; -- DUMP_RDMA_WRITE_HDR(pwrite); -- break; -- case SXE2_RDMA_WRITE_INLINE: -- pinwrite = (union sxe2_write_inline_hdr *)hdr; -- DUMP_RDMA_WRITE_INLINE_HDR(pinwrite); -- break; -- case SXE2_RDMA_READ: -- pread = (union sxe2_read_hdr *)hdr; -- DUMP_RDMA_READ_HDR(pread); -- break; -- case SXE2_RDMA_LOCAL_INV: -- pinv = (union sxe2_inval_hdr *)hdr; -- DUMP_RDMA_LOCAL_INV_HDR(pinv); -- break; -- default: -- SXE2_VERBS_LOG_ERROR_BDF("UNSUPPORT OP CODE\n"); -- } --} -- --static void sxe2_dump_wqe(struct sxe2_qp_common *qp, __le64 *wqe, -- __u16 quanta, __u32 
wqe_idx, const char *desc) --{ -- struct sxe2_uqp *uqp =container_of(qp, struct sxe2_uqp, qp); -- struct sxe2_rdma_ucontext *sctx = NULL; -- __le32 *p = (__le32 *)wqe; -- int i, offset = 0; -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- if (sctx->enable_io_log) { -- if (desc) { -- SXE2_VERBS_LOG_DEBUG_BDF("SQWQE DUMP TYPE=[%s], qpn [0x%x], wqe_idx"\ -- "[0x%x] quanta [%u]:\n", desc, qp->qpn, wqe_idx, quanta); -- } -- else { -- SXE2_VERBS_LOG_DEBUG_BDF("RQWQE DUMP, qpn [0x%x], wqe_idx [0x%x]:\n", -- qp->qpn, wqe_idx); -- } -- for (i = 0; i < quanta * SXE2_QP_WQE_MIN_SIZE; i += 32) { -- SXE2_VERBS_LOG_DEBUG_BDF("[qpn 0x%x][offset %u] %08X %08X %08X %08X " -- "%08X %08X %08X %08X\n", -- qp->qpn, offset, -- le32toh(p[0]), le32toh(p[1]), -- le32toh(p[2]), le32toh(p[3]), -- le32toh(p[4]), le32toh(p[5]), -- le32toh(p[6]), le32toh(p[7])); -- p += 8; -- offset += 32; -- } -- } --} -- --static inline int sxe2_fragcnt_to_quanta_cnt(__u32 frag_cnt, __u16 *quanta) --{ -- if (frag_cnt >= SXE2_MAX_FRAGCNT) { -- return EINVAL; -- } -- -- *quanta = (__u16)g_frag2quanta[frag_cnt]; -- return 0; --} -- --static inline __u16 sxe2_inline_to_quanta_cnt(__u32 data_size) --{ -- if (data_size <= 8) { -- return SXE2_QP_WQE_MIN_QUANTA; -- } else if (data_size <= 39) { -- return 2; -- } else if (data_size <= 70) { -- return 3; -- } else if (data_size <= 101) { -- return 4; -- } else if (data_size <= 132) { -- return 5; -- } else if (data_size <= 163) { -- return 6; -- } else if (data_size <= 194) { -- return 7; -- } else { -- return 8; -- } --} -- --static void sxe2_qp_ring_normal_db(struct sxe2_qp_common *qp) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- if (SXE2_RING_CURRENT_HEAD(qp->sq_ring) != qp->initial_ring.head) { -- udma_to_device_barrier(); -- 
db_wr32(qp->qpn, qp->qp_db_no_llwqe); -- -- if (qp->push_dropped) -- qp->push_dropped = false; -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("DB NOTIFY: QPN (%#x -> %p) PI %#x\n", -- qp->qpn, qp->qp_db_no_llwqe, qp->sq_ring.head); -- } -- } -- qp->initial_ring.head = qp->sq_ring.head; --} -- --static void sxe2_qp_push_wqe(struct sxe2_qp_common *qp, __le64 *wqe, -- __u16 quanta, __u32 wqe_idx) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- __le64 *push; -- struct sxe2_verbs_llwqe *llwqe; --#if defined(SXE2_CFG_DEBUG) && defined(SXE2_SUPPORT_INJECT) -- char *env; --#endif -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- -- if (!qp->llwqe_mode) { -- sxe2_qp_ring_normal_db(qp); -- } else { -- llwqe = qp->verbs_llwqe; -- if ((llwqe != NULL) && (llwqe->need_lock)) { -- pthread_spin_lock(&llwqe->lock); -- } -- push = (__le64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x7) * 0x20); --#if defined(SXE2_CFG_DEBUG) && defined(SXE2_SUPPORT_INJECT) -- env = getenv(SXE2_INJECT_LLWQE_ERR); -- if ((env) && (0 == strncmp(env, "yes", 3))){ -- memcpy(push, wqe, 4); -- } --#else -- memcpy(push, wqe, quanta * SXE2_QP_WQE_MIN_SIZE); --#endif -- set_32bit_val(qp->push_db, 0, -- FIELD_PREP(SXE2_WQEALLOC_WQE_DESC_INDEX, wqe_idx >> 3) | -- qp->qpn); -- qp->initial_ring.head = qp->sq_ring.head; -- qp->llwqe_mode = true; -- qp->push_dropped = false; -- if ((llwqe != NULL) && (llwqe->need_lock)) { -- pthread_spin_unlock(&llwqe->lock); -- } -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("DB NOTIFY(LLWQE): QPN (%#x ->db %#lx llwqe %#lx) idx %#x\n", -- qp->qpn, (uintptr_t)qp->push_db, (uintptr_t)qp->push_wqe, wqe_idx >> 3); -- } -- } --} -- --static void sxe2_qp_flush_wqe(struct ibv_qp *qp, bool flush_sq, bool flush_rq) --{ -- int ret; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_umodify_qp cmd = 
{}; -- struct sxe2_umodify_qp_resp resp = {}; -- struct ibv_qp_attr qp_attr = {}; -- -- uqp = container_of(qp, struct sxe2_uqp, verbs_qp.qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- qp_attr.qp_state = IBV_QPS_ERR; -- cmd.sq_flush = flush_sq; -- cmd.rq_flush = flush_rq; -- -- ret = ibv_cmd_modify_qp_ex(qp, &qp_attr, IBV_QP_STATE, -- &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp)); -- if (ret || sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("ibv_modify_qp_ex ret(%d)\n", ret); -- } --} -- --static inline void sxe2_set_qkeyqpn(__le64 *wqe, __u32 offset, __u32 qkey, -- __u32 qpn) --{ -- union sxe2_dqpn_data msg; -- -- msg.val = 0; -- msg.field.dest_qkey = qkey; -- msg.field.dest_qpn = qpn; -- -- wqe[offset >> 3] = htole64(msg.val); --} -- --static inline void sxe2_set_remote_offset(__le64 *wqe, __u32 offset, -- __u64 remote_offset) --{ -- wqe[offset >> 3] = htole64(remote_offset); --} -- --static void sxe2_set_send_hdr(__le64 *wqe, __u32 value, -- struct sxe2_wr_info *wr_info, -- struct sxe2_qp_common *qp) --{ -- union sxe2_send_hdr hdr; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- -- hdr.val = 0; -- hdr.field.remote_inv_rkey = -- wr_info->rkey_to_inv | wr_info->op_info.send.ah_id; -- hdr.field.op = wr_info->op_type; -- hdr.field.addfragcnt = value; -- hdr.field.report_rtt = wr_info->report_rtt; -- hdr.field.imme_data_flag = wr_info->imm_data_valid; -- hdr.field.push_wqe = wr_info->push_wqe; -- hdr.field.read_fence = wr_info->read_fence; -- hdr.field.local_fence = wr_info->local_fence; -- hdr.field.signaled_completion = wr_info->signaled; -- hdr.field.wqe_valid = qp->swqe_polarity; -- -- udma_to_device_barrier(); -- -- set_64bit_val(wqe, 24, hdr.val); -- if (sctx->enable_io_log) { -- 
SXE2_VERBS_LOG_DEBUG_BDF("SND_WQE_HDR: (%#llx)\n" \ -- "remote_inv_key: %#x \n"\ -- "op : %#x \n"\ -- "addfragcnt : %d \n"\ -- "report_rtt : %d \n"\ -- "imme_data_flag: %d \n"\ -- "push_wqe : %d \n"\ -- "read_fence : %d \n"\ -- "local_fence : %d \n"\ -- "signaled : %d \n"\ -- "wqe_valid : %d\n", -- (unsigned long long)htole64(hdr.val), -- hdr.field.remote_inv_rkey, -- hdr.field.op, -- hdr.field.addfragcnt, -- hdr.field.report_rtt, -- hdr.field.imme_data_flag, -- hdr.field.push_wqe, -- hdr.field.read_fence, -- hdr.field.local_fence, -- hdr.field.signaled_completion, -- hdr.field.wqe_valid); -- } --} -- --static void sxe2_set_inlinesnd_hdr(__le64 *wqe, __u32 value, -- struct sxe2_wr_info *wr_info, -- struct sxe2_qp_common *qp) --{ -- union sxe2_send_inline_hdr hdr; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- -- hdr.val = 0; -- hdr.field.remote_inv_rkey = -- wr_info->rkey_to_inv | wr_info->op_info.send.ah_id; -- hdr.field.op = wr_info->op_type; -- hdr.field.report_rtt = wr_info->report_rtt; -- hdr.field.imme_data_flag = wr_info->imm_data_valid; -- hdr.field.inline_data_len = value; -- hdr.field.push_wqe = wr_info->push_wqe; -- hdr.field.inline_data_flag = 1; -- hdr.field.read_fence = wr_info->read_fence; -- hdr.field.local_fence = wr_info->local_fence; -- hdr.field.signaled_completion = wr_info->signaled; -- hdr.field.wqe_valid = qp->swqe_polarity; -- -- udma_to_device_barrier(); -- -- set_64bit_val(wqe, 24, hdr.val); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("INSND_WQE_HDR: (%#llx)\n" \ -- "remote_inv_key: %#x \n"\ -- "op : %#x \n"\ -- "report_rtt : %d \n"\ -- "imme_data_flag: %d \n"\ -- "inline_datalen: %d \n"\ -- "push_wqe : %d \n"\ -- "inline_dataflg: %d \n"\ -- "read_fence : %d \n"\ -- "local_fence : %d \n"\ -- "signaled : %d \n"\ -- "wqe_valid : %d\n", -- (unsigned 
long long)htole64(hdr.val), -- hdr.field.remote_inv_rkey, -- hdr.field.op, -- hdr.field.report_rtt, -- hdr.field.imme_data_flag, -- hdr.field.inline_data_len, -- hdr.field.push_wqe, -- hdr.field.inline_data_flag, -- hdr.field.read_fence, -- hdr.field.local_fence, -- hdr.field.signaled_completion, -- hdr.field.wqe_valid); -- } --} -- --static void sxe2_set_write_hdr(__le64 *wqe, __u32 value, -- struct sxe2_wr_info *wr_info, -- struct sxe2_qp_common *qp) --{ -- union sxe2_write_hdr hdr; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- -- hdr.val = 0; -- hdr.field.remote_key = wr_info->op_info.rdma_write.rem_addr.lkey; -- hdr.field.op = wr_info->op_type; -- hdr.field.addfragcnt = value; -- hdr.field.report_rtt = wr_info->report_rtt; -- hdr.field.imme_data_flag = wr_info->imm_data_valid; -- hdr.field.push_wqe = wr_info->push_wqe; -- hdr.field.read_fence = wr_info->read_fence; -- hdr.field.local_fence = wr_info->local_fence; -- hdr.field.signaled_completion = wr_info->signaled; -- hdr.field.wqe_valid = qp->swqe_polarity; -- -- udma_to_device_barrier(); -- -- set_64bit_val(wqe, 24, hdr.val); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("WRITE_WQE_HDR: (%#llx)\n" \ -- "remote_key : %#x \n"\ -- "op : %#x \n"\ -- "addfragcnt : %d \n"\ -- "report_rtt : %d \n"\ -- "imme_data_flag: %d \n"\ -- "push_wqe : %d \n"\ -- "read_fence : %d \n"\ -- "local_fence : %d \n"\ -- "signaled : %d \n"\ -- "wqe_valid : %d\n", -- (unsigned long long)htole64(hdr.val), -- hdr.field.remote_key, -- hdr.field.op, -- hdr.field.addfragcnt, -- hdr.field.report_rtt, -- hdr.field.imme_data_flag, -- hdr.field.push_wqe, -- hdr.field.read_fence, -- hdr.field.local_fence, -- hdr.field.signaled_completion, -- hdr.field.wqe_valid); -- } --} -- --static void sxe2_set_inlinewrite_hdr(__le64 *wqe, __u32 value, -- struct 
sxe2_wr_info *wr_info, -- struct sxe2_qp_common *qp) --{ -- union sxe2_write_inline_hdr hdr; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- -- hdr.val = 0; -- hdr.field.remote_key = wr_info->op_info.rdma_write.rem_addr.lkey; -- hdr.field.op = wr_info->op_type; -- hdr.field.report_rtt = wr_info->report_rtt; -- hdr.field.imme_data_flag = wr_info->imm_data_valid; -- hdr.field.inline_data_len = value; -- hdr.field.push_wqe = wr_info->push_wqe; -- hdr.field.inline_data_flag = 1; -- hdr.field.read_fence = wr_info->read_fence; -- hdr.field.local_fence = wr_info->local_fence; -- hdr.field.signaled_completion = wr_info->signaled; -- hdr.field.wqe_valid = qp->swqe_polarity; -- -- udma_to_device_barrier(); -- -- set_64bit_val(wqe, 24, hdr.val); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("INWRITE_WQE_HDR: (%#llx)\n" \ -- "remote_key : %#x \n"\ -- "op : %#x \n"\ -- "report_rtt : %d \n"\ -- "imme_data_flag: %d \n"\ -- "inline_datalen: %d \n"\ -- "push_wqe : %d \n"\ -- "inline_dataflg: %d \n"\ -- "read_fence : %d \n"\ -- "local_fence : %d \n"\ -- "signaled : %d \n"\ -- "wqe_valid : %d\n", -- (unsigned long long)htole64(hdr.val), -- hdr.field.remote_key, -- hdr.field.op, -- hdr.field.report_rtt, -- hdr.field.imme_data_flag, -- hdr.field.inline_data_len, -- hdr.field.push_wqe, -- hdr.field.inline_data_flag, -- hdr.field.read_fence, -- hdr.field.local_fence, -- hdr.field.signaled_completion, -- hdr.field.wqe_valid); -- } --} -- --static void sxe2_set_read_hdr(__le64 *wqe, __u32 addfragcnt, -- struct sxe2_wr_info *wr_info, -- struct sxe2_qp_common *qp, bool ord_fence) --{ -- union sxe2_read_hdr hdr; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct 
sxe2_rdma_ucontext, -- ibv_ctx.context); -- -- hdr.val = 0; -- hdr.field.remote_key = wr_info->op_info.rdma_read.rem_addr.lkey; -- hdr.field.op = wr_info->op_type; -- hdr.field.addfragcnt = addfragcnt; -- hdr.field.report_rtt = wr_info->report_rtt; -- hdr.field.push_wqe = wr_info->push_wqe; -- hdr.field.read_fence = wr_info->read_fence || ord_fence ? 1 : 0; -- hdr.field.local_fence = wr_info->local_fence; -- hdr.field.signaled_completion = wr_info->signaled; -- hdr.field.wqe_valid = qp->swqe_polarity; -- -- udma_to_device_barrier(); -- -- set_64bit_val(wqe, 24, hdr.val); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("READ_WQE_HDR: (%#llx)\n" \ -- "remote_key : %#x \n"\ -- "op : %#x \n"\ -- "addfragcnt : %d \n"\ -- "report_rtt : %d \n"\ -- "push_wqe : %d \n"\ -- "read_fence : %d \n"\ -- "local_fence : %d \n"\ -- "signaled : %d \n"\ -- "wqe_valid : %d\n", -- (unsigned long long)htole64(hdr.val), -- hdr.field.remote_key, -- hdr.field.op, -- hdr.field.addfragcnt, -- hdr.field.report_rtt, -- hdr.field.push_wqe, -- hdr.field.read_fence, -- hdr.field.local_fence, -- hdr.field.signaled_completion, -- hdr.field.wqe_valid); -- } --} -- --static void sxe2_set_bindmw_hdr(__le64 *wqe, struct sxe2_wr_info *wr_info, -- struct sxe2_qp_common *qp) --{ -- union sxe2_bindmw_hdr hdr; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- -- hdr.val = 0; -- hdr.field.op = wr_info->op_type; -- hdr.field.access = wr_info->op_info.bind_window.ena_reads << 2 | -- wr_info->op_info.bind_window.ena_writes << 3; -- hdr.field.va_base_flag = -- (wr_info->op_info.bind_window.addressing_type == -- SXE2_ADDR_TYPE_VA_BASED ? 1 : 0); -- hdr.field.mw_type = -- wr_info->op_info.bind_window.mem_window_type_1 ? 
0 : 1; -- hdr.field.push_wqe = wr_info->push_wqe; -- hdr.field.read_fence = wr_info->read_fence; -- hdr.field.local_fence = wr_info->local_fence; -- hdr.field.signaled_completion = wr_info->signaled; -- hdr.field.wqe_valid = qp->swqe_polarity; -- -- udma_to_device_barrier(); -- -- set_64bit_val(wqe, 24, hdr.val); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("BINDMW_WQE_HDR: (%#llx)\n" \ -- "op : %#x \n"\ -- "access : %d \n"\ -- "va_base_flag : %d \n"\ -- "mw_type : %d \n"\ -- "push_wqe : %d \n"\ -- "read_fence : %d \n"\ -- "local_fence : %d \n"\ -- "signaled : %d \n"\ -- "wqe_valid : %d\n", -- (unsigned long long)htole64(hdr.val), -- hdr.field.op, -- hdr.field.access, -- hdr.field.va_base_flag, -- hdr.field.mw_type, -- hdr.field.push_wqe, -- hdr.field.read_fence, -- hdr.field.local_fence, -- hdr.field.signaled_completion, -- hdr.field.wqe_valid); -- } --} -- --static void sxe2_set_invalidate_hdr(__le64 *wqe, struct sxe2_wr_info *wr_info, -- struct sxe2_qp_common *qp) --{ -- union sxe2_inval_hdr hdr; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- -- hdr.val = 0; -- hdr.field.op = wr_info->op_type; -- hdr.field.push_wqe = wr_info->push_wqe; -- hdr.field.read_fence = wr_info->read_fence; -- hdr.field.local_fence = wr_info->local_fence; -- hdr.field.signaled_completion = wr_info->signaled; -- hdr.field.wqe_valid = qp->swqe_polarity; -- -- udma_to_device_barrier(); -- -- set_64bit_val(wqe, 24, hdr.val); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("LOCALINVKEY_WQE_HDR: (%#llx)\n" \ -- "op : %#x \n"\ -- "push_wqe : %d \n"\ -- "read_fence : %d \n"\ -- "local_fence : %d \n"\ -- "signaled : %d \n"\ -- "wqe_valid : %d\n", -- (unsigned long long)htole64(hdr.val), -- hdr.field.op, -- hdr.field.push_wqe, -- hdr.field.read_fence, -- hdr.field.local_fence, -- 
hdr.field.signaled_completion, -- hdr.field.wqe_valid); -- } --} -- --static void sxe2_set_nop_hdr(__le64 *wqe, __u32 offset, -- struct sxe2_qp_common *qp) --{ -- union sxe2_nop_hdr hdr; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- -- hdr.val = 0; -- hdr.field.op = SXE2_OP_TYPE_NOP; -- hdr.field.signaled_completion = false; -- hdr.field.wqe_valid = qp->swqe_polarity; -- -- udma_to_device_barrier(); -- -- set_64bit_val(wqe, offset, hdr.val); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("NOP_WQE_HDR: (%#llx)\n" \ -- "op : %#x \n"\ -- "signaled : %d \n"\ -- "wqe_valid : %d\n", -- (unsigned long long)htole64(hdr.val), -- hdr.field.op, -- hdr.field.signaled_completion, -- hdr.field.wqe_valid); -- } --} -- --static int sxe2_hw_nop(struct sxe2_qp_common *qp) --{ -- __le64 *wqe; -- __u32 wqe_idx; -- -- if (!qp->sq_ring.head) -- return EINVAL; -- -- wqe_idx = SXE2_RING_CURRENT_HEAD(qp->sq_ring); -- wqe = qp->sq_base[wqe_idx].elem; -- -- qp->sq_wrtrk_array[wqe_idx].quanta = SXE2_QP_WQE_MIN_QUANTA; -- -- set_64bit_val(wqe, 0, 0); -- set_64bit_val(wqe, 8, 0); -- set_64bit_val(wqe, 16, 0); -- -- sxe2_set_nop_hdr(wqe, 24, qp); -- -- return 0; --} -- --static void sxe2_set_rcvq_hdr(__le64 *wqe, __u32 addl_frag_cnt, -- struct sxe2_qp_common *qp) --{ -- union sxe2_rq_hdr hdr; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- -- hdr.val = 0; -- hdr.field.addfragcnt = addl_frag_cnt; -- hdr.field.wqe_valid = qp->rwqe_polarity; -- -- udma_to_device_barrier(); -- -- set_64bit_val(wqe, 24, hdr.val); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("RCV_WQE_HDR: (%#llx)\n" \ -- "addfragcnt : %d \n"\ -- "wqe_valid : 
%d\n", -- (unsigned long long)htole64(hdr.val), -- hdr.field.addfragcnt, -- hdr.field.wqe_valid); -- } --} -- --static inline void sxe2_set_immedata(__le64 *wqe, __u32 offset, __u64 imm_data) --{ -- wqe[offset >> 3] = htole64(imm_data); --} -- --static inline void sxe2_set_sgelist_data(void *wqe, __u32 offset, -- struct ibv_sge *sge, __u8 valid) --{ -- struct sxe2_frag_data *pmsg = (struct sxe2_frag_data *)wqe; -- uint32_t len; -- -- pmsg = pmsg + offset / sizeof(*pmsg); -- if (sge) { -- len = (sge->length & ((uint32_t)1 << 31)) ? 0 : sge->length; -- pmsg->tag_offset = htole64(sge->addr); -- pmsg->offset8.field.frag_valid = valid; -- pmsg->offset8.field.frag_len = len; -- pmsg->offset8.field.stag = sge->lkey; -- pmsg->offset8.val = htole64(pmsg->offset8.val); -- } else { -- pmsg->tag_offset = 0; -- pmsg->offset8.field.frag_valid = valid; -- pmsg->offset8.val = htole64(pmsg->offset8.val); -- } --} -- --static void sxe2_set_inline_data_seg_list(__u8 *wqe, struct ibv_sge *sge_list, -- __u32 num_sges, __u8 polarity) --{ -- __u8 inline_valid = (__u8)(polarity << SXE2_INLINE_VALID_S); -- __u32 quanta_bytes_remaining = 8; -- __u32 i; -- bool first_quanta = true; -- -- wqe += 8; -- -- for (i = 0; i < num_sges; i++) { -- __u8 *cur_sge = (__u8 *)(uintptr_t)sge_list[i].addr; -- __u32 sge_len = sge_list[i].length; -- -- while (sge_len) { -- __u32 bytes_copied; -- -- bytes_copied = min(sge_len, quanta_bytes_remaining); -- memcpy(wqe, cur_sge, bytes_copied); -- wqe += bytes_copied; -- cur_sge += bytes_copied; -- quanta_bytes_remaining -= bytes_copied; -- sge_len -= bytes_copied; -- -- if (!quanta_bytes_remaining) { -- quanta_bytes_remaining = 31; -- -- if (first_quanta) { -- first_quanta = false; -- wqe += 16; -- } else { -- *wqe = inline_valid; -- wqe++; -- } -- } -- } -- } -- if (!first_quanta && quanta_bytes_remaining < 31) { -- *(wqe + quanta_bytes_remaining) = inline_valid; -- } --} -- --static void sxe2_set_wqe_mw_bind(__le64 *wqe, -- struct sxe2_bind_window *op_info, -- 
struct sxe2_qp_common *qp) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_bindmw_info *pmsg; -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- -- pmsg = (struct sxe2_bindmw_info *)wqe; -- pmsg->mw_va_base = htole64((uintptr_t)op_info->va); -- pmsg->offset8.field.mw_key = op_info->mw_stag; -- pmsg->offset8.field.mr_key = op_info->mr_stag; -- pmsg->offset8.val = htole64(pmsg->offset8.val); -- pmsg->offset16.field.mw_len = op_info->bind_len; -- pmsg->offset16.val = htole64(pmsg->offset16.val); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("BINDMW_WQE_INFO:\n" \ -- "mw_va_base : %p \n"\ -- "mw_key|mr_key : 0x%x|0x%x \n"\ -- "mw_len : %#llx \n", -- op_info->va, -- op_info->mw_stag, -- op_info->mr_stag, -- op_info->bind_len); -- } --} -- --static __le64 *sxe2_qp_get_next_send_wqe(struct sxe2_qp_common *qp, -- __u32 *wqe_idx, __u16 *quanta, __u32 total_size, -- __u64 wr_id, bool push_wqe) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- __le64 *wqe; -- __u32 nop_wqe_idx; -- __u16 wqe_quanta = *quanta; -- bool push_wqe_pad = false; -- __u16 avail_quanta; -- __u16 i; -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- -- if (push_wqe && (*quanta & 0x1)) { -- *quanta = *quanta + 1; -- push_wqe_pad = true; -- } -- -- avail_quanta = qp->common_attrs->max_hw_sq_chunk - -- (SXE2_RING_CURRENT_HEAD(qp->sq_ring) % -- qp->common_attrs->max_hw_sq_chunk); -- if (*quanta <= avail_quanta) { -- if (*quanta > SXE2_SQ_RING_FREE_QUANTA(qp->sq_ring)) { -- return NULL; -- } -- } else { -- if (*quanta + avail_quanta > -- SXE2_SQ_RING_FREE_QUANTA(qp->sq_ring)) { -- return NULL; -- } -- -- nop_wqe_idx = SXE2_RING_CURRENT_HEAD(qp->sq_ring); -- for (i = 0; i < avail_quanta; i++) { -- (void)sxe2_hw_nop(qp); -- 
SXE2_RING_MOVE_HEAD_NOCHECK(qp->sq_ring); -- } -- if (qp->push_db && push_wqe) { -- sxe2_qp_push_wqe(qp, qp->sq_base[nop_wqe_idx].elem, -- avail_quanta, nop_wqe_idx); -- } -- } -- -- *wqe_idx = SXE2_RING_CURRENT_HEAD(qp->sq_ring); -- if (!*wqe_idx) { -- qp->swqe_polarity = !qp->swqe_polarity; -- } -- -- SXE2_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, *quanta); -- -- wqe = qp->sq_base[*wqe_idx].elem; -- -- qp->sq_wrtrk_array[*wqe_idx].wrid = wr_id; -- qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size; -- qp->sq_wrtrk_array[*wqe_idx].quanta = wqe_quanta; -- -- if (push_wqe_pad) { -- __le64 *nop_wqe; -- -- nop_wqe_idx = *wqe_idx + wqe_quanta; -- nop_wqe = qp->sq_base[nop_wqe_idx].elem; -- qp->sq_wrtrk_array[nop_wqe_idx].quanta = SXE2_QP_WQE_MIN_QUANTA; -- set_64bit_val(nop_wqe, 0, 0); -- set_64bit_val(nop_wqe, 8, 0); -- set_64bit_val(nop_wqe, 16, 0); -- sxe2_set_nop_hdr(nop_wqe, 24, qp); -- } -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("POST SEND(qpn = %u): wqe_idx %u wr_id %llu"\ -- " ring_size %u sq_chunk %d pad %d push_mode %d\n", -- qp->qpn, *wqe_idx, qp->sq_wrtrk_array[*wqe_idx].wrid, -- SXE2_RING_SIZE(qp->sq_ring), qp->common_attrs->max_hw_sq_chunk, -- push_wqe_pad, push_wqe); -- } -- -- return wqe; --} -- --static __le64 *sxe2_qp_get_next_recv_wqe(struct sxe2_qp_common *qp, -- __u32 *wqe_idx) --{ -- __le64 *wqe; -- int ret_code; -- -- if (SXE2_RING_FULL_ERR(qp->rq_ring)) { -- return NULL; -- } -- -- SXE2_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code); -- if (ret_code) { -- return NULL; -- } -- -- if (!*wqe_idx) { -- qp->rwqe_polarity = !qp->rwqe_polarity; -- } -- wqe = qp->rq_base[*wqe_idx * qp->rq_wqe_size_multiplier].elem; -- -- return wqe; --} -- --int sxe2_hw_send(struct sxe2_qp_common *qp, struct sxe2_wr_info *wr_info, -- bool post_sq) --{ -- __le64 *wqe; -- struct sxe2_post_send *op_info; -- __u32 i, wqe_idx, total_size = 0, byte_off; -- int ret_code; -- __u32 frag_cnt, addl_frag_cnt; -- __u64 frag_info = 0; -- struct sxe2_uqp 
*uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- __u16 quanta; -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- -- op_info = &wr_info->op_info.send; -- if (qp->max_sq_sge_cnt < op_info->num_sges) { -- return EINVAL; -- } -- -- for (i = 0; i < op_info->num_sges; i++) { -- total_size += op_info->sg_list[i].length; -- } -- -- if (wr_info->imm_data_valid) { -- frag_cnt = op_info->num_sges + 1; -- } else { -- frag_cnt = op_info->num_sges; -- } -- -- ret_code = sxe2_fragcnt_to_quanta_cnt(frag_cnt, &quanta); -- if (ret_code) { -- return ret_code; -- } -- -- wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, -- wr_info->wr_id, wr_info->push_wqe); -- if (!wqe) { -- return ENOMEM; -- } -- -- addl_frag_cnt = frag_cnt > 1 ? (frag_cnt - 1) : 0; -- -- if (wr_info->imm_data_valid) { -- sxe2_set_immedata(wqe, 0, wr_info->imm_data); -- i = 0; -- } else { -- sxe2_set_sgelist_data(wqe, 0, -- frag_cnt ? 
op_info->sg_list : NULL, -- qp->swqe_polarity); -- i = 1; -- } -- -- if (total_size == 0) { -- get_64bit_val(wqe, 8, &frag_info); -- frag_info = frag_info & (~SXE2_WQE_FRAG_VALID); -- SXE2_SET_FIELD(frag_info, SXE2_WQE_FRAG_VALID, !qp->swqe_polarity); -- set_64bit_val(wqe, 8, frag_info); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr set frag_info [%#llx] field [%d]\n", -- frag_info, !qp->swqe_polarity); -- } -- } -- -- sxe2_set_qkeyqpn(wqe, 16, op_info->qkey, op_info->dest_qp); -- -- for (byte_off = 32; i < op_info->num_sges;) { -- sxe2_set_sgelist_data(wqe, byte_off, &op_info->sg_list[i], -- qp->swqe_polarity); -- byte_off += 16; -- i++; -- } -- -- if (!(frag_cnt & 0x01) && frag_cnt) { -- sxe2_set_sgelist_data(wqe, byte_off, NULL, qp->swqe_polarity); -- } -- -- sxe2_set_send_hdr(wqe, addl_frag_cnt, wr_info, qp); -- -- if (wr_info->push_wqe) { -- sxe2_qp_push_wqe(qp, wqe, quanta, wqe_idx); -- } else if (post_sq) { -- sxe2_qp_ring_normal_db(qp); -- } -- sxe2_dump_wqe(qp, wqe, quanta, wqe_idx, "send"); -- -- return 0; --} -- --int sxe2_hw_inline_send(struct sxe2_qp_common *qp, -- struct sxe2_wr_info *wr_info, bool post_sq) --{ -- __le64 *wqe; -- struct sxe2_post_send *op_info; -- __u32 wqe_idx; -- __u32 i, total_size = 0; -- __u16 quanta; -- -- op_info = &wr_info->op_info.send; -- -- if (unlikely(qp->max_sq_sge_cnt < op_info->num_sges)) { -- return EINVAL; -- } -- -- for (i = 0; i < op_info->num_sges; i++) { -- total_size += op_info->sg_list[i].length; -- } -- -- if (unlikely(total_size > qp->max_inline_data)) { -- return EINVAL; -- } -- -- quanta = sxe2_inline_to_quanta_cnt(total_size); -- wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, -- wr_info->wr_id, wr_info->push_wqe); -- if (!wqe) { -- return ENOMEM; -- } -- -- if (wr_info->imm_data_valid) { -- sxe2_set_immedata(wqe, 0, wr_info->imm_data); -- } -- -- sxe2_set_qkeyqpn(wqe, 16, op_info->qkey, op_info->dest_qp); -- -- sxe2_set_inline_data_seg_list((__u8 *)wqe, op_info->sg_list, 
-- op_info->num_sges, qp->swqe_polarity); -- -- sxe2_set_inlinesnd_hdr(wqe, total_size, wr_info, qp); -- -- if (wr_info->push_wqe) { -- sxe2_qp_push_wqe(qp, wqe, quanta, wqe_idx); -- } else if (post_sq) { -- sxe2_qp_ring_normal_db(qp); -- } -- --#ifdef SXE2_CFG_DEBUG -- if ((wr_info->op_type == SXE2_OP_TYPE_SEND_INV) || -- (wr_info->op_type == SXE2_OP_TYPE_SEND_SOL_INV)) { -- qp->statistics.send_inv_cnt++; -- } else { -- qp->statistics.send_cnt++; -- } -- -- if (wr_info->signaled) { -- qp->statistics.total_signal_cnt++; -- } -- -- qp->statistics.total_sqe_cnt++; -- qp->statistics.last_send_sqwrid = wr_info->wr_id; --#endif -- -- sxe2_dump_wqe(qp, wqe, quanta, wqe_idx, "inline_send"); -- -- return 0; --} -- --int sxe2_hw_rdma_write(struct sxe2_qp_common *qp, -- struct sxe2_wr_info *wr_info, bool post_sq) --{ -- __le64 *wqe; -- struct sxe2_rdma_write *op_info; -- __u32 i, wqe_idx; -- __u32 total_size = 0, byte_off; -- int ret_code; -- __u32 frag_cnt, addl_frag_cnt; -- __u16 quanta; -- __u64 frag_info = 0; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- -- op_info = &wr_info->op_info.rdma_write; -- if (op_info->num_lo_sges > qp->max_sq_sge_cnt) { -- return EINVAL; -- } -- -- for (i = 0; i < op_info->num_lo_sges; i++) { -- total_size += op_info->lo_sg_list[i].length; -- } -- -- if (wr_info->imm_data_valid) { -- frag_cnt = op_info->num_lo_sges + 1; -- } else { -- frag_cnt = op_info->num_lo_sges; -- } -- -- addl_frag_cnt = frag_cnt > 1 ? 
(frag_cnt - 1) : 0; -- ret_code = sxe2_fragcnt_to_quanta_cnt(frag_cnt, &quanta); -- if (ret_code) { -- return ret_code; -- } -- -- wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, -- wr_info->wr_id, wr_info->push_wqe); -- if (!wqe) { -- return ENOMEM; -- } -- -- if (wr_info->imm_data_valid) { -- sxe2_set_immedata(wqe, 0, wr_info->imm_data); -- i = 0; -- } else { -- sxe2_set_sgelist_data(wqe, 0, op_info->lo_sg_list, -- qp->swqe_polarity); -- i = 1; -- } -- -- if (total_size == 0) { -- get_64bit_val(wqe, 8, &frag_info); -- frag_info = frag_info & (~SXE2_WQE_FRAG_VALID); -- SXE2_SET_FIELD(frag_info, SXE2_WQE_FRAG_VALID, !qp->swqe_polarity); -- set_64bit_val(wqe, 8, frag_info); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr set frag_info [%#llx] field [%d]\n", -- frag_info, !qp->swqe_polarity); -- } -- } -- -- sxe2_set_remote_offset(wqe, 16, op_info->rem_addr.addr); -- -- for (byte_off = 32; i < op_info->num_lo_sges;) { -- sxe2_set_sgelist_data(wqe, byte_off, &op_info->lo_sg_list[i], -- qp->swqe_polarity); -- byte_off += 16; -- i++; -- } -- -- if (!(frag_cnt & 0x01) && frag_cnt) { -- sxe2_set_sgelist_data(wqe, byte_off, NULL, qp->swqe_polarity); -- } -- -- sxe2_set_write_hdr(wqe, addl_frag_cnt, wr_info, qp); -- -- if (wr_info->push_wqe) { -- sxe2_qp_push_wqe(qp, wqe, quanta, wqe_idx); -- } else if (post_sq) { -- sxe2_qp_ring_normal_db(qp); -- } -- sxe2_dump_wqe(qp, wqe, quanta, wqe_idx, "write"); -- -- return 0; --} -- --int sxe2_hw_inline_rdma_write(struct sxe2_qp_common *qp, -- struct sxe2_wr_info *wr_info, bool post_sq) --{ -- __le64 *wqe; -- struct sxe2_rdma_write *op_info; -- __u32 wqe_idx; -- __u32 i, total_size = 0; -- __u16 quanta; -- -- op_info = &wr_info->op_info.rdma_write; -- -- if (unlikely(qp->max_sq_sge_cnt < op_info->num_lo_sges)) { -- return EINVAL; -- } -- -- for (i = 0; i < op_info->num_lo_sges; i++) { -- total_size += op_info->lo_sg_list[i].length; -- } -- -- if (unlikely(total_size > qp->max_inline_data)) { -- 
return EINVAL; -- } -- -- quanta = sxe2_inline_to_quanta_cnt(total_size); -- wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, -- wr_info->wr_id, wr_info->push_wqe); -- if (!wqe) { -- return ENOMEM; -- } -- -- if (wr_info->imm_data_valid) { -- sxe2_set_immedata(wqe, 0, wr_info->imm_data); -- } -- -- sxe2_set_remote_offset(wqe, 16, op_info->rem_addr.addr); -- -- sxe2_set_inline_data_seg_list((__u8 *)wqe, op_info->lo_sg_list, -- op_info->num_lo_sges, qp->swqe_polarity); -- -- sxe2_set_inlinewrite_hdr(wqe, total_size, wr_info, qp); -- -- if (wr_info->push_wqe) { -- sxe2_qp_push_wqe(qp, wqe, quanta, wqe_idx); -- } else if (post_sq) { -- sxe2_qp_ring_normal_db(qp); -- } -- --#ifdef SXE2_CFG_DEBUG -- qp->statistics.write_cnt++; -- -- if (wr_info->signaled) { -- qp->statistics.total_signal_cnt++; -- } -- -- qp->statistics.total_sqe_cnt++; -- qp->statistics.last_send_sqwrid = wr_info->wr_id; --#endif -- -- sxe2_dump_wqe(qp, wqe, quanta, wqe_idx, "inline_write"); -- -- return 0; --} -- --int sxe2_hw_rdma_read(struct sxe2_qp_common *qp, -- struct sxe2_wr_info *wr_info, bool post_sq) --{ -- struct sxe2_rdma_read *op_info; -- int ret_code; -- __u32 i, byte_off, total_size = 0; -- __u32 addl_frag_cnt; -- __le64 *wqe; -- __u32 wqe_idx; -- __u16 quanta; -- __u64 frag_info = 0; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- bool ord_fence = false; -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- -- op_info = &wr_info->op_info.rdma_read; -- if (qp->max_sq_sge_cnt < op_info->num_lo_sges) { -- return EINVAL; -- } -- -- for (i = 0; i < op_info->num_lo_sges; i++) { -- total_size += op_info->lo_sg_list[i].length; -- } -- -- ret_code = sxe2_fragcnt_to_quanta_cnt(op_info->num_lo_sges, &quanta); -- if (ret_code) { -- return ret_code; -- } -- if (qp->rd_fence_rate && (qp->ord_cnt++ == qp->rd_fence_rate)) { -- ord_fence = true; -- qp->ord_cnt = 
0; -- } -- -- wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, -- wr_info->wr_id, wr_info->push_wqe); -- if (!wqe) { -- return ENOMEM; -- } -- -- addl_frag_cnt = -- op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0; -- -- sxe2_set_sgelist_data(wqe, 0, op_info->lo_sg_list, qp->swqe_polarity); -- -- if (total_size == 0) { -- get_64bit_val(wqe, 8, &frag_info); -- frag_info = frag_info & (~SXE2_WQE_FRAG_VALID); -- SXE2_SET_FIELD(frag_info, SXE2_WQE_FRAG_VALID, !qp->swqe_polarity); -- set_64bit_val(wqe, 8, frag_info); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr set frag_info [%#llx] field [%d]\n", -- frag_info, !qp->swqe_polarity); -- } -- } -- -- sxe2_set_remote_offset(wqe, 16, op_info->rem_addr.addr); -- -- for (i = 1, byte_off = 32; i < op_info->num_lo_sges; ++i) { -- sxe2_set_sgelist_data(wqe, byte_off, &op_info->lo_sg_list[i], -- qp->swqe_polarity); -- byte_off += 16; -- } -- -- if (!(op_info->num_lo_sges & 0x01) && op_info->num_lo_sges) { -- sxe2_set_sgelist_data(wqe, byte_off, NULL, qp->swqe_polarity); -- } -- -- sxe2_set_read_hdr(wqe, addl_frag_cnt, wr_info, qp, ord_fence); -- -- if (wr_info->push_wqe) { -- sxe2_qp_push_wqe(qp, wqe, quanta, wqe_idx); -- } else if (post_sq) { -- sxe2_qp_ring_normal_db(qp); -- } -- --#ifdef SXE2_CFG_DEBUG -- qp->statistics.read_cnt++; -- -- if (wr_info->signaled) { -- qp->statistics.total_signal_cnt++; -- } -- -- qp->statistics.total_sqe_cnt++; -- qp->statistics.last_send_sqwrid = wr_info->wr_id; --#endif -- -- sxe2_dump_wqe(qp, wqe, quanta, wqe_idx, "rdma_read"); -- -- return 0; --} -- --int sxe2_hw_mw_bind(struct sxe2_qp_common *qp, struct sxe2_wr_info *wr_info, -- bool post_sq) --{ -- __le64 *wqe; -- struct sxe2_bind_window *op_info; -- __u32 wqe_idx; -- __u16 quanta = SXE2_QP_WQE_MIN_QUANTA; -- -- op_info = &wr_info->op_info.bind_window; -- -- wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, -- 0, wr_info->wr_id, wr_info->push_wqe); -- if (!wqe) { -- return ENOMEM; -- } -- -- 
sxe2_set_wqe_mw_bind(wqe, op_info, qp); -- -- sxe2_set_bindmw_hdr(wqe, wr_info, qp); -- -- if (wr_info->push_wqe) { -- sxe2_qp_push_wqe(qp, wqe, quanta, wqe_idx); -- } else if (post_sq) { -- sxe2_qp_ring_normal_db(qp); -- } -- --#ifdef SXE2_CFG_DEBUG -- qp->statistics.bind_mw_cnt++; -- -- if (wr_info->signaled) { -- qp->statistics.total_signal_cnt++; -- } -- -- qp->statistics.total_sqe_cnt++; -- qp->statistics.last_send_sqwrid = wr_info->wr_id; --#endif -- -- sxe2_dump_wqe(qp, wqe, quanta, wqe_idx, "mw_bind"); -- -- return 0; --} -- --int sxe2_hw_local_invalidate(struct sxe2_qp_common *qp, -- struct sxe2_wr_info *wr_info, bool post_sq) --{ -- __le64 *wqe; -- struct sxe2_local_invalidate *op_info; -- __u32 wqe_idx; -- __u16 quanta = SXE2_QP_WQE_MIN_QUANTA; -- struct ibv_sge sge = {}; -- -- op_info = &wr_info->op_info.local_inval; -- -- wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, -- 0, wr_info->wr_id, wr_info->push_wqe); -- if (!wqe) { -- return ENOMEM; -- } -- -- sge.lkey = op_info->target_stag; -- sxe2_set_sgelist_data(wqe, 0, &sge, 0); -- -- set_64bit_val(wqe, 16, 0); -- -- sxe2_set_invalidate_hdr(wqe, wr_info, qp); -- -- if (wr_info->push_wqe) { -- sxe2_qp_push_wqe(qp, wqe, quanta, wqe_idx); -- } else if (post_sq) { -- sxe2_qp_ring_normal_db(qp); -- } -- --#ifdef SXE2_CFG_DEBUG -- qp->statistics.local_inv_cnt++; -- -- if (wr_info->signaled) { -- qp->statistics.total_signal_cnt++; -- } -- -- qp->statistics.total_sqe_cnt++; -- qp->statistics.last_send_sqwrid = wr_info->wr_id; --#endif -- -- sxe2_dump_wqe(qp, wqe, quanta, wqe_idx, "local_invalidate"); -- -- return 0; --} -- --static int sxe2_hw_post_receive(struct sxe2_qp_common *qp, -- struct sxe2_rq_info *wr_info) --{ -- __u32 wqe_idx, i, byte_off, sge_count_valid = 0; -- __u32 addl_frag_cnt; -- __le64 *wqe; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct ibv_sge *p_sge = NULL; -- -- uqp = container_of(qp, struct sxe2_uqp, qp); -- sctx = 
container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- if (qp->max_rq_sge_cnt < wr_info->num_sges) { -- return EINVAL; -- } -- if (!qp->rq_size) { -- return EINVAL; -- } -- wqe = sxe2_qp_get_next_recv_wqe(qp, &wqe_idx); -- if (!wqe) { -- return ENOMEM; -- } -- for (i = 0; i < wr_info->num_sges; i++) { -- if (wr_info->sg_list[i].length != 0) { -- sge_count_valid++; -- if (p_sge == NULL) { -- p_sge = &wr_info->sg_list[i]; -- } -- } -- } -- qp->rq_wrid_array[wqe_idx] = wr_info->wr_id; -- -- addl_frag_cnt = sge_count_valid > 1 ? (sge_count_valid - 1) : 0; -- if (sge_count_valid) { -- sxe2_set_sgelist_data(wqe, 0, p_sge, qp->rwqe_polarity); -- } else { -- sxe2_set_sgelist_data(wqe, 0, NULL, qp->rwqe_polarity); -- } -- if (sge_count_valid) { -- for (i = 1, byte_off = SXE2_RQ_WQE_HEAD_OFFSET; i < wr_info->num_sges; i++) { -- if (wr_info->sg_list[i].length == 0) { -- continue; -- } -- sxe2_set_sgelist_data(wqe, byte_off, &wr_info->sg_list[i], -- qp->rwqe_polarity); -- byte_off += SXE2_RQ_WQE_FRAG_OFFSET; -- } -- if (!(sge_count_valid & SXE2_WQE_QUANTA_ODD_NUMBER) && sge_count_valid) { -- sxe2_set_sgelist_data(wqe, byte_off, NULL, qp->rwqe_polarity); -- } -- } -- set_64bit_val(wqe, 16, 0); -- -- sxe2_set_rcvq_hdr(wqe, addl_frag_cnt, qp); -- -- udma_to_device_barrier(); -- -- qp->doorbell_note[SXE2_QP_RQ_PI] = -- htole32(SXE2_RING_CURRENT_HEAD(qp->rq_ring) * qp->rq_wqe_size_multiplier); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("POST RCV(qpn = %u): wqe_idx %u wr_id %llu"\ -- " rq_pi %u ring_size %u\n", qp->qpn, wqe_idx, -- qp->rq_wrid_array[wqe_idx], -- SXE2_RING_CURRENT_HEAD(qp->rq_ring) * qp->rq_wqe_size_multiplier, -- SXE2_RING_SIZE(qp->rq_ring)); -- for (i = 0; i < wr_info->num_sges; i++) { -- SXE2_VERBS_LOG_DEBUG_BDF("sgelist[%d] addr %" PRIu64 " len [%u] lkey [%u]\n", -- i, wr_info->sg_list[i].addr, wr_info->sg_list[i].length, -- wr_info->sg_list[i].lkey); -- } -- } -- --#ifdef SXE2_CFG_DEBUG -- 
qp->statistics.total_rqe_cnt++; -- qp->statistics.last_send_rqwrid = wr_info->wr_id; --#endif -- -- return 0; --} --static void sxe2_wrinfo_init_inv(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, -- struct sxe2_wr_info *wr_info) --{ -- struct sxe2_uah *ah; -- -- if (ib_wr->opcode == IBV_WR_SEND || -- ib_wr->opcode == IBV_WR_SEND_WITH_IMM) { -- wr_info->op_type = SXE2_OP_TYPE_SEND; -- if (ib_wr->send_flags & IBV_SEND_SOLICITED) { -- wr_info->op_type = SXE2_OP_TYPE_SEND_SOL; -- } -- } else { -- wr_info->op_type = SXE2_OP_TYPE_SEND_INV; -- if (ib_wr->send_flags & IBV_SEND_SOLICITED) { -- wr_info->op_type = SXE2_OP_TYPE_SEND_SOL_INV; -- } -- wr_info->rkey_to_inv = ib_wr->invalidate_rkey; -- } -- wr_info->op_info.send.num_sges = (__u32)ib_wr->num_sge; -- wr_info->op_info.send.sg_list = (struct ibv_sge *)ib_wr->sg_list; -- if (ib_qp->qp_type == IBV_QPT_UD) { -- ah = container_of(ib_wr->wr.ud.ah, struct sxe2_uah, ibv_ah); -- wr_info->op_info.send.ah_id = ah->ah_id; -- wr_info->op_info.send.qkey = ib_wr->wr.ud.remote_qkey; -- wr_info->op_info.send.dest_qp = ib_wr->wr.ud.remote_qpn; -- } -- wr_info->funid = SXE2_RDMA_SEND; -- if (ib_wr->send_flags & IBV_SEND_INLINE) { -- wr_info->funid = SXE2_RDMA_SEND_INLINE; -- } --} --static int sxe2_wrinfo_init(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, -- struct sxe2_wr_info *wr_info) --{ -- struct sxe2_common_attrs *uk_attrs; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- int err = 0; -- -- uqp = container_of(ib_qp, struct sxe2_uqp, verbs_qp.qp); -- sctx = container_of(ib_qp->context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- uk_attrs = &sctx->uk_attrs; -- -- memset(wr_info, 0, sizeof(*wr_info)); -- -- wr_info->wr_id = (__u64)(ib_wr->wr_id); -- wr_info->push_wqe = uqp->qp.push_db ? 
true : false; -- -- if ((ib_wr->send_flags & IBV_SEND_SIGNALED) || uqp->sq_sig_all) { -- wr_info->signaled = true; -- } -- -- if (ib_wr->send_flags & IBV_SEND_FENCE) { -- wr_info->read_fence = true; -- } -- -- if (uqp->send_cq->report_rtt) { -- wr_info->report_rtt = true; -- } -- -- switch (ib_wr->opcode) { -- case IBV_WR_SEND_WITH_IMM: -- if (!(uqp->qp.qp_caps & SXE2_SEND_WITH_IMM)) { -- return EINVAL; -- } -- wr_info->imm_data_valid = true; -- wr_info->imm_data = ntohl(ib_wr->imm_data); -- SWITCH_FALLTHROUGH; -- case IBV_WR_SEND: -- case IBV_WR_SEND_WITH_INV: -- sxe2_wrinfo_init_inv(ib_qp, ib_wr, wr_info); -- break; -- case IBV_WR_RDMA_WRITE_WITH_IMM: -- if (!(uqp->qp.qp_caps & SXE2_WRITE_WITH_IMM)) { -- return EINVAL; -- } -- wr_info->imm_data_valid = true; -- wr_info->imm_data = ntohl(ib_wr->imm_data); -- SWITCH_FALLTHROUGH; -- case IBV_WR_RDMA_WRITE: -- wr_info->op_type = SXE2_OP_TYPE_RDMA_WRITE; -- if (ib_wr->send_flags & IBV_SEND_SOLICITED) { -- wr_info->op_type = SXE2_OP_TYPE_RDMA_WRITE_SOL; -- } -- -- wr_info->op_info.rdma_write.num_lo_sges = (__u32)ib_wr->num_sge; -- wr_info->op_info.rdma_write.lo_sg_list = ib_wr->sg_list; -- wr_info->op_info.rdma_write.rem_addr.addr = -- ib_wr->wr.rdma.remote_addr; -- wr_info->op_info.rdma_write.rem_addr.lkey = ib_wr->wr.rdma.rkey; -- wr_info->funid = SXE2_RDMA_WRITE; -- if (ib_wr->send_flags & IBV_SEND_INLINE) { -- wr_info->funid = SXE2_RDMA_WRITE_INLINE; -- } -- break; -- case IBV_WR_RDMA_READ: -- if (ib_wr->num_sge > (int)uk_attrs->max_hw_read_sges) { -- return EINVAL; -- } -- wr_info->op_type = SXE2_OP_TYPE_RDMA_READ; -- wr_info->op_info.rdma_read.rem_addr.addr = -- ib_wr->wr.rdma.remote_addr; -- wr_info->op_info.rdma_read.rem_addr.lkey = ib_wr->wr.rdma.rkey; -- -- wr_info->op_info.rdma_read.lo_sg_list = ib_wr->sg_list; -- wr_info->op_info.rdma_read.num_lo_sges = (__u32)ib_wr->num_sge; -- wr_info->funid = SXE2_RDMA_READ; -- break; -- case IBV_WR_LOCAL_INV: -- wr_info->op_type = SXE2_OP_TYPE_LOCAL_INV; -- 
wr_info->op_info.local_inval.target_stag = -- ib_wr->invalidate_rkey; -- wr_info->funid = SXE2_RDMA_LOCAL_INV; -- wr_info->post_wqe = true; -- break; -- default: -- err = EINVAL; -- break; -- } -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr_info qpn [0x%x] opcode [%d] "\ -- "wr_id %llu push_mode %d post_wqe %d signald %d\n", -- ib_qp->qp_num, ib_wr->opcode, wr_info->wr_id, -- wr_info->push_wqe, wr_info->post_wqe, wr_info->signaled); -- } -- return err; --} -- --int sxe2_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, -- struct ibv_send_wr **bad_wr) --{ -- struct sxe2_uqp *uqp = NULL; -- bool reflush = false; -- struct sxe2_wr_info wr_info; -- struct sxe2_rdma_ucontext *sctx = NULL; -- int err; -- -- uqp = container_of(ib_qp, struct sxe2_uqp, verbs_qp.qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("upost send enter, qpn [%u]\n", -- uqp->qp.qpn); -- } -- -- err = pthread_spin_lock(&uqp->lock); -- if (err) { -- return err; -- } -- -- if (!SXE2_RING_MORE_WORK(uqp->qp.sq_ring) && -- ib_qp->state == IBV_QPS_ERR) { -- reflush = true; -- } -- -- while (ib_wr) { -- err = sxe2_wrinfo_init(ib_qp, ib_wr, &wr_info); -- if (err) { -- *bad_wr = ib_wr; -- break; -- } -- --#if 0 -- wr_info.post_wqe = true; --#endif -- err = g_rdma_op[wr_info.funid](&uqp->qp, &wr_info, -- wr_info.post_wqe); -- if (err) { -- *bad_wr = ib_wr; -- break; -- } -- ib_wr = ib_wr->next; -- } -- -- if (!uqp->qp.push_db) -- sxe2_qp_ring_normal_db(&uqp->qp); -- if (reflush) -- sxe2_qp_flush_wqe(ib_qp, true, false); -- -- pthread_spin_unlock(&uqp->lock); -- -- return err; --} -- --int sxe2_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, -- struct ibv_recv_wr **bad_wr) --{ -- struct sxe2_rq_info rq_info = {}; -- struct sxe2_uqp *uqp = NULL; -- bool reflush = false; -- struct sxe2_rdma_ucontext *sctx = NULL; -- int err; -- -- uqp = container_of(ib_qp, struct 
sxe2_uqp, verbs_qp.qp); -- sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("upost receive enter, qpn [%u]\n", -- uqp->qp.qpn); -- } -- -- err = pthread_spin_lock(&uqp->lock); -- if (err) { -- return err; -- } -- -- if (!SXE2_RING_MORE_WORK(uqp->qp.rq_ring) && -- ib_qp->state == IBV_QPS_ERR) { -- reflush = true; -- } -- -- while (ib_wr) { -- if (ib_wr->num_sge > (int)uqp->qp.max_rq_sge_cnt) { -- *bad_wr = ib_wr; -- err = EINVAL; -- goto error; -- } -- rq_info.num_sges = (__u32)ib_wr->num_sge; -- rq_info.wr_id = ib_wr->wr_id; -- rq_info.sg_list = ib_wr->sg_list; -- err = sxe2_hw_post_receive(&uqp->qp, &rq_info); -- if (err) { -- *bad_wr = ib_wr; -- goto error; -- } -- -- if (reflush) { -- sxe2_qp_flush_wqe(ib_qp, false, true); -- } -- -- ib_wr = ib_wr->next; -- } --error: -- pthread_spin_unlock(&uqp->lock); -- -- return err; --} -- --static int sxe2_get_next_cqe(struct sxe2_cq_uk *cq, struct sxe2_cqe_info *cqe_info) --{ -- int i; -- __le64 *cqe; -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_ucq *vendor_cq = NULL; -- vendor_cq = container_of(cq, struct sxe2_ucq, cq); -- sctx = container_of(vendor_cq->verbs_cq.cq.context, struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- cqe = SXE2_GET_CURRENT_CQ_ELEM(cq); -- -- cqe_info->info.buf[SXE2_CQE_SIZE - 1] = le64toh(cqe[SXE2_CQE_SIZE - 1]); -- if (cqe_info->info.field.cqe_valid != cq->polarity) { -- return -ENOENT; -- } -- -- udma_from_device_barrier(); -- -- for (i = 0; i < SXE2_CQE_SIZE - 1; i++) { -- cqe_info->info.buf[i] = le64toh(cqe[i]); -- } -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("CQ1 (%d) CI(0x%x) cqeinfo:\n"\ -- "%#llx(%#llx) %#llx(%#llx) %#llx(%#llx) %#llx(%#llx)\n"\ -- "%#llx(%#llx) %#llx(%#llx) %#llx(%#llx) %#llx(%#llx)\n", -- cq->cq_id, -- cq->cq_ring.head, -- cqe[0], cqe_info->info.buf[0], -- cqe[1], cqe_info->info.buf[1], -- cqe[2], cqe_info->info.buf[2], -- cqe[3], 
cqe_info->info.buf[3], -- cqe[4], cqe_info->info.buf[4], -- cqe[5], cqe_info->info.buf[5], -- cqe[6], cqe_info->info.buf[6], -- cqe[7], cqe_info->info.buf[7]); -- } -- -- return SXE2_CQ_OK; --} -- --static int sxe2_hw_flush_one_sq_wqe(struct sxe2_cq_uk *cq, -- struct sxe2_qp_common *qp, struct sxe2_cqe_info *cqe_info) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_ucq *vendor_cq = NULL; -- __le64 *sw_wqe; -- __u64 wqe_hdr; -- __u32 tail; -- -- vendor_cq = container_of(cq, struct sxe2_ucq, cq); -- sctx = container_of(vendor_cq->verbs_cq.cq.context, struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- if (!SXE2_RING_MORE_WORK(qp->sq_ring) || !SXE2_RING_MORE_WORK_PAD(qp->sq_ring)) { -- return -ENOENT; -- } -- do { -- tail = qp->sq_ring.tail; -- sw_wqe = qp->sq_base[tail].elem; -- get_64bit_val(sw_wqe, 24, &wqe_hdr); -- cqe_info->info.field.op = (__u8)FIELD_GET(SXE2_WQE_OPCODE, wqe_hdr); -- SXE2_RING_SET_TAIL(qp->sq_ring, -- tail + qp->sq_wrtrk_array[tail].quanta); -- if (cqe_info->info.field.op != SXE2_OP_TYPE_NOP) { -- cqe_info->wr_id = qp->sq_wrtrk_array[tail].wrid; -- cqe_info->bytes = qp->sq_wrtrk_array[tail].wr_len; -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("flush qp %u sq_pi %u sq_ci %u wqe_idx %u wr_id %llu.\n", -- qp->qpn, SXE2_RING_CURRENT_HEAD(qp->sq_ring), -- SXE2_RING_CURRENT_TAIL(qp->sq_ring), tail, -- cqe_info->wr_id); -- } --#ifdef SXE2_CFG_DEBUG -- qp->statistics.flushed_sq_cnt++; -- qp->statistics.last_rcvd_sqwrid = cqe_info->wr_id; --#endif -- break; -- } -- } while (1); -- -- return SXE2_CQ_OK; --} -- --static void sxe2_move_srq_ring_tail(struct sxe2_srq_verbs *srq) --{ -- __u32 tail; -- while (SXE2_RING_MORE_WORK(srq->srq_ring)) { -- tail = SXE2_RING_CURRENT_TAIL(srq->srq_ring); -- if (srq->srqe_array[tail] == SXE2_SRQE_BUSY) { -- break; -- } -- SXE2_RING_MOVE_TAIL(srq->srq_ring); -- } --} --static void sxe2_hw_deal_srq_cqe(struct sxe2_rdma_ucontext *sctx, struct sxe2_cq_uk *cq, -- struct sxe2_cqe_info *cqe_info, struct 
sxe2_qp_common *qp, __u32 qpn) --{ -- struct sxe2_srq_verbs *srq; -- __u32 wqe_idx; -- __u32 array_idx; -- -- srq = qp->srq; -- wqe_idx = (__u32)cqe_info->info.field.wq_desc_idx; -- array_idx = wqe_idx / srq->wqe_size_multiplier; -- if (srq->srqe_array[array_idx] == SXE2_SRQE_BUSY) { -- srq->srqe_array[array_idx] = SXE2_SRQE_FREE; -- } else { -- SXE2_VERBS_LOG_ERROR_BDF("cq %d received invalid cqe"\ -- "(qpn %u srqn %u wqe_idx %u mul_size %d flag %d).\n", -- cq->cq_id, qpn, srq->srq_id, wqe_idx, -- srq->wqe_size_multiplier, srq->srqe_array[array_idx]); -- } -- cqe_info->wr_id = srq->srq_wrid_array[array_idx]; -- sxe2_move_srq_ring_tail(srq); --} --static int sxe2_hw_deal_rq_cqe(struct sxe2_rdma_ucontext *sctx, struct sxe2_cq_uk *cq, -- struct sxe2_cqe_info *cqe_info, struct sxe2_qp_common *qp, __u32 qpn) --{ -- __u32 wqe_idx; -- __u32 array_idx; -- -- wqe_idx = (__u32)cqe_info->info.field.wq_desc_idx; -- array_idx = wqe_idx / qp->rq_wqe_size_multiplier; -- if (cqe_info->info.field.major_err == SXE2_WR_FLUSH_ERR) { -- if (!SXE2_RING_MORE_WORK(qp->rq_ring)) { -- return -ENOENT; -- } -- SXE2_VERBS_LOG_DEBUG_BDF("flush qp %u state rq_pi %u rq_ci %u .\n", -- qpn, SXE2_RING_CURRENT_HEAD(qp->rq_ring), -- SXE2_RING_CURRENT_TAIL(qp->rq_ring)); -- array_idx = qp->rq_ring.tail; -- } -- cqe_info->wr_id = qp->rq_wrid_array[array_idx]; -- SXE2_RING_SET_TAIL(qp->rq_ring, array_idx + 1); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("update qp %u rq_ci %u wqeidx %u arridx %u wrid %llu.\n", -- qpn, SXE2_RING_CURRENT_TAIL(qp->rq_ring), wqe_idx, -- array_idx, cqe_info->wr_id); -- } -- return 0; --} --static int sxe2_hw_deal_sq_cqe(struct sxe2_rdma_ucontext *sctx, struct sxe2_cq_uk *cq, -- struct sxe2_cqe_info *cqe_info, struct sxe2_qp_common *qp, __u32 qpn) --{ -- __u32 wqe_idx; -- -- wqe_idx = (__u32)cqe_info->info.field.wq_desc_idx; -- -- if (cqe_info->info.field.push_dropped) { -- qp->llwqe_mode = false; -- qp->push_dropped = true; -- } -- if 
(cqe_info->info.field.major_err != SXE2_WR_FLUSH_ERR) { -- cqe_info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; -- cqe_info->bytes = qp->sq_wrtrk_array[wqe_idx].wr_len; -- SXE2_RING_SET_TAIL(qp->sq_ring, wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("update qp %u sq_ci %u wqe_idx %u wr_id %llu.\n", -- qpn, SXE2_RING_CURRENT_TAIL(qp->sq_ring), wqe_idx, -- cqe_info->wr_id); -- } -- } else { -- return sxe2_hw_flush_one_sq_wqe(cq, qp, cqe_info); -- } -- return 0; --} -- --static int sxe2_hw_cq_poll(struct sxe2_cq_uk *cq, -- struct sxe2_cqe_info *cqe_info) --{ -- struct sxe2_qp_common *qp; -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_ucq *vendor_cq = NULL; -- struct sxe2_ring *cur_ring = NULL; -- __u32 wqe_idx; -- int ret_code = SXE2_CQ_OK; -- __le64 *cqe; -- __u64 qword = 0; -- __u32 qpn = 0; -- bool move_cq_head = true; -- -- vendor_cq = container_of(cq, struct sxe2_ucq, cq); -- sctx = container_of(vendor_cq->verbs_cq.cq.context, struct sxe2_rdma_ucontext, ibv_ctx.context); -- ret_code = sxe2_get_next_cqe(cq, cqe_info); -- if (ret_code != SXE2_CQ_OK) { -- return ret_code; -- } -- -- qp = (struct sxe2_qp_common *)(unsigned long)cqe_info->info.field.qpc; -- if (!qp || qp->destroy_pending) { -- ret_code = -EFAULT; -- goto exit; -- } -- -- qpn = qp->qpn; -- wqe_idx = (__u32)cqe_info->info.field.wq_desc_idx; -- cqe_info->bytes = cqe_info->info.field.payload_len; -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("cqe process cqn [%u] ci [%u] qp [%u] wqeidx %u.\n", -- cq->cq_id, SXE2_RING_CURRENT_HEAD(cq->cq_ring), qpn, wqe_idx); -- } -- if (cqe_info->info.field.is_srq) { -- sxe2_hw_deal_srq_cqe(sctx, cq, cqe_info, qp, qpn); -- } else if (cqe_info->info.field.qp_type == SXE2_CQE_QTYPE_RQ) { -- cur_ring = &qp->rq_ring; -- ret_code = sxe2_hw_deal_rq_cqe(sctx, cq, cqe_info, qp, qpn); -- if (ret_code) { -- goto exit; -- } -- } else { -- cur_ring = &qp->sq_ring; -- ret_code = 
sxe2_hw_deal_sq_cqe(sctx, cq, cqe_info, qp, qpn); -- if (ret_code) { -- goto exit; -- } -- } -- --exit: -- if (!ret_code && cqe_info->info.field.major_err == SXE2_WR_FLUSH_ERR) { -- if (cur_ring && SXE2_RING_MORE_WORK(*cur_ring)) { -- move_cq_head = false; -- } -- } -- -- if (move_cq_head) { -- SXE2_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); -- if (!SXE2_RING_CURRENT_HEAD(cq->cq_ring)) { -- cq->polarity ^= 1; -- } -- -- SXE2_RING_MOVE_TAIL(cq->cq_ring); -- cq->doorbell_note[SXE2_CQ_SET_CI] = -- htole32(SXE2_RING_CURRENT_HEAD(cq->cq_ring)); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("update qp %u cq %d ci %u.\n", -- qpn, cq->cq_id, SXE2_RING_CURRENT_HEAD(cq->cq_ring)); -- } -- } else { -- cqe = SXE2_GET_CURRENT_CQ_ELEM(cq); -- get_64bit_val(cqe, 24, &qword); -- qword &= ~SXE2_CQE_WQEIDX; -- qword |= FIELD_PREP(SXE2_CQE_WQEIDX, cur_ring->tail); -- set_64bit_val(cqe, 24, qword); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("cqn [%u] idx [%u] flush wqeidx %d to %d qpn %u.\n", -- cq->cq_id, SXE2_RING_CURRENT_HEAD(cq->cq_ring), -- wqe_idx, cur_ring->tail, qpn); -- } -- } -- -- return ret_code; --} -- --static enum ibv_wc_status sxe2_flush_err_to_ib_wc_status(enum sxe2_major_opcode opcode) --{ -- switch (opcode) { -- case SXE2_LOCAL_PROTECTION_ERR: -- return IBV_WC_LOC_PROT_ERR; -- case SXE2_BAD_RESPONSE_ERR: -- return IBV_WC_BAD_RESP_ERR; -- case SXE2_REMOTE_ACCESS_ERR: -- return IBV_WC_REM_ACCESS_ERR; -- case SXE2_LOCAL_QP_OP_ERR: -- return IBV_WC_LOC_QP_OP_ERR; -- case SXE2_REMOTE_OPERATION_ERR: -- return IBV_WC_REM_OP_ERR; -- case SXE2_LOCAL_LEN_ERR: -- return IBV_WC_LOC_LEN_ERR; -- case SXE2_LOCAL_ACCESS_ERR: -- return IBV_WC_LOC_ACCESS_ERR; -- case SXE2_WR_FLUSH_ERR: -- return IBV_WC_WR_FLUSH_ERR; -- case SXE2_TRANS_RETRY_CNT_EXCEED_ERR: -- return IBV_WC_RETRY_EXC_ERR; -- case SXE2_MW_BIND_ERR: -- return IBV_WC_MW_BIND_ERR; -- case SXE2_REMOTE_INVALID_REQUEST_ERR: -- return IBV_WC_REM_INV_REQ_ERR; -- case SXE2_RNR_RETRY_CNT_EXCEED_ERR: -- 
return IBV_WC_RNR_RETRY_EXC_ERR; -- default: -- return IBV_WC_GENERAL_ERR; -- } --} -- --static enum ibv_wc_opcode sxe2_get_ib_wc_op_sq(struct sxe2_cqe_info *cur_cqe) --{ -- enum ibv_wc_opcode opcode; -- -- switch (cur_cqe->info.field.op) { -- case SXE2_OP_TYPE_RDMA_WRITE: -- case SXE2_OP_TYPE_RDMA_WRITE_SOL: -- opcode = IBV_WC_RDMA_WRITE; -- break; -- case SXE2_OP_TYPE_RDMA_READ: -- opcode = IBV_WC_RDMA_READ; -- break; -- case SXE2_OP_TYPE_SEND_SOL: -- case SXE2_OP_TYPE_SEND_SOL_INV: -- case SXE2_OP_TYPE_SEND_INV: -- case SXE2_OP_TYPE_SEND: -- opcode = IBV_WC_SEND; -- break; -- case SXE2_OP_TYPE_BIND_MW: -- opcode = IBV_WC_BIND_MW; -- break; -- case SXE2_OP_TYPE_LOCAL_INV: -- opcode = IBV_WC_LOCAL_INV; -- break; -- default: -- opcode = IBV_WC_DRIVER1; -- break; -- } -- return opcode; --} -- --static enum ibv_wc_opcode sxe2_get_ib_wc_op_rq(struct sxe2_cqe_info *cur_cqe) --{ -- enum ibv_wc_opcode opcode; -- -- if (cur_cqe->info.field.imm_data_flag) { -- opcode = IBV_WC_RECV_RDMA_WITH_IMM; -- } else { -- opcode = IBV_WC_RECV; -- } -- -- return opcode; --} -- --static void sxe2_cq_cqe_ext(struct sxe2_cqe_info *cur_cqe) --{ -- struct sxe2_ucq *ucq = container_of(cur_cqe, struct sxe2_ucq, cur_cqe); -- struct ibv_cq_ex *ibvcq_ex = &ucq->verbs_cq.cq_ex; -- ibvcq_ex->wr_id = cur_cqe->wr_id; -- if (cur_cqe->info.field.error) { -- ibvcq_ex->status = sxe2_flush_err_to_ib_wc_status(cur_cqe->info.field.major_err); -- } else { -- ibvcq_ex->status = IBV_WC_SUCCESS; -- } --} -- --static void sxe2_cq_pollinfo_to_wc(struct ibv_wc *entry, struct sxe2_cqe_info *cur_cqe) --{ -- struct sxe2_qp_common *qp; -- struct ibv_qp *ib_qp; -- -- entry->wc_flags = 0; -- entry->wr_id = cur_cqe->wr_id; -- entry->qp_num = cur_cqe->info.field.qp_id; -- qp = (struct sxe2_qp_common *)(unsigned long)cur_cqe->info.field.qpc; -- ib_qp = qp->back_qp; -- -- if (cur_cqe->info.field.error) { -- entry->status = sxe2_flush_err_to_ib_wc_status(cur_cqe->info.field.major_err); -- entry->vendor_err = 
(uint32_t)(cur_cqe->info.field.major_err << 16 | -- cur_cqe->info.field.minor_err); -- } else { -- entry->status = IBV_WC_SUCCESS; -- } -- -- if (cur_cqe->info.field.imm_data_flag) { -- entry->imm_data = htonl(cur_cqe->info.field.imme_data); -- entry->wc_flags |= IBV_WC_WITH_IMM; -- } -- -- if (cur_cqe->info.field.qp_type == SXE2_CQE_QTYPE_SQ) { -- entry->opcode = sxe2_get_ib_wc_op_sq(cur_cqe); -- } else { -- entry->opcode = sxe2_get_ib_wc_op_rq(cur_cqe); -- if (ib_qp->qp_type != IBV_QPT_UD && -- cur_cqe->info.field.stag_or_lrkey) { -- entry->invalidated_rkey = cur_cqe->info.field.l_r_key; -- entry->wc_flags |= IBV_WC_WITH_INV; -- } -- } -- -- if (ib_qp->qp_type == IBV_QPT_UD) { -- entry->src_qp = cur_cqe->info.field.ud_src_qpn; -- entry->wc_flags |= IBV_WC_GRH; -- } else { -- entry->src_qp = cur_cqe->info.field.qp_id; -- } -- entry->byte_len = cur_cqe->bytes; --} -- --static int sxe2_poll_one(struct sxe2_cq_uk *ukcq, struct sxe2_cqe_info *cur_cqe, -- struct ibv_wc *entry) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_ucq *vendor_cq = NULL; -- vendor_cq = container_of(ukcq, struct sxe2_ucq, cq); -- sctx = container_of(vendor_cq->verbs_cq.cq.context, struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- int ret = sxe2_hw_cq_poll(ukcq, cur_cqe); -- if (ret) { -- if (ret == -EFAULT) { -- SXE2_VERBS_LOG_WARN_BDF("CQ %d skip cqe(CI 0x%x) for qp destroy.\n", -- ukcq->cq_id, ukcq->cq_ring.head); -- } -- return ret; -- } -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("CQ (%d) CI(0x%x) wrid(%llu) cqeinfo:\n"\ -- "payload_len : %ul\n"\ -- "packet_seq : %d\n"\ -- "qpc : %#llx\n"\ -- "l_r_key : %ul\n"\ -- "qp_id : %d\n"\ -- "minor_err : %d\n"\ -- "major_err : %d\n"\ -- "wq_desc_idx : %d\n"\ -- "extended_cqe : %d\n"\ -- "push_dropped : %d\n"\ -- "ipv4 : %d\n"\ -- "stag_or_lrkey : %d\n"\ -- "solicited_evt : %d\n"\ -- "error : %d\n"\ -- "op : %d\n"\ -- "qp_type : %d\n"\ -- "imme_data : %ul\n"\ -- "srqn : %d\n"\ -- "is_srq : %d\n"\ -- "cqe_timestamp : 
%#llx\n"\ -- "ud_smac : %#llx\n"\ -- "ud_vlan_tag : %d\n"\ -- "ud_src_qpn : %d\n"\ -- "vsi_index : %d\n"\ -- "vlan_tag_flag : %d\n"\ -- "ud_smac_valid : %d\n"\ -- "imm_data_flag : %d\n"\ -- "cqe_valid : %d\n", -- ukcq->cq_id, -- ukcq->cq_ring.head, -- cur_cqe->wr_id, -- cur_cqe->info.field.payload_len, -- cur_cqe->info.field.packet_seq, -- cur_cqe->info.field.qpc, -- cur_cqe->info.field.l_r_key, -- cur_cqe->info.field.qp_id, -- cur_cqe->info.field.minor_err, -- cur_cqe->info.field.major_err, -- cur_cqe->info.field.wq_desc_idx, -- cur_cqe->info.field.extended_cqe, -- cur_cqe->info.field.push_dropped, -- cur_cqe->info.field.ipv4, -- cur_cqe->info.field.stag_or_lrkey, -- cur_cqe->info.field.solicited_evt, -- cur_cqe->info.field.error, -- cur_cqe->info.field.op, -- cur_cqe->info.field.qp_type, -- cur_cqe->info.field.imme_data, -- cur_cqe->info.field.srqn, -- cur_cqe->info.field.is_srq, -- cur_cqe->info.field.cqe_timestamp, -- (__u64)cur_cqe->info.field.ud_smac, -- cur_cqe->info.field.ud_vlan_tag, -- cur_cqe->info.field.ud_src_qpn, -- cur_cqe->info.field.vsi_index, -- cur_cqe->info.field.vlan_tag_flag, -- cur_cqe->info.field.ud_smac_valid, -- cur_cqe->info.field.imm_data_flag, -- cur_cqe->info.field.cqe_valid); -- } -- -- if (!entry) { -- sxe2_cq_cqe_ext(cur_cqe); -- } else { -- sxe2_cq_pollinfo_to_wc(entry, cur_cqe); -- } -- -- return 0; --} -- --int sxe2_upoll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc) --{ -- struct sxe2_ucq *ucq = NULL; -- struct sxe2_cqe_info *cur_cqe; -- int ret; -- int npolled = 0; -- -- ucq = container_of(cq, struct sxe2_ucq, verbs_cq.cq); -- cur_cqe = &ucq->cur_cqe; -- -- ret = pthread_spin_lock(&ucq->lock); -- if (ret) { -- return -ret; -- } -- while (npolled < num_entries) { -- ret = sxe2_poll_one(&ucq->cq, cur_cqe, wc + npolled); -- if (ret == SXE2_CQ_OK) { -- ++npolled; -- continue; -- } -- if (ret == -ENOENT) { -- break; -- } -- } -- -- pthread_spin_unlock(&ucq->lock); -- return npolled; --} -- --static void 
sxe2_cq_arm_notify(struct sxe2_cq_uk *arm_cq, -- enum sxe2_arm_type arm_type) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_ucq *vendor_cq = NULL; -- uint64_t doorbell; -- uint32_t sn; -- uint32_t ci; -- uint32_t cmd; -- -- vendor_cq = container_of(arm_cq, struct sxe2_ucq, cq); -- sctx = container_of(vendor_cq->verbs_cq.cq.context, struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- sn = arm_cq->arm_sn & 3; -- ci = SXE2_RING_CURRENT_HEAD(arm_cq->cq_ring); -- -- if (arm_type == SXE2_CQ_ARM_SOLICITED) { -- cmd = (uint32_t)SXE2_CQ_DB_REQ_SOLICITED; -- } else { -- cmd = SXE2_CQ_DB_REQ_NOSOLICITED; -- } -- -- doorbell = 0; -- doorbell = sn << 29 | cmd | ci; -- doorbell <<= 32; -- doorbell |= arm_cq->cq_id; -- -- arm_cq->doorbell_note[SXE2_CQ_ARM_DB] = htole32(sn << 29 | cmd | ci); -- -- udma_to_device_barrier(); -- -- set_64bit_val(arm_cq->cqe_alloc_db, 0, doorbell); -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("CQ (%u -> %p) ARM NOTIFY DBNOTE(0x%x)"\ -- " DB(0x%lx) CI(%u) SN(%u) CMD(%u)\n", arm_cq->cq_id, -- arm_cq->cqe_alloc_db, htole32(sn << 29 | cmd | ci), -- htole64(doorbell), ci, sn, cmd); -- } --} -- --int sxe2_uarm_cq(struct ibv_cq *cq, int solicited) --{ -- struct sxe2_ucq *vendor_cq = NULL; -- enum sxe2_arm_type arm_type; -- int ret; -- -- arm_type = solicited ? 
SXE2_CQ_ARM_SOLICITED : SXE2_CQ_ARM_NEXT; -- vendor_cq = container_of(cq, struct sxe2_ucq, verbs_cq.cq); -- ret = pthread_spin_lock(&vendor_cq->lock); -- if (ret) { -- return ret; -- } -- -- sxe2_cq_arm_notify(&vendor_cq->cq, arm_type); -- -- pthread_spin_unlock(&vendor_cq->lock); -- -- return 0; --} -- --void sxe2_ucq_event(struct ibv_cq *cq) --{ -- struct sxe2_ucq *vendor_cq = NULL; -- struct sxe2_cq_uk *arm_cq; -- -- vendor_cq = container_of(cq, struct sxe2_ucq, verbs_cq.cq); -- arm_cq = &vendor_cq->cq; -- if (pthread_spin_lock(&vendor_cq->lock)) { -- return; -- } -- -- arm_cq->arm_sn += 1; -- -- pthread_spin_unlock(&vendor_cq->lock); --} -- --static void sxe2_clean_base_cq(struct sxe2_qp_common *qp, struct sxe2_cq_uk *cq, int cq_type) --{ -- __le64 *cqe; -- __u64 qword, qpc; -- __u32 cq_head; -- __u8 polarity, cq_polarity; -- -- cq_head = cq->cq_ring.head; -- cq_polarity = cq->polarity; -- do { -- cqe = ((struct sxe2_cqe *)(cq->cq_base))[cq_head].buf; -- get_64bit_val(cqe, 56, &qword); -- polarity = (__u8)FIELD_GET(SXE2_CQE_VALID, qword); -- if (polarity != cq_polarity) { -- break; -- } -- -- get_64bit_val(cqe, 8, &qpc); -- if ((void *)(uintptr_t)qpc == (void *)qp) { -- set_64bit_val(cqe, 8, 0); --#ifdef SXE2_CFG_DEBUG -- if (cq_type == SQ_CQ) { -- qp->statistics.cleaned_sq_cnt++; -- } else { -- qp->statistics.cleaned_rq_cnt++; -- } --#endif -- } -- -- cq_head = (cq_head + 1) % cq->cq_ring.size; -- if (!cq_head) { -- cq_polarity ^= 1; -- } -- } while (true); --} -- --void sxe2_clean_cqes(struct sxe2_qp_common *qp, -- struct sxe2_ucq *vendor_cq, int cq_type) --{ -- struct sxe2_cq_uk *cq = &vendor_cq->cq; -- int ret; -- -- ret = pthread_spin_lock(&vendor_cq->lock); -- if (ret) { -- return; -- } -- -- sxe2_clean_base_cq(qp, cq, cq_type); -- -- pthread_spin_unlock(&vendor_cq->lock); --} -- --int sxe2_start_poll(struct ibv_cq_ex *ibvcq_ex, struct ibv_poll_cq_attr *attr) --{ -- struct sxe2_ucq *vendor_cq = NULL; -- struct sxe2_cqe_info *cur_cqe; -- struct 
sxe2_rdma_ucontext *sctx = NULL; -- int ret; -- -- vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); -- sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- ret = pthread_spin_lock(&vendor_cq->lock); -- if (ret) { -- return ret; -- } -- -- cur_cqe = &vendor_cq->cur_cqe; -- ret = sxe2_poll_one(&vendor_cq->cq, cur_cqe, NULL); -- if (ret == SXE2_CQ_OK) { -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("sxe2 start poll, cqn [%u] wr_id %"PRIu64" succeed.\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id); -- } -- return SXE2_CQ_OK; -- } -- -- if (ret == SXE2_CQ_NOENT) { -- ret = ENOENT; -- } -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("sxe2 start poll, cqn [%u] wr_id %"PRIu64" failed[%d].\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, ret); -- } -- -- pthread_spin_unlock(&vendor_cq->lock); -- -- return ret; --} -- --int sxe2_next_poll(struct ibv_cq_ex *ibvcq_ex) --{ -- struct sxe2_ucq *vendor_cq = NULL; -- struct sxe2_cqe_info *cur_cqe; -- struct sxe2_rdma_ucontext *sctx = NULL; -- int ret; -- -- vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); -- sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("sxe2 next poll, cqn [%u] wr_id %"PRIu64".\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id); -- } -- cur_cqe = &vendor_cq->cur_cqe; -- ret = sxe2_poll_one(&vendor_cq->cq, cur_cqe, NULL); -- if (ret == SXE2_CQ_NOENT) { -- return ENOENT; -- } -- -- return ret; --} -- --void sxe2_end_poll(struct ibv_cq_ex *ibvcq_ex) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_ucq *vendor_cq = NULL; -- -- vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); -- sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- if (sctx->enable_io_log) { -- 
SXE2_VERBS_LOG_DEBUG_BDF("sxe2 end poll, cqn [%u] wr_id %"PRIu64".\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id); -- } -- -- pthread_spin_unlock(&vendor_cq->lock); --} -- --uint64_t sxe2_wc_read_completion_ts(struct ibv_cq_ex *ibvcq_ex) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_ucq *vendor_cq = NULL; -- uint64_t timestamp; -- -- vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); -- sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- timestamp = vendor_cq->cur_cqe.info.field.cqe_timestamp / HCA_CORE_CLOCK_500_MHZ; -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wc read comp ts, cqn [%u] wr_id %"PRIu64" ret %"PRIu64"\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, timestamp); -- } -- return timestamp; --} -- --uint64_t sxe2_wc_read_completion_wallclock_ns(struct ibv_cq_ex *ibvcq_ex) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_ucq *vendor_cq = NULL; -- uint64_t timestamp_ns; -- -- vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); -- sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- timestamp_ns = vendor_cq->cur_cqe.info.field.cqe_timestamp * 1000; -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wc read timestamp ns, cqn [%u] wr_id %"PRIu64" ret %"PRIu64"\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, timestamp_ns); -- } -- -- return timestamp_ns; --} -- --enum ibv_wc_opcode sxe2_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex) --{ -- struct sxe2_ucq *vendor_cq = NULL; -- struct sxe2_cqe_info *cur_cqe; -- struct sxe2_rdma_ucontext *sctx = NULL; -- enum ibv_wc_opcode opcode; -- -- vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); -- sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- cur_cqe = &vendor_cq->cur_cqe; -- -- if 
(cur_cqe->info.field.qp_type == SXE2_CQE_QTYPE_SQ) { -- opcode = sxe2_get_ib_wc_op_sq(cur_cqe); -- } else { -- opcode = sxe2_get_ib_wc_op_rq(cur_cqe); -- } -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wc read opcode, cqn [%u] wr_id %"PRIu64" ret %d\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, opcode); -- } -- -- return opcode; --} -- --uint32_t sxe2_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex) --{ -- struct sxe2_cqe_info *cur_cqe; -- struct sxe2_ucq *vendor_cq = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- __u32 error = 0; -- -- vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); -- sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- cur_cqe = &vendor_cq->cur_cqe; -- if (cur_cqe->info.field.error) { -- error = (__u32)(cur_cqe->info.field.major_err << 16 | cur_cqe->info.field.minor_err); -- } -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wc read vendor error, cqn [%u] wr_id %"PRIu64" ret %u\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, error); -- } -- -- return error; --} -- --unsigned int sxe2_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_cqe_info *cur_cqe; -- struct sxe2_ucq *vendor_cq = NULL; -- struct sxe2_qp_common *qp; -- struct ibv_qp *ib_qp; -- unsigned int wc_flags = 0; -- -- vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); -- sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- cur_cqe = &vendor_cq->cur_cqe; -- qp = (struct sxe2_qp_common *)cur_cqe->info.field.qpc; -- ib_qp = qp->back_qp; -- -- if (cur_cqe->info.field.imm_data_flag) { -- wc_flags |= IBV_WC_WITH_IMM; -- } -- -- if (ib_qp->qp_type == IBV_QPT_UD) { -- wc_flags |= IBV_WC_GRH; -- } else { -- if ((cur_cqe->info.field.qp_type == SXE2_CQE_QTYPE_RQ) -- && (cur_cqe->info.field.stag_or_lrkey)) { -- wc_flags |= 
IBV_WC_WITH_INV; -- } -- } -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wc read wc flags, cqn [%u] wr_id %"PRIu64" ret %u\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, wc_flags); -- } -- -- return wc_flags; --} -- --uint32_t sxe2_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_ucq *vendor_cq = NULL; -- __u32 bytes; -- -- vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, -- verbs_cq.cq_ex); -- sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- bytes = vendor_cq->cur_cqe.bytes; -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wc read length, cqn [%u] wr_id %"PRIu64" ret %u\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, bytes); -- } -- -- return bytes; --} -- --__be32 sxe2_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_cqe_info *cur_cqe; -- struct sxe2_ucq *vendor_cq = NULL; -- __u32 imme_data = 0; -- -- vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); -- sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- cur_cqe = &vendor_cq->cur_cqe; -- -- if (cur_cqe->info.field.imm_data_flag) { -- imme_data = cur_cqe->info.field.imme_data; -- } -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wc read imme data, cqn [%u] wr_id %"PRIu64" ret %u\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, imme_data); -- } -- return htonl(imme_data); --} -- --uint32_t sxe2_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_ucq *vendor_cq = NULL; -- __u32 qpn; -- -- vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, -- verbs_cq.cq_ex); -- sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- qpn = vendor_cq->cur_cqe.info.field.qp_id; -- -- if 
(sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wc read qpn, cqn [%u] wr_id %"PRIu64" ret %u\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, qpn); -- } -- return qpn; --} -- --uint32_t sxe2_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_cqe_info *cur_cqe; -- struct sxe2_ucq *vendor_cq = NULL; -- struct sxe2_qp_common *qp; -- struct ibv_qp *ib_qp; -- __u32 src_qpn; -- -- vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); -- sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- cur_cqe = &vendor_cq->cur_cqe; -- qp = (struct sxe2_qp_common *)cur_cqe->info.field.qpc; -- ib_qp = qp->back_qp; -- if (ib_qp->qp_type == IBV_QPT_UD) { -- src_qpn = cur_cqe->info.field.ud_src_qpn; -- } else { -- src_qpn = cur_cqe->info.field.qp_id; -- } -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wc read src qpn, cqn [%u] wr_id %"PRIu64" ret %u\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, src_qpn); -- } -- return src_qpn; --} -- --uint32_t sxe2_wc_read_slid(struct ibv_cq_ex *ibvcq_ex) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_ucq *vendor_cq = NULL; -- -- vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); -- sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wc read src lid, cqn [%u] wr_id %"PRIu64" ret %u\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, 0); -- } -- -- return 0; --} -- --uint8_t sxe2_wc_read_sl(struct ibv_cq_ex *ibvcq_ex) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_ucq *vendor_cq = NULL; -- -- vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); -- sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- if (sctx->enable_io_log) { -- 
SXE2_VERBS_LOG_DEBUG_BDF("wc read sl, cqn [%u] wr_id %"PRIu64" ret %u\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, 0); -- } -- -- return 0; --} -- --uint8_t sxe2_wc_read_dlid_path_bits(struct ibv_cq_ex *ibvcq_ex) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_ucq *vendor_cq = NULL; -- -- vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); -- sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wc read dlid path bits, cqn [%u] wr_id %"PRIu64" ret %u\n", -- vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, 0); -- } -- -- return 0; --} -- --static __le64 *sxe2_srq_get_next_recv_wqe(struct sxe2_srq_verbs *srq, __u32 *wqe_idx) --{ -- int ret_code; -- __le64 *wqe; -- -- if (SXE2_RING_FULL_ERR(srq->srq_ring)) -- return NULL; -- -- *wqe_idx = SXE2_RING_CURRENT_HEAD(srq->srq_ring); -- if (srq->srqe_array[*wqe_idx] == SXE2_SRQE_BUSY) -- return NULL; -- -- SXE2_RING_MOVE_HEAD(srq->srq_ring, ret_code); -- if (ret_code) -- return NULL; -- -- srq->srqe_array[*wqe_idx] = SXE2_SRQE_BUSY; -- -- if (!*wqe_idx) -- srq->srq_polarity = !srq->srq_polarity; -- -- wqe = srq->srq_base[*wqe_idx * srq->wqe_size_multiplier].elem; -- -- return wqe; --} -- --static int sxe2_hw_srq_post_receive(struct sxe2_srq_verbs *srq, -- struct sxe2_rq_info *info) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_usrq *usrq = NULL; -- __u32 wqe_idx, i, byte_off; -- __u32 addl_sge_cnt; -- __le64 *wqe; -- __u64 hdr; -- -- usrq = container_of(srq, struct sxe2_usrq, srq_verbs); -- sctx = to_sctx(usrq->vsrq.srq.context); -- -- if (srq->max_srq_frag_cnt < info->num_sges) -- return -EINVAL; -- -- wqe = sxe2_srq_get_next_recv_wqe(srq, &wqe_idx); -- if (!wqe) -- return -ENOMEM; -- -- srq->srq_wrid_array[wqe_idx] = info->wr_id; -- -- addl_sge_cnt = info->num_sges > 1 ? 
info->num_sges - 1 : 0; -- sxe2_set_sgelist_data(wqe, 0, info->sg_list, srq->srq_polarity); -- -- for (i = 1, byte_off = 32; i < info->num_sges; i++) { -- sxe2_set_sgelist_data(wqe, byte_off, &info->sg_list[i], srq->srq_polarity); -- byte_off += 16; -- } -- -- if (!(info->num_sges & 0x01) && info->num_sges) { -- sxe2_set_sgelist_data(wqe, byte_off, NULL, srq->srq_polarity); -- } -- -- hdr = FIELD_PREP(SXE2_WQE_ADDSGECNT, addl_sge_cnt) | -- FIELD_PREP(SXE2_WQE_VALID, srq->srq_polarity); -- -- udma_to_device_barrier(); -- -- set_64bit_val(wqe, 24, hdr); -- -- set_64bit_val(srq->db_note, 0, -- (__u64)SXE2_RING_CURRENT_HEAD(srq->srq_ring) * srq->wqe_size_multiplier); -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("POST SRQ RCV(srqn = %u): wqe_idx %u "\ -- "wr_id %llu ring_size %u mul_size %d\n", -- srq->srq_id, wqe_idx, srq->srq_wrid_array[wqe_idx], -- SXE2_RING_SIZE(srq->srq_ring), srq->wqe_size_multiplier); -- for (i = 0; i < info->num_sges; i++) { -- SXE2_VERBS_LOG_DEBUG_BDF("sgelist[%d] addr %" PRIu64 " len [%u] lkey [%u]\n", -- i, info->sg_list[i].addr, info->sg_list[i].length, -- info->sg_list[i].lkey); -- } -- } -- -- return 0; --} -- --int sxe2_upost_srq_recv(struct ibv_srq *ibv_srq, -- struct ibv_recv_wr *ibv_wr, struct ibv_recv_wr **bad_wr) --{ -- struct sxe2_usrq *usrq = to_usrq(ibv_srq); -- struct sxe2_srq_verbs *verbs_srq = &usrq->srq_verbs; -- struct sxe2_rq_info rq_info = {}; -- int err = 0; -- -- pthread_spin_lock(&usrq->lock); -- while (ibv_wr) { -- if (ibv_wr->num_sge > (int)verbs_srq->max_srq_frag_cnt) { -- err = -EINVAL; -- goto out; -- } -- -- rq_info.num_sges = (__u32)ibv_wr->num_sge; -- rq_info.wr_id = ibv_wr->wr_id; -- rq_info.sg_list = ibv_wr->sg_list; -- err = sxe2_hw_srq_post_receive(verbs_srq, &rq_info); -- if (err) -- goto out; -- -- ibv_wr = ibv_wr->next; -- } -- --out: -- pthread_spin_unlock(&usrq->lock); -- -- if (err) -- *bad_wr = ibv_wr; -- -- return err; --} -- --static __le64 *sxe2_init_wqe(struct sxe2_uqp *uqp, __u64 
wr_id, unsigned int opcode, bool ord_fence) --{ -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_qp_common *qp; -- unsigned int send_flags = uqp->verbs_qp.qp_ex.wr_flags; -- __le64 *wqe; -- __u64 hdr = 0; -- -- qp = &uqp->qp; -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("init wqe (qpn = %u): wr_id %"PRIu64" "\ -- "opcode %u wr_flags %x ring_size %u \n", -- uqp->verbs_qp.qp_ex.qp_base.qp_num, -- uqp->verbs_qp.qp_ex.wr_id, opcode, -- send_flags, SXE2_RING_SIZE(qp->sq_ring)); -- } -- -- if (SXE2_RING_FULL_ERR(qp->sq_ring)) { -- uqp->wqe_hdr = NULL; -- uqp->err = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("sq wqe buf full, qpn [%u]\n", -- uqp->verbs_qp.qp_ex.qp_base.qp_num); -- return NULL; -- } -- -- wqe = (__u64 *)uqp->wqebuf; -- memset(wqe, 0, SXE2_QP_WQE_MIN_SIZE); -- -- SXE2_SET_FIELD(hdr, SXE2_WQE_OPCODE, opcode); -- if ((send_flags & IBV_SEND_SIGNALED) || uqp->sq_sig_all) { -- SXE2_SET_FIELD(hdr, SXE2_WQE_SIGCOMPL, true); -- } -- -- if (send_flags & IBV_SEND_FENCE || ord_fence) { -- SXE2_SET_FIELD(hdr, SXE2_WQE_READFENCE, true); -- } -- -- if (uqp->send_cq->report_rtt) { -- SXE2_SET_FIELD(hdr, SXE2_WQE_REPORTRTT, true); -- } -- -- if(uqp->qp.push_db) { -- SXE2_SET_FIELD(hdr, SXE2_WQE_PUSHWQE, true); -- } -- -- set_64bit_val(wqe, 24, hdr); -- -- uqp->wqe_hdr = wqe; -- -- return wqe; --} -- --void sxe2_wr_start(struct ibv_qp_ex *qp_ex) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- enum ibv_qp_state state = qp_ex->qp_base.state; -- -- uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr start enter, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- } -- -- if (state < IBV_QPS_RTS) { -- uqp->err = EINVAL; -- SXE2_VERBS_LOG_ERROR_BDF("qp 
state err [%d] qpn [%u]\n", -- state, qp_ex->qp_base.qp_num); -- return; -- } -- -- (void)pthread_spin_lock(&uqp->lock); -- -- uqp->rb_sq_head = SXE2_RING_CURRENT_HEAD(uqp->qp.sq_ring); -- uqp->err = 0; --} -- --int sxe2_wr_complete(struct ibv_qp_ex *qp_ex) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_qp_common *qp; -- struct sxe2_rdma_ucontext *sctx = NULL; -- bool push_wqe; -- int err; -- -- uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- err = uqp->err; -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr complete enter, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- } -- if (unlikely(err)) { -- SXE2_RING_CURRENT_HEAD(uqp->qp.sq_ring) = uqp->rb_sq_head; -- SXE2_VERBS_LOG_ERROR_BDF("qp err [%d] qpn [%u]\n", -- err, qp_ex->qp_base.qp_num); -- goto out; -- } -- -- qp = &uqp->qp; -- -- push_wqe = qp->push_db ? true : false; -- -- if (push_wqe) -- sxe2_qp_push_wqe(qp, uqp->cur_wqe, uqp->quanta, uqp->wqe_idx); -- else -- sxe2_qp_ring_normal_db(qp); -- -- sxe2_dump_wqe(qp, uqp->cur_wqe, uqp->quanta, uqp->wqe_idx, "new_send"); -- --out: -- pthread_spin_unlock(&uqp->lock); -- -- return err; --} -- --void sxe2_wr_abort(struct ibv_qp_ex *qp_ex) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr abort enter, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- } -- SXE2_RING_CURRENT_HEAD(uqp->qp.sq_ring) = uqp->rb_sq_head; -- -- pthread_spin_unlock(&uqp->lock); --} -- --void sxe2_wr_send_rc_ud(struct ibv_qp_ex *qp_ex) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- unsigned int send_flags = qp_ex->wr_flags; -- unsigned int opcode = SXE2_OP_TYPE_SEND; -- -- uqp 
= container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr send enter, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- } -- -- if (send_flags & IBV_SEND_SOLICITED) -- opcode = SXE2_OP_TYPE_SEND_SOL; -- -- (void)sxe2_init_wqe(uqp, qp_ex->wr_id, opcode, false); -- -- uqp->funid = SXE2_RDMA_SEND; --} -- --void sxe2_wr_send_imm_rc_ud(struct ibv_qp_ex *qp_ex, __be32 imm_data) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- unsigned int send_flags = qp_ex->wr_flags; -- unsigned int opcode = SXE2_OP_TYPE_SEND; -- __le64 *wqe; -- __u64 hdr; -- __u64 val = 0; -- -- uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr send immediate enter, qpn [%u] imm [%u]\n", -- qp_ex->qp_base.qp_num, be32toh(imm_data)); -- } -- if (send_flags & IBV_SEND_SOLICITED) -- opcode = SXE2_OP_TYPE_SEND_SOL; -- -- wqe = sxe2_init_wqe(uqp, qp_ex->wr_id, opcode, false); -- if (!wqe) -- return; -- -- get_64bit_val(wqe, 24, &hdr); -- -- SXE2_SET_FIELD(hdr, SXE2_WQE_IMMEDATAFLG, true); -- SXE2_SET_FIELD(val, SXE2_WQE_IMMEDATA, be32toh(imm_data)); -- -- set_64bit_val(wqe, 0, val); -- set_64bit_val(wqe, 24, hdr); -- -- uqp->funid = SXE2_RDMA_SEND; --} -- --void sxe2_wr_send_inv_rc(struct ibv_qp_ex *qp_ex, uint32_t invalidate_rkey) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- unsigned int send_flags = qp_ex->wr_flags; -- unsigned int opcode = SXE2_OP_TYPE_SEND_INV; -- __le64 *wqe; -- __u64 hdr; -- -- uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- if (sctx->enable_io_log) { -- 
SXE2_VERBS_LOG_DEBUG_BDF("wr send inv enter, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- } -- -- if (send_flags & IBV_SEND_SOLICITED) { -- opcode = SXE2_OP_TYPE_SEND_SOL_INV; -- } -- -- wqe = sxe2_init_wqe(uqp, qp_ex->wr_id, opcode, false); -- if (!wqe) -- return; -- -- get_64bit_val(wqe, 24, &hdr); -- -- SXE2_SET_FIELD(hdr, SXE2_WQE_REMOTEINVTAG, invalidate_rkey); -- -- set_64bit_val(wqe, 24, hdr); -- -- uqp->funid = SXE2_RDMA_SEND; --} -- --void sxe2_wr_rdma_read_rc(struct ibv_qp_ex *qp_ex, uint32_t rkey, -- uint64_t remote_addr) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- __le64 *wqe; -- __u64 hdr; -- bool ord_fence = false; -- -- uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr read enter, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- } -- if (uqp->qp.rd_fence_rate && (uqp->qp.ord_cnt++ == uqp->qp.rd_fence_rate)) { -- ord_fence = true; -- uqp->qp.ord_cnt = 0; -- } -- -- wqe = sxe2_init_wqe(uqp, qp_ex->wr_id, SXE2_OP_TYPE_RDMA_READ, ord_fence); -- if (!wqe) -- return; -- -- get_64bit_val(wqe, 24, &hdr); -- -- sxe2_set_remote_offset(wqe, 16, remote_addr); -- -- SXE2_SET_FIELD(hdr, SXE2_WQE_REMOTETAG, rkey); -- -- set_64bit_val(wqe, 24, hdr); -- -- uqp->funid = SXE2_RDMA_READ; --} -- --void sxe2_wr_rdma_write_rc(struct ibv_qp_ex *qp_ex, uint32_t rkey, -- uint64_t remote_addr) --{ -- unsigned int send_flags = qp_ex->wr_flags; -- unsigned int opcode = SXE2_OP_TYPE_RDMA_WRITE; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- __le64 *wqe; -- __u64 hdr; -- -- if (send_flags & IBV_SEND_SOLICITED) -- opcode = SXE2_OP_TYPE_RDMA_WRITE_SOL; -- -- uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- if 
(sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr write enter, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- } -- -- wqe = sxe2_init_wqe(uqp, qp_ex->wr_id, opcode, false); -- if (!wqe) -- return; -- -- get_64bit_val(wqe, 24, &hdr); -- -- sxe2_set_remote_offset(wqe, 16, remote_addr); -- -- SXE2_SET_FIELD(hdr, SXE2_WQE_REMOTETAG, rkey); -- -- set_64bit_val(wqe, 24, hdr); -- -- uqp->funid = SXE2_RDMA_WRITE; --} -- --void sxe2_wr_rdma_write_imm_rc(struct ibv_qp_ex *qp_ex, uint32_t rkey, -- uint64_t remote_addr, __be32 imm_data) --{ -- unsigned int send_flags = qp_ex->wr_flags; -- unsigned int opcode = SXE2_OP_TYPE_RDMA_WRITE; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- __le64 *wqe; -- __u64 hdr; -- __u64 val = 0; -- -- if (send_flags & IBV_SEND_SOLICITED) -- opcode = SXE2_OP_TYPE_RDMA_WRITE_SOL; -- -- uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr write with immediate enter, "\ -- "qpn [%u] imm [%u]\n", qp_ex->qp_base.qp_num, -- be32toh(imm_data)); -- } -- -- wqe = sxe2_init_wqe(uqp, qp_ex->wr_id, opcode, false); -- if (!wqe) -- return; -- -- get_64bit_val(wqe, 24, &hdr); -- -- sxe2_set_remote_offset(wqe, 16, remote_addr); -- -- SXE2_SET_FIELD(hdr, SXE2_WQE_REMOTETAG, rkey); -- SXE2_SET_FIELD(hdr, SXE2_WQE_IMMEDATAFLG, true); -- SXE2_SET_FIELD(val, SXE2_WQE_IMMEDATA, be32toh(imm_data)); -- -- set_64bit_val(wqe, 24, hdr); -- -- set_64bit_val(wqe, 0, val); -- uqp->funid = SXE2_RDMA_WRITE; --} -- --void sxe2_wr_set_ud_addr(struct ibv_qp_ex *qp_ex, struct ibv_ah *ah, -- uint32_t remote_qpn, uint32_t remote_qkey) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- struct sxe2_uah *uah; -- __le64 *wqe; -- __u64 hdr; -- __u64 val = 0; -- -- uah = container_of(ah, struct sxe2_uah, ibv_ah); -- uqp = container_of(qp_ex, struct 
sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr set ud addr enter, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- } -- -- wqe = uqp->wqe_hdr; -- if (!wqe) { -- return; -- } -- -- get_64bit_val(wqe, 24, &hdr); -- -- SXE2_SET_FIELD(hdr, SXE2_WQE_AH_ID, uah->ah_id); -- -- SXE2_SET_FIELD(val, SXE2_WQE_DESTQKEY, remote_qkey); -- SXE2_SET_FIELD(val, SXE2_WQE_DESTQPN, remote_qpn); -- -- set_64bit_val(wqe, 16, val); -- set_64bit_val(wqe, 24, hdr); --} -- --static void sxe2_fill_inline_data_buflist(__u8 *wqe, size_t num_buf, -- const struct ibv_data_buf *buf_list, __u8 polarity) --{ -- __u8 inline_valid = (__u8)(polarity << SXE2_INLINE_VALID_S); -- __u32 quanta_bytes_remaining = 8; -- __u32 i; -- bool first_quanta = true; -- -- wqe += 8; -- -- for (i = 0; i < num_buf; i++) { -- __u8 *cur_sge = (__u8 *)(uintptr_t)buf_list[i].addr; -- __u32 sge_len = (__u32)buf_list[i].length; -- -- while (sge_len) { -- __u32 bytes_copied; -- -- bytes_copied = min(sge_len, quanta_bytes_remaining); -- memcpy(wqe, cur_sge, bytes_copied); -- wqe += bytes_copied; -- cur_sge += bytes_copied; -- quanta_bytes_remaining -= bytes_copied; -- sge_len -= bytes_copied; -- -- if (!quanta_bytes_remaining) { -- quanta_bytes_remaining = 31; -- -- if (first_quanta) { -- first_quanta = false; -- wqe += 16; -- } else { -- *wqe = inline_valid; -- wqe++; -- } -- } -- } -- } -- if (!first_quanta && quanta_bytes_remaining < 31) { -- *(wqe + quanta_bytes_remaining) = inline_valid; -- } --} -- --static inline void sxe2_fill_sgelist_data(void *wqe, __u32 offset, -- const struct ibv_sge *sge, __u8 valid) --{ -- __u64 frag_info = 0; -- uint32_t len; -- -- if (sge) { -- len = (sge->length & ((uint32_t)1 << 31)) ? 
0 : sge->length; -- set_64bit_val(wqe, offset,sge->addr); -- SXE2_SET_FIELD(frag_info, SXE2_WQE_VALID, valid); -- SXE2_SET_FIELD(frag_info, SXE2_WQE_FRAG_LEN, len); -- SXE2_SET_FIELD(frag_info, SXE2_WQE_FRAG_STAG, sge->lkey); -- set_64bit_val(wqe, offset + 8, frag_info); -- } else { -- SXE2_SET_FIELD(frag_info, SXE2_WQE_VALID, valid); -- set_64bit_val(wqe, offset, 0); -- set_64bit_val(wqe, offset + 8, frag_info); -- } --} -- --void sxe2_wr_set_inline_data_rc_ud(struct ibv_qp_ex *qp_ex, -- void *addr, size_t length) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_qp_common *qp; -- struct sxe2_rdma_ucontext *sctx = NULL; -- const struct ibv_data_buf buff = {.addr = addr, .length = length}; -- __le64 *wqe; -- __le64 *cache_hdr; -- __u32 wqe_idx; -- __u16 quanta; -- bool push_wqe; -- __u64 hdr; -- -- uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr set inline data enter, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- } -- -- qp = &uqp->qp; -- push_wqe = qp->push_db ? 
true : false; -- cache_hdr = uqp->wqe_hdr; -- if (!cache_hdr) { -- SXE2_VERBS_LOG_WARN_BDF("wr cache hdr null, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- return; -- } -- -- if (length > uqp->qp.max_inline_data) { -- SXE2_VERBS_LOG_ERROR_BDF("wr length %zu exceed %u , qpn [%u]\n", -- length, uqp->qp.max_inline_data, qp_ex->qp_base.qp_num); -- uqp->err = EINVAL; -- return; -- } -- -- quanta = sxe2_inline_to_quanta_cnt((__u32)length); -- wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, (__u32)length, -- qp_ex->wr_id, push_wqe); -- if (!wqe) { -- uqp->err = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("wr no memory, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- return; -- } -- -- uqp->wqe_idx = wqe_idx; -- uqp->cur_wqe = wqe; -- uqp->quanta = quanta; -- -- memcpy(wqe, cache_hdr, SXE2_QP_WQE_MIN_SIZE); -- -- sxe2_fill_inline_data_buflist((__u8 *)wqe, 1, &buff, qp->swqe_polarity); -- -- get_64bit_val(wqe, 24, &hdr); -- -- SXE2_SET_FIELD(hdr, SXE2_WQE_INLINEDATAFLG, true); -- SXE2_SET_FIELD(hdr, SXE2_WQE_INLINEDATALEN, length); -- SXE2_SET_FIELD(hdr, SXE2_WQE_VALID, qp->swqe_polarity); -- set_64bit_val(wqe, 24, hdr); -- if (uqp->funid == SXE2_RDMA_SEND) { -- sxe2_dump_wqe_hdr(uqp, &hdr, SXE2_RDMA_SEND_INLINE); -- } else if (uqp->funid == SXE2_RDMA_WRITE) { -- sxe2_dump_wqe_hdr(uqp, &hdr, SXE2_RDMA_WRITE_INLINE); -- } --} -- --void sxe2_wr_set_inline_data_list_rc_ud(struct ibv_qp_ex *qp_ex, -- size_t num_buf, const struct ibv_data_buf *buf_list) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_qp_common *qp; -- struct sxe2_rdma_ucontext *sctx = NULL; -- size_t length = 0; -- __le64 *wqe; -- __le64 *cache_hdr; -- __u32 wqe_idx; -- __u16 quanta; -- bool push_wqe; -- __u64 hdr; -- size_t i; -- -- uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr set inline data list enter, qpn [%u]\n", -- 
qp_ex->qp_base.qp_num); -- } -- -- qp = &uqp->qp; -- push_wqe = qp->push_db ? true : false; -- cache_hdr = uqp->wqe_hdr; -- if (!cache_hdr) { -- SXE2_VERBS_LOG_WARN_BDF("wr cache hdr null, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- return; -- } -- for (i = 0; i < num_buf; i++) -- length += buf_list[i].length; -- -- if (length > uqp->qp.max_inline_data) { -- uqp->err = EINVAL; -- SXE2_VERBS_LOG_ERROR_BDF("wr length %zu exceed %u , qpn [%u]\n", -- length, uqp->qp.max_inline_data, qp_ex->qp_base.qp_num); -- return; -- } -- -- quanta = sxe2_inline_to_quanta_cnt((__u32)length); -- wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, (__u32)length, -- qp_ex->wr_id, push_wqe); -- if (!wqe) { -- uqp->err = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("wr no memory, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- return; -- } -- -- uqp->wqe_idx = wqe_idx; -- uqp->cur_wqe = wqe; -- uqp->quanta = quanta; -- -- memcpy(wqe, cache_hdr, SXE2_QP_WQE_MIN_SIZE); -- -- sxe2_fill_inline_data_buflist((__u8 *)wqe, num_buf, buf_list, qp->swqe_polarity); -- -- get_64bit_val(wqe, 24, &hdr); -- SXE2_SET_FIELD(hdr, SXE2_WQE_INLINEDATAFLG, true); -- SXE2_SET_FIELD(hdr, SXE2_WQE_INLINEDATALEN, length); -- SXE2_SET_FIELD(hdr, SXE2_WQE_VALID, qp->swqe_polarity); -- set_64bit_val(wqe, 24, hdr); -- -- if (uqp->funid == SXE2_RDMA_SEND) { -- sxe2_dump_wqe_hdr(uqp, &hdr, SXE2_RDMA_SEND_INLINE); -- } else if (uqp->funid == SXE2_RDMA_WRITE) { -- sxe2_dump_wqe_hdr(uqp, &hdr, SXE2_RDMA_WRITE_INLINE); -- } --} -- --void sxe2_wr_set_sge_rc_ud(struct ibv_qp_ex *qp_ex, uint32_t lkey, -- uint64_t addr, uint32_t length) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_qp_common *qp; -- struct sxe2_rdma_ucontext *sctx = NULL; -- __le64 *cache_hdr; -- __le64 *wqe; -- bool push_wqe; -- bool has_imme_data = false; -- __u32 wqe_idx; -- __u32 addl_frag_cnt; -- __u16 quanta; -- __u64 hdr; -- int ret_code; -- __u32 frag_cnt = 1; -- __u64 frag_info = 0; -- const struct ibv_sge sg_list = {.addr = addr, .length = length, .lkey = 
lkey}; -- -- uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr set sge data enter, "\ -- "qpn [%u] addr %" PRIu64 " len [%u]\n", -- qp_ex->qp_base.qp_num, addr, length); -- } -- -- qp = &uqp->qp; -- push_wqe = qp->push_db ? true : false; -- cache_hdr = uqp->wqe_hdr; -- if (!cache_hdr) { -- SXE2_VERBS_LOG_WARN_BDF("wr cache hdr null, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- return; -- } -- -- get_64bit_val(cache_hdr, 24, &hdr); -- has_imme_data = SXE2_GET_FIELD(SXE2_WQE_IMMEDATAFLG, hdr); -- if (has_imme_data) { -- frag_cnt = frag_cnt + 1; -- } -- -- ret_code = sxe2_fragcnt_to_quanta_cnt(frag_cnt, &quanta); -- if (ret_code) { -- uqp->err = EINVAL; -- SXE2_VERBS_LOG_ERROR_BDF("wr sge num %u exceed, qpn [%u]\n", -- frag_cnt, qp_ex->qp_base.qp_num); -- return; -- } -- -- wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, length, -- qp_ex->wr_id, push_wqe); -- if (!wqe) { -- uqp->err = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("wr no memory, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- return; -- } -- -- uqp->wqe_idx = wqe_idx; -- uqp->cur_wqe = wqe; -- uqp->quanta = quanta; -- -- addl_frag_cnt = frag_cnt > 1 ? 
(frag_cnt - 1) : 0; -- -- memcpy(wqe, cache_hdr, SXE2_QP_WQE_MIN_SIZE); -- -- if (has_imme_data) { -- sxe2_fill_sgelist_data(wqe, 32, &sg_list, qp->swqe_polarity); -- sxe2_fill_sgelist_data(wqe, 48, NULL, qp->swqe_polarity); -- } else { -- sxe2_fill_sgelist_data(wqe, 0, &sg_list, qp->swqe_polarity); -- } -- -- if (length == 0) { -- get_64bit_val(wqe, 8, &frag_info); -- frag_info = frag_info & (~SXE2_WQE_FRAG_VALID); -- SXE2_SET_FIELD(frag_info, SXE2_WQE_FRAG_VALID, !qp->swqe_polarity); -- set_64bit_val(wqe, 8, frag_info); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr set frag_info [%#llx] field [%d]\n", -- frag_info, !qp->swqe_polarity); -- } -- } -- -- SXE2_SET_FIELD(hdr, SXE2_WQE_ADDSGECNT, addl_frag_cnt); -- SXE2_SET_FIELD(hdr, SXE2_WQE_VALID, qp->swqe_polarity); -- set_64bit_val(wqe, 24, hdr); -- sxe2_dump_wqe_hdr(uqp, &hdr, uqp->funid); --} -- --void sxe2_wr_set_sge_list_rc_ud(struct ibv_qp_ex *qp_ex, -- size_t num_sge, const struct ibv_sge *sg_list) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_qp_common *qp; -- struct sxe2_rdma_ucontext *sctx = NULL; -- __le64 *cache_hdr; -- bool push_wqe; -- bool has_imme_data = false; -- __le64 *wqe; -- __u32 i, wqe_idx, total_size = 0, byte_off; -- int ret_code; -- __u32 frag_cnt, addl_frag_cnt; -- __u64 hdr; -- __u16 quanta; -- __u64 frag_info = 0; -- -- uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- qp = &uqp->qp; -- push_wqe = qp->push_db ? 
true : false; -- cache_hdr = uqp->wqe_hdr; -- if (!cache_hdr) { -- SXE2_VERBS_LOG_WARN_BDF("wr cache hdr null, qpn [%u]\n", qp_ex->qp_base.qp_num); -- return; -- } -- if (qp->max_sq_sge_cnt < num_sge) { -- uqp->err = EINVAL; -- SXE2_VERBS_LOG_ERROR_BDF("wr sge num %zu exceed %u, qpn [%u]\n", -- num_sge, qp->max_sq_sge_cnt, qp_ex->qp_base.qp_num); -- return; -- } -- for (i = 0; i < num_sge; i++) { -- total_size += sg_list[i].length; -- } -- get_64bit_val(cache_hdr, 24, &hdr); -- has_imme_data = SXE2_GET_FIELD(SXE2_WQE_IMMEDATAFLG, hdr); -- if (has_imme_data) { -- frag_cnt = (__u32)(num_sge + 1); -- } else { -- frag_cnt = (__u32)num_sge; -- } -- ret_code = sxe2_fragcnt_to_quanta_cnt(frag_cnt, &quanta); -- if (ret_code) { -- uqp->err = EINVAL; -- SXE2_VERBS_LOG_ERROR_BDF("wr sge num %u exceed, qpn [%u]\n", -- frag_cnt, qp_ex->qp_base.qp_num); -- return; -- } -- wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, -- qp_ex->wr_id, push_wqe); -- if (!wqe) { -- uqp->err = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("wr no memory, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- return; -- } -- uqp->wqe_idx = wqe_idx; -- uqp->cur_wqe = wqe; -- uqp->quanta = quanta; -- -- memcpy(wqe, cache_hdr, SXE2_QP_WQE_MIN_SIZE); -- -- addl_frag_cnt = frag_cnt > 1 ? 
(frag_cnt - 1) : 0; -- if (has_imme_data) { -- i = 0; -- } else { -- sxe2_fill_sgelist_data(wqe, 0, sg_list, qp->swqe_polarity); -- i = 1; -- } -- if (total_size == 0) { -- get_64bit_val(wqe, 8, &frag_info); -- frag_info = frag_info & (~SXE2_WQE_FRAG_VALID); -- SXE2_SET_FIELD(frag_info, SXE2_WQE_FRAG_VALID, !qp->swqe_polarity); -- set_64bit_val(wqe, 8, frag_info); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr set frag_info [%#llx] field [%d]\n", -- frag_info, !qp->swqe_polarity); -- } -- } -- -- for (byte_off = 32; i < num_sge;) { -- sxe2_fill_sgelist_data(wqe, byte_off, &sg_list[i], qp->swqe_polarity); -- byte_off += 16; -- i++; -- } -- -- if (!(frag_cnt & 0x01) && frag_cnt) { -- sxe2_fill_sgelist_data(wqe, byte_off, NULL, qp->swqe_polarity); -- } -- SXE2_SET_FIELD(hdr, SXE2_WQE_ADDSGECNT, addl_frag_cnt); -- SXE2_SET_FIELD(hdr, SXE2_WQE_VALID, qp->swqe_polarity); -- set_64bit_val(wqe, 24, hdr); -- sxe2_dump_wqe_hdr(uqp, &hdr, uqp->funid); --} -- --void sxe2_wr_bind_mw_rc(struct ibv_qp_ex *qp_ex, struct ibv_mw *mw, -- uint32_t rkey, const struct ibv_mw_bind_info *bind_info) --{ -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- __u64 hdr; -- -- uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr bind mw unsupport, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- } -- -- uqp->funid = SXE2_RDMA_BIND_MW; -- -- uqp->err = EOPNOTSUPP; -- -- sxe2_dump_wqe_hdr(uqp, &hdr, uqp->funid); -- return; --} -- --void sxe2_wr_local_inv_rc(struct ibv_qp_ex *qp_ex, uint32_t invalidate_rkey) --{ -- struct sxe2_qp_common *qp; -- struct sxe2_uqp *uqp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- __le64 *cache_hdr; -- bool push_wqe; -- __u32 wqe_idx; -- __le64 *wqe; -- __u64 hdr; -- __u16 quanta = SXE2_QP_WQE_MIN_QUANTA; -- __u64 val = 0; -- -- uqp = 
container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); -- sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, -- struct sxe2_rdma_ucontext, ibv_ctx.context); -- if (sctx->enable_io_log) { -- SXE2_VERBS_LOG_DEBUG_BDF("wr local inv enter, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- } -- -- qp = &uqp->qp; -- push_wqe = qp->push_db ? true : false; -- cache_hdr = sxe2_init_wqe(uqp, qp_ex->wr_id, SXE2_OP_TYPE_LOCAL_INV, false); -- if (!cache_hdr) { -- return; -- } -- -- wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, -- 0, qp_ex->wr_id, push_wqe); -- if (!wqe) { -- uqp->err = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("wr no memory, qpn [%u]\n", -- qp_ex->qp_base.qp_num); -- return; -- } -- -- get_64bit_val(cache_hdr, 24, &hdr); -- -- uqp->wqe_idx = wqe_idx; -- uqp->cur_wqe = wqe; -- uqp->quanta = quanta; -- -- SXE2_SET_FIELD(val, SXE2_WQE_REMOTEINVTAG, invalidate_rkey); -- set_64bit_val(cache_hdr, 8, val); -- -- memcpy(wqe, cache_hdr, SXE2_QP_WQE_MIN_SIZE); -- -- SXE2_SET_FIELD(hdr, SXE2_WQE_VALID, qp->swqe_polarity); -- set_64bit_val(wqe, 24, hdr); -- uqp->funid = SXE2_RDMA_LOCAL_INV; -- sxe2_dump_wqe_hdr(uqp, &hdr, uqp->funid); --} -- --void sxe2_push_nop_wqe(struct sxe2_qp_common *qp) { -- __le64 *wqe; -- __u32 wqe_idx; -- -- wqe_idx = SXE2_RING_CURRENT_HEAD(qp->sq_ring); -- if (!wqe_idx) { -- qp->swqe_polarity = !qp->swqe_polarity; -- } -- wqe = qp->sq_base[wqe_idx].elem; -- -- qp->sq_wrtrk_array[wqe_idx].quanta = SXE2_QP_WQE_MIN_QUANTA; -- -- set_64bit_val(wqe, 0, 0); -- set_64bit_val(wqe, 8, 0); -- set_64bit_val(wqe, 16, 0); -- -- sxe2_set_nop_hdr(wqe, 24, qp); -- SXE2_RING_MOVE_HEAD_NOCHECK(qp->sq_ring); -- SXE2_RING_MOVE_TAIL(qp->sq_ring); --} -- -diff -Naur rdma-core-48.0/providers/sxe2/io.h rdma-core-48.0.bak/providers/sxe2/io.h ---- rdma-core-48.0/providers/sxe2/io.h 2026-05-26 10:42:01.853075201 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/io.h 1970-01-01 08:00:00.000000000 +0800 -@@ -1,477 +0,0 @@ -- -- --#ifndef __IO_H__ --#define __IO_H__ -- --#define 
SXE2_OP_TYPE_RDMA_WRITE 0x00 --#define SXE2_OP_TYPE_RDMA_READ 0x01 --#define SXE2_OP_TYPE_RSVD1 0x02 --#define SXE2_OP_TYPE_SEND 0x03 --#define SXE2_OP_TYPE_SEND_INV 0x04 --#define SXE2_OP_TYPE_SEND_SOL 0x05 --#define SXE2_OP_TYPE_SEND_SOL_INV 0x06 --#define SXE2_OP_TYPE_RSVD2 0x07 --#define SXE2_OP_TYPE_BIND_MW 0x08 --#define SXE2_OP_TYPE_FAST_REG_MR 0x09 --#define SXE2_OP_TYPE_LOCAL_INV 0x0a --#define SXE2_OP_TYPE_RDMA_READ_INV 0x0b --#define SXE2_OP_TYPE_NOP 0x0c --#define SXE2_OP_TYPE_RDMA_WRITE_SOL 0x0d -- --#define SXE2_CQE_QTYPE_RQ 0 --#define SXE2_CQE_QTYPE_SQ 1 -- --#define SXE2_MAX_FRAGCNT 16 --#define SXE2_QP_WQE_MIN_SIZE 32 --#define SXE2_QP_WQE_MAX_SIZE 256 --#define SXE2_QP_MAX_INLINE_PER_QUANTA 31 -- --#define SXE2_SQ_RSVD 8 --#define SXE2_RQ_RSVD 8 -- --#define SXE2_INLINE_VALID_S 7 --#define HCA_CORE_CLOCK_500_MHZ 500 -- --#define SXE2_WQE_IMMEDATA GENMASK_ULL(31, 0) -- --#define SXE2_WQE_FRAG_LEN GENMASK_ULL(62, 32) --#define SXE2_WQE_FRAG_STAG GENMASK_ULL(31, 0) --#define SXE2_WQE_FRAG_VALID BIT_ULL(63) -- --#define SXE2_WQE_DESTQKEY GENMASK_ULL(31, 0) --#define SXE2_WQE_DESTQPN GENMASK_ULL(55, 32) -- --#define SXE2_WQE_REMOTEINVTAG GENMASK_ULL(31, 0) --#define SXE2_WQE_REMOTETAG GENMASK_ULL(31, 0) --#define SXE2_WQE_AH_ID GENMASK_ULL(16, 0) --#define SXE2_WQE_OPCODE GENMASK_ULL(37, 32) --#define SXE2_WQE_ADDSGECNT GENMASK_ULL(41, 38) --#define SXE2_WQE_REPORTRTT BIT_ULL(46) --#define SXE2_WQE_IMMEDATAFLG BIT_ULL(47) --#define SXE2_WQE_INLINEDATALEN GENMASK_ULL(55, 48) --#define SXE2_WQE_PUSHWQE BIT_ULL(56) --#define SXE2_WQE_INLINEDATAFLG BIT_ULL(57) --#define SXE2_WQE_READFENCE BIT_ULL(60) --#define SXE2_WQE_LOCALFENCE BIT_ULL(61) --#define SXE2_WQE_SIGCOMPL BIT_ULL(62) --#define SXE2_WQE_VALID BIT_ULL(63) -- --#define SXE2_WQE_MR_STAG GENMASK_ULL(63, 32) --#define SXE2_WQE_MW_STAG GENMASK_ULL(31, 0) --#define SXE2_WQE_MW_LEN GENMASK_ULL(45, 0) --#define SXE2_WQE_ACCESSRIGHT GENMASK_ULL(52, 48) --#define SXE2_WQE_VA_BASE_FLAG BIT_ULL(53) 
--#define SXE2_WQE_MW_TYPE BIT_ULL(54) -- --#define SXE2_WQEALLOC_WQE_DESC_INDEX GENMASK(31, 20) --#define SXE2_CQE_VALID BIT_ULL(63) --#define SXE2_CQE_WQEIDX GENMASK_ULL(46, 32) -- --#define SXE2_SET_FIELD(origin, mask, val) \ -- ((origin) |= FIELD_PREP(mask, val)) -- --#define SXE2_GET_FIELD(mask, val) \ -- FIELD_GET(mask, val) -- --enum sxe2_addressing_type { -- SXE2_ADDR_TYPE_ZERO_BASED = 0, -- SXE2_ADDR_TYPE_VA_BASED = 1, --}; -- --enum sxe2_cq_err{ -- SXE2_CQ_OK = 0, -- SXE2_CQ_NOENT = -2, --}; -- --enum { -- SXE2_CQ_DB_REQ_SOLICITED = 1 << 31, -- SXE2_CQ_DB_REQ_NOSOLICITED = 0 << 31 --}; -- --enum { -- SXE2_CQ_SET_CI = 0, -- SXE2_CQ_ARM_DB = 1, --}; -- --enum { -- SXE2_QP_RQ_PI = 0, -- SXE2_QP_SQ_CI = 3, --}; -- --enum sxe2_arm_type { -- SXE2_CQ_ARM_NEXT = 0, -- SXE2_CQ_ARM_SOLICITED = 1, --}; -- --enum sxe2_major_opcode { -- SXE2_SUCCESS = 0, -- SXE2_LOCAL_LEN_ERR = 0x1, -- SXE2_LOCAL_QP_OP_ERR = 0x2, -- SXE2_RSV1_ERR, -- SXE2_LOCAL_PROTECTION_ERR = 0x4, -- SXE2_WR_FLUSH_ERR = 0x5, -- SXE2_MW_BIND_ERR = 0x6, -- SXE2_FAST_REGISTER_ERR = 0x7, -- SXE2_INVALID_KEY_ERR = 0x8, -- SXE2_BAD_RESPONSE_ERR = 0x10, -- SXE2_LOCAL_ACCESS_ERR = 0x11, -- SXE2_REMOTE_INVALID_REQUEST_ERR = 0x12, -- SXE2_REMOTE_ACCESS_ERR = 0x13, -- SXE2_REMOTE_OPERATION_ERR = 0x14, -- SXE2_TRANS_RETRY_CNT_EXCEED_ERR = 0x15, -- SXE2_RNR_RETRY_CNT_EXCEED_ERR = 0x16, -- SXE2_ABORT_ERR = 0x22, -- SXE2_DIF_CHECK_ERR = 0x23, -- SXE2_SQ_FAKE_ERR = 0x24, --}; -- --struct sxe2_post_send { -- struct ibv_sge *sg_list; -- __u32 num_sges; -- __u32 qkey; -- __u32 dest_qp; -- __u32 ah_id; --}; -- --struct sxe2_rdma_write { -- struct ibv_sge *lo_sg_list; -- struct ibv_sge rem_addr; -- __u32 num_lo_sges; --}; -- --struct sxe2_rdma_read { -- struct ibv_sge *lo_sg_list; -- struct ibv_sge rem_addr; -- __u32 num_lo_sges; --}; -- --struct sxe2_bind_window { -- __u32 mr_stag; -- __u32 mw_stag; -- __u64 bind_len; -- void *va; -- enum sxe2_addressing_type addressing_type; -- bool ena_reads : 1; -- bool ena_writes : 
1; -- bool mem_window_type_1 : 1; --}; -- --struct sxe2_local_invalidate { -- __u32 target_stag; --}; -- --struct sxe2_wr_info { -- __u64 wr_id; -- __u8 op_type; -- __u8 l4len; -- bool signaled : 1; -- bool read_fence : 1; -- bool local_fence : 1; -- bool inline_data : 1; -- bool imm_data_valid : 1; -- bool push_wqe : 1; -- bool report_rtt : 1; -- bool udp_hdr : 1; -- bool defer_flag : 1; -- bool post_wqe : 1; -- __u32 imm_data; -- __u32 rkey_to_inv; -- union { -- struct sxe2_post_send send; -- struct sxe2_rdma_write rdma_write; -- struct sxe2_rdma_read rdma_read; -- struct sxe2_bind_window bind_window; -- struct sxe2_local_invalidate local_inval; -- } op_info; -- enum sxe2_disp_id funid; --}; -- --struct sxe2_rq_info { -- __u64 wr_id; -- struct ibv_sge *sg_list; -- __u32 num_sges; --}; -- --struct sxe2_imme_data { -- __le64 imme_data; --}; -- --struct sxe2_frag_data { -- __le64 tag_offset; -- union { -- struct { -- __le64 stag : 32; -- __le64 frag_len : 31; -- __le64 frag_valid : 1; -- } field; -- __le64 val; -- } offset8; --}; -- --union sxe2_dqpn_data { -- struct { -- __le64 dest_qkey : 32; -- __le64 dest_qpn : 24; -- __le64 rsv0 : 8; -- } field; -- __le64 val; --}; -- --struct sxe2_bindmw_info { -- __le64 mw_va_base; -- union { -- struct { -- __le64 mw_key : 32; -- __le64 mr_key : 32; -- } field; -- __le64 val; -- } offset8; -- union { -- struct { -- __le64 mw_len : 46; -- __le64 rsv0 : 18; -- } field; -- __le64 val; -- } offset16; --}; -- --union sxe2_send_hdr { -- struct { -- __u64 remote_inv_rkey : 32; -- __u64 op : 6; -- __u64 addfragcnt : 4; -- __u64 rsvd1 : 4; -- __u64 report_rtt : 1; -- __u64 imme_data_flag : 1; -- __u64 rsvd2 : 8; -- __u64 push_wqe : 1; -- __u64 inline_data_flag : 1; -- __u64 rsvd3 : 1; -- __u64 rsvd4 : 1; -- __u64 read_fence : 1; -- __u64 local_fence : 1; -- __u64 signaled_completion : 1; -- __u64 wqe_valid : 1; -- } field; -- __u64 val; --}; -- --union sxe2_send_inline_hdr { -- struct { -- __u64 remote_inv_rkey : 32; -- __u64 op : 6; 
-- __u64 rsvd1 : 8; -- __u64 report_rtt : 1; -- __u64 imme_data_flag : 1; -- __u64 inline_data_len : 8; -- __u64 push_wqe : 1; -- __u64 inline_data_flag : 1; -- __u64 rsvd2 : 2; -- __u64 read_fence : 1; -- __u64 local_fence : 1; -- __u64 signaled_completion : 1; -- __u64 wqe_valid : 1; -- } field; -- __u64 val; --}; -- --union sxe2_write_hdr { -- struct { -- __u64 remote_key : 32; -- __u64 op : 6; -- __u64 addfragcnt : 4; -- __u64 rsvd1 : 4; -- __u64 report_rtt : 1; -- __u64 imme_data_flag : 1; -- __u64 rsvd2 : 8; -- __u64 push_wqe : 1; -- __u64 inline_data_flag : 1; -- __u64 rsvd3 : 2; -- __u64 read_fence : 1; -- __u64 local_fence : 1; -- __u64 signaled_completion : 1; -- __u64 wqe_valid : 1; -- } field; -- __u64 val; --}; -- --union sxe2_write_inline_hdr { -- struct { -- __u64 remote_key : 32; -- __u64 op : 6; -- __u64 rsvd1 : 8; -- __u64 report_rtt : 1; -- __u64 imme_data_flag : 1; -- __u64 inline_data_len : 8; -- __u64 push_wqe : 1; -- __u64 inline_data_flag : 1; -- __u64 rsvd2 : 2; -- __u64 read_fence : 1; -- __u64 local_fence : 1; -- __u64 signaled_completion : 1; -- __u64 wqe_valid : 1; -- } field; -- __u64 val; --}; -- --union sxe2_read_hdr { -- struct { -- __u64 remote_key : 32; -- __u64 op : 6; -- __u64 addfragcnt : 4; -- __u64 rsvd1 : 4; -- __u64 report_rtt : 1; -- __u64 rsvd2 : 1; -- __u64 rsvd3 : 8; -- __u64 push_wqe : 1; -- __u64 rsvd4 : 3; -- __u64 read_fence : 1; -- __u64 local_fence : 1; -- __u64 signaled_completion : 1; -- __u64 wqe_valid : 1; -- } field; -- __u64 val; --}; -- --union sxe2_bindmw_hdr { -- struct { -- __u64 rsvd0 : 32; -- __u64 op : 6; -- __u64 rsvd1 : 10; -- __u64 access : 5; -- __u64 va_base_flag : 1; -- __u64 mw_type : 1; -- __u64 rsvd2 : 1; -- __u64 push_wqe : 1; -- __u64 rsvd3 : 3; -- __u64 read_fence : 1; -- __u64 local_fence : 1; -- __u64 signaled_completion : 1; -- __u64 wqe_valid : 1; -- } field; -- __u64 val; --}; -- --union sxe2_inval_hdr { -- struct { -- __u64 rsvd0 : 32; -- __u64 op : 6; -- __u64 rsvd1 : 18; -- __u64 
push_wqe : 1; -- __u64 rsvd3 : 3; -- __u64 read_fence : 1; -- __u64 local_fence : 1; -- __u64 signaled_completion : 1; -- __u64 wqe_valid : 1; -- } field; -- __u64 val; --}; -- --union sxe2_fregmr_hdr { -- struct { -- __u64 consumer_key : 8; -- __u64 mr_index : 24; -- __u64 op : 6; -- __u64 log_entity_size : 5; -- __u64 rsvd1 : 5; -- __u64 access : 5; -- __u64 va_base_flag : 1; -- __u64 pbl_mode : 2; -- __u64 push_wqe : 1; -- __u64 rsvd3 : 3; -- __u64 read_fence : 1; -- __u64 local_fence : 1; -- __u64 signaled_completion : 1; -- __u64 wqe_valid : 1; -- } field; -- __u64 val; --}; -- --union sxe2_rq_hdr { -- struct { -- __u64 rsv0 : 38; -- __u64 addfragcnt : 4; -- __u64 rsv1 : 21; -- __u64 wqe_valid : 1; -- } field; -- __u64 val; --}; -- --union sxe2_nop_hdr { -- struct { -- __u64 rsv0 : 32; -- __u64 op : 6; -- __u64 addfragcnt : 4; -- __u64 rsv1 : 14; -- __u64 push_wqe : 1; -- __u64 rsv2 : 3; -- __u64 read_fence : 1; -- __u64 local_fence : 1; -- __u64 signaled_completion : 1; -- __u64 wqe_valid : 1; -- } field; -- __u64 val; --}; -- --enum sxe2_qp_caps { -- SXE2_WRITE_WITH_IMM = 1, -- SXE2_SEND_WITH_IMM = 2, -- SXE2_LLWQE_MODE = 8, --}; -- --enum sxe2_qp_wqe_size { -- SXE2_WQE_SIZE_32 = 32, -- SXE2_WQE_SIZE_64 = 64, -- SXE2_WQE_SIZE_96 = 96, -- SXE2_WQE_SIZE_128 = 128, -- SXE2_WQE_SIZE_256 = 256, --}; -- --static inline void set_64bit_val(__le64 *wqe_words, __u32 byte_index, __u64 val) --{ -- wqe_words[byte_index >> 3] = htole64(val); --} -- --static inline void set_32bit_val(__le32 *wqe_words, __u32 byte_index, __u32 val) --{ -- wqe_words[byte_index >> 2] = htole32(val); --} -- --static inline void get_64bit_val(__le64 *wqe_words, __u32 byte_index, -- __u64 *val) --{ -- *val = le64toh(wqe_words[byte_index >> 3]); --} -- --static inline void get_32bit_val(__le32 *wqe_words, __u32 byte_index, -- __u32 *val) --{ -- *val = le32toh(wqe_words[byte_index >> 2]); --} -- --static inline void db_wr32(__u32 val, __u32 *wqe_word) --{ -- *wqe_word = val; --} -- --typedef int 
(*rdma_disp_func)(struct sxe2_qp_common *qp, -- struct sxe2_wr_info *wr_info, bool post_sq); --int sxe2_hw_send(struct sxe2_qp_common *qp, struct sxe2_wr_info *wr_info, -- bool post_sq); --int sxe2_hw_inline_send(struct sxe2_qp_common *qp, -- struct sxe2_wr_info *wr_info, bool post_sq); --int sxe2_hw_rdma_write(struct sxe2_qp_common *qp, -- struct sxe2_wr_info *wr_info, bool post_sq); --int sxe2_hw_inline_rdma_write(struct sxe2_qp_common *qp, -- struct sxe2_wr_info *wr_info, bool post_sq); --int sxe2_hw_rdma_read(struct sxe2_qp_common *qp, -- struct sxe2_wr_info *wr_info, bool post_sq); --int sxe2_hw_mw_bind(struct sxe2_qp_common *qp, struct sxe2_wr_info *wr_info, -- bool post_sq); --int sxe2_hw_local_invalidate(struct sxe2_qp_common *qp, -- struct sxe2_wr_info *wr_info, bool post_sq); --void sxe2_clean_cqes(struct sxe2_qp_common *qp, -- struct sxe2_ucq *vendor_cq, int cq_type); --void sxe2_push_nop_wqe(struct sxe2_qp_common *qp); -- --#endif -diff -Naur rdma-core-48.0/providers/sxe2/log.c rdma-core-48.0.bak/providers/sxe2/log.c ---- rdma-core-48.0/providers/sxe2/log.c 2026-05-26 10:42:01.853075201 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/log.c 1970-01-01 08:00:00.000000000 +0800 -@@ -1,468 +0,0 @@ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include "log.h" -- --#define SXE2_LOG_EXTEND_INFO "SXE2_LOG_EXTEND_INFO" --#define SXE2_LOG_FILE_SIZE "SXE2_LOG_FILE_SIZE" --#define SXE2_LOG_SERIAL "SXE2_LOG_SERIAL" --#define SXE2_LOG_COVER_WRITE "SXE2_LOG_COVER_WRITE" -- --#define SXE2_LOG_PATH "/var/log/" --#define SXE2_LOG_FILE_NAME "sxe2_rdma_file_current.log" --#define SXE2_LOG_FLOCK_NAME "sxe2_rdma_file_log.flock" --#define CONCAT(a, b) a##b -- --#define SXE2_LOG_DEFAULT_DEBUG_FILE_SIZE \ -- (800) --#define SXE2_LOG_DEFAULT_RELEASE_FILE_SIZE \ -- (300) -- --log_mgr_s g_log_mgr = {false, NULL, 200, false, false, -1, -1, false}; --pthread_mutex_t g_log_mutex; -- --log_mgr_s 
*log_mgr_point_get(void) --{ -- return &g_log_mgr; --} -- --void log_set_serial_switch(bool on) --{ -- log_mgr_s *p_log_mgr = NULL; -- -- p_log_mgr = log_mgr_point_get( ); -- p_log_mgr->serial_on = on; -- return; --} -- --int log_init0(bool is_ut) --{ -- char *env = NULL; -- int file_size; -- int rc; -- -- env = getenv(SXE2_LOG_EXTEND_INFO); -- if (env) { -- if (0 == strncmp(env, "true", 4)) { -- g_log_mgr.extend = true; -- } else { -- g_log_mgr.extend = false; -- } -- } -- -- env = getenv(SXE2_LOG_SERIAL); -- if (env) { -- if (0 == strncmp(env, "true", 4)) { -- g_log_mgr.serial_on = true; -- } else { -- g_log_mgr.serial_on = false; -- } -- } -- -- env = getenv(SXE2_LOG_COVER_WRITE); -- if (env) { -- if (0 == strncmp(env, "true", 4)) { -- g_log_mgr.file_cover_write_flag = true; -- } else { -- g_log_mgr.file_cover_write_flag = false; -- } -- } -- --#if defined(SXE2_CFG_DEBUG) -- g_log_mgr.file_size = SXE2_LOG_DEFAULT_DEBUG_FILE_SIZE; --#else -- g_log_mgr.file_size = SXE2_LOG_DEFAULT_RELEASE_FILE_SIZE; --#endif -- -- env = getenv(SXE2_LOG_FILE_SIZE); -- if (env) { -- file_size = atoi(env); -- if (file_size > 0) { -- g_log_mgr.file_size = file_size; -- } -- } -- -- if (g_log_mgr.fd == -1) { -- if (is_ut) { -- g_log_mgr.is_ut = true; -- g_log_mgr.fd = -- open(SXE2_LOG_FILE_NAME, -- O_APPEND | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); -- } else { -- g_log_mgr.is_ut = false; -- g_log_mgr.fd = -- open(SXE2_LOG_PATH SXE2_LOG_FILE_NAME, -- O_APPEND | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); -- } -- if (g_log_mgr.fd == -1) { -- perror("open"); -- } -- } -- -- if (g_log_mgr.lockfd == -1) { -- if (is_ut) { -- g_log_mgr.lockfd = -- open(SXE2_LOG_FLOCK_NAME, O_WRONLY | O_CREAT, 0666); -- } else { -- g_log_mgr.lockfd = open(SXE2_LOG_PATH SXE2_LOG_FLOCK_NAME, -- O_WRONLY | O_CREAT, 0666); -- } -- if (g_log_mgr.fd == -1) { -- perror("open"); -- } -- } -- -- rc = pthread_mutex_init(&g_log_mutex, NULL); -- if (rc != 0) { -- perror("mutex_init"); -- } -- -- return 0; --} -- --int 
log_uninit0(void) --{ -- if (g_log_mgr.fd != -1) { -- close(g_log_mgr.fd); -- g_log_mgr.fd = -1; -- } -- -- if (g_log_mgr.lockfd != -1) { -- close(g_log_mgr.lockfd); -- g_log_mgr.lockfd = -1; -- } -- -- pthread_mutex_destroy(&g_log_mutex); -- -- return 0; --} -- --static inline bool log_file_lock(int fd) --{ -- int rc = flock(fd, LOCK_EX); -- return (bool)(rc == 0); --} -- --static inline bool log_file_unlock(int fd) --{ -- int rc = flock(fd, LOCK_UN); -- return (bool)(rc == 0); --} -- --void log_file_name_set(char *p_name) --{ -- struct timeval tv; -- time_t time_stamp_now; -- struct tm *p_time_now; -- long usec; -- -- gettimeofday(&tv, NULL); -- usec = tv.tv_usec; -- time_stamp_now = tv.tv_sec; -- p_time_now = localtime(&time_stamp_now); -- -- if (g_log_mgr.is_ut) { -- sprintf(p_name, -- "sxe2_rdma_file_%04d_%02d_%02d-%02d_%02d_%02d.%06ld.log", -- p_time_now->tm_year + 1900, p_time_now->tm_mon + 1, -- p_time_now->tm_mday, p_time_now->tm_hour, p_time_now->tm_min, -- p_time_now->tm_sec, usec); -- } else { -- sprintf(p_name, -- SXE2_LOG_PATH -- "sxe2_rdma_file_%04d_%02d_%02d-%02d_%02d_%02d.%06ld.log", -- p_time_now->tm_year + 1900, p_time_now->tm_mon + 1, -- p_time_now->tm_mday, p_time_now->tm_hour, p_time_now->tm_min, -- p_time_now->tm_sec, usec); -- } -- -- return; --} -- --void log_file_store(const char *p_name) --{ -- char new_name[35] = {0}; -- log_file_name_set(new_name); -- (void)rename(p_name, new_name); -- return; --} -- --void log_file_not_cover_write(void) --{ -- int new_fd = -1; -- int old_fd = g_log_mgr.fd; -- struct stat st; -- if (fstat(old_fd, &st) < 0 || (st.st_size >> 20) < g_log_mgr.file_size) { -- goto end; -- } -- -- if (g_log_mgr.is_ut) { -- new_fd = open(SXE2_LOG_FILE_NAME, -- O_APPEND | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); -- } else { -- new_fd = open(SXE2_LOG_PATH SXE2_LOG_FILE_NAME, -- O_APPEND | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); -- } -- if (new_fd < 0) { -- goto end; -- } -- -- if (fstat(new_fd, &st) < 0 || (st.st_size >> 20) < 
g_log_mgr.file_size) { -- g_log_mgr.fd = new_fd; -- close(old_fd); -- goto end; -- } -- close(new_fd); -- -- if (g_log_mgr.is_ut) { -- log_file_store(SXE2_LOG_FILE_NAME); -- } else { -- log_file_store(SXE2_LOG_PATH SXE2_LOG_FILE_NAME); -- } -- -- if (g_log_mgr.is_ut) { -- new_fd = open(SXE2_LOG_FILE_NAME, -- O_APPEND | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); -- } else { -- new_fd = open(SXE2_LOG_PATH SXE2_LOG_FILE_NAME, -- O_APPEND | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); -- } -- if (new_fd < 0) { -- goto end; -- } -- g_log_mgr.fd = new_fd; -- -- close(old_fd); --end: -- return; --} -- --void log_file_cover_write(void) --{ -- int old_fd = -1, new_fd = -1; -- old_fd = g_log_mgr.fd; -- struct stat st; -- if (fstat(g_log_mgr.fd, &st) < 0 || -- (st.st_size >> 20) < g_log_mgr.file_size) { -- goto end; -- } -- -- if (g_log_mgr.is_ut) { -- new_fd = -- open(SXE2_LOG_FILE_NAME, -- O_APPEND | O_TRUNC | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); -- } else { -- new_fd = -- open(SXE2_LOG_PATH SXE2_LOG_FILE_NAME, -- O_APPEND | O_TRUNC | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); -- } -- if (new_fd < 0) { -- goto end; -- } -- g_log_mgr.fd = new_fd; -- close(old_fd); --end: -- return; --} -- --int log_file_rotate(void) --{ -- int ret = 0; -- __u64 fileSize; -- struct stat st; -- sigset_t mask, oldmask; -- if (fstat(g_log_mgr.fd, &st) == -1) { -- perror("fstat"); -- ret = -1; -- goto end; -- } -- -- fileSize = st.st_size >> 20; -- if (fileSize >= (__u64)g_log_mgr.file_size) { -- if (sigfillset(&mask) == -1) { -- perror("sigfillset"); -- ret = -1; -- goto end; -- } -- -- if (pthread_sigmask(SIG_BLOCK, &mask, &oldmask) != 0) { -- perror("pthread_sigmask"); -- ret = -1; -- goto end; -- } -- -- log_file_lock(g_log_mgr.lockfd); -- -- if (g_log_mgr.file_cover_write_flag) { -- log_file_cover_write( ); -- } else { -- log_file_not_cover_write( ); -- } -- -- if (g_log_mgr.fd < 0) { -- ret = -1; -- } -- -- log_file_unlock(g_log_mgr.lockfd); -- -- if (pthread_sigmask(SIG_SETMASK, &oldmask, NULL) != 
0) { -- perror("pthread_sigmask"); -- ret = -1; -- } -- } --end: -- return ret; --} -- --__u64 sxe2_current_time_us_get(void) --{ -- struct timeval tv = {0}; -- gettimeofday(&tv, NULL); -- return (SXE2_SEC2US(tv.tv_sec) + tv.tv_usec); --} -- --__u32 log_time_to_str(char *p_buf, __u32 buf_size, struct timeval *p_time) --{ -- struct tm time; -- __u32 use_len; -- -- (void)localtime_r(&p_time->tv_sec, &time); -- -- use_len = (__u32)strftime(p_buf, buf_size, "[%Y/%m/%d.%H:%M:%S", &time); -- use_len += snprintf(p_buf + use_len, buf_size - use_len, ".%06lu]", -- p_time->tv_usec); -- -- return use_len; --} -- --__u32 log_format_prefix(FILE *p_stream, char *p_buf, __u32 buf_size, -- struct timeval *p_time_stamp, const char *bdf, -- __u32 level, const char *p_func_name, __u32 line) --{ -- static const char *log_level_str[LOG_LEVEL_MAX] = { -- "INVALID", "FATAL", "ERROR", "WARN", "INFO", "DEBUG", "TRACE", -- }; -- __u32 use_len = 0; -- int fd; -- if (p_stream == NULL) { -- fd = g_log_mgr.fd; -- } else { -- fd = fileno(p_stream); -- } -- -- use_len = log_time_to_str(p_buf, buf_size, p_time_stamp); -- if (g_log_mgr.extend) { -- use_len += snprintf(p_buf + use_len, buf_size - use_len, "[%s][%d][%d]", -- log_level_str[level], (int)getpid( ), fd); -- } else { -- use_len += snprintf(p_buf + use_len, buf_size - use_len, "[%s]", -- log_level_str[level]); -- } -- -- if (NULL != bdf) { -- use_len += snprintf(p_buf + use_len, buf_size - use_len, "[%s][%s:%u]", -- bdf, p_func_name, line); -- } else { -- use_len += snprintf(p_buf + use_len, buf_size - use_len, "[%s:%u]", -- p_func_name, line); -- } -- -- return use_len; --} -- --__u32 log_buff_fill(FILE *p_stream, char *p_buff, __u32 buff_size, -- struct timeval *p_time_stamp, const char *bdf, __u32 level, -- const char *p_func_name, __u32 line, const char *p_format, -- va_list va_list) --{ -- log_hdr_s *p_log_hdr = NULL; -- char *p_data_buf; -- __u32 log_data_cap; -- __u32 use_len; -- -- p_log_hdr = (log_hdr_s *)p_buff; -- p_data_buf = 
p_buff + sizeof(log_hdr_s); -- log_data_cap = -- buff_size - sizeof(log_hdr_s) - 1; -- -- p_log_hdr->ts = SXE2_SEC2US(p_time_stamp->tv_sec) + p_time_stamp->tv_usec; -- p_log_hdr->magic = LOG_MAGIC; -- -- use_len = log_format_prefix(p_stream, p_data_buf, log_data_cap, -- p_time_stamp, bdf, level, p_func_name, line); -- use_len += vsnprintf(p_data_buf + use_len, log_data_cap - use_len, p_format, -- va_list); -- use_len = use_len > log_data_cap - 1 ? log_data_cap - 1 : use_len; -- if (p_data_buf[use_len - 1] != '\n') { -- use_len += sprintf(p_data_buf + use_len, "\n"); -- } -- p_log_hdr->len = (__u16)use_len; -- -- return use_len; --} -- --void log_store(FILE *p_stream, const char *bdf, log_level_e level, -- const char *p_file, const char *p_func_name, __u32 line, -- const char *p_format, ...) --{ -- struct timeval time_stamp; -- __u64 us_now; -- char *local_log_buf = malloc(LOG_LINE_MAX); -- __u32 use_len; -- va_list va_list; -- ssize_t ret = 0; -- char *p_data_buf = local_log_buf + sizeof(log_hdr_s); -- int fd; -- -- if ((p_func_name == NULL) || (p_format == NULL)) { -- goto end; -- } -- -- us_now = sxe2_current_time_us_get( ); -- time_stamp.tv_sec = (__time_t)SXE2_US2SEC(us_now); -- time_stamp.tv_usec = (__suseconds_t)(us_now - SXE2_SEC2US(time_stamp.tv_sec)); -- va_start(va_list, p_format); -- use_len = log_buff_fill(p_stream, local_log_buf, LOG_LINE_MAX, &time_stamp, -- bdf, level, p_func_name, line, p_format, va_list); -- va_end(va_list); -- -- if (NULL == p_stream) { -- if (g_log_mgr.fd == -1) { -- goto err; -- } -- -- if (log_file_rotate( ) < 0) { -- goto err; -- } -- -- ret = write(g_log_mgr.fd, p_data_buf, use_len); -- if (ret < 1) { -- perror("write"); -- } -- } else { -- fd = fileno(p_stream); -- ret = write(fd, p_data_buf, use_len); -- if (ret < 1) { -- perror("write"); -- } -- } -- --err: -- if (g_log_mgr.serial_on) { -- printf("%s", p_data_buf); -- } --end: -- (void)p_file; -- free(local_log_buf); -- return; --} -diff -Naur 
rdma-core-48.0/providers/sxe2/log.h rdma-core-48.0.bak/providers/sxe2/log.h ---- rdma-core-48.0/providers/sxe2/log.h 2026-05-26 10:42:01.853075201 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/log.h 1970-01-01 08:00:00.000000000 +0800 -@@ -1,245 +0,0 @@ -- --#ifndef _SXE2_LOG_H_ --#define _SXE2_LOG_H_ -- --#include --#include --#include --#include -- --#define SXE2_LOG_FD_NULL NULL --#define SXE2_LOG_FILE_NULL NULL --#define LOG_LINE_MAX (2048) --#define LOG_MAGIC (0xBAAB) -- --#define SXE2_SEC2US(_sec) ((_sec)*1000000ULL) --#define SXE2_US2SEC(_us) ((_us)/1000000ULL) -- --#define SXE2_LOG_IOLOG_ON "SXE2_LOG_IOLOG_SWITCH" --#define SXE2_VERBS_LOG_LEVEL "SXE2_RDMA_LOG_LEVEL" --#define SXE2_INVALID_BDF "ff:ff.f" -- --typedef struct log_hdr{ -- __u64 ts; -- __u32 pad; -- __u16 len; -- __u16 magic; --}log_hdr_s; -- --typedef enum log_level { -- LOG_LEVEL_INVALID, -- LOG_LEVEL_FATAL, -- LOG_LEVEL_ERROR, -- LOG_LEVEL_WARN, -- LOG_LEVEL_INFO, -- LOG_LEVEL_DEBUG, -- LOG_LEVEL_TRACE, -- LOG_LEVEL_MAX, --} log_level_e; -- --typedef struct log_persist_ops { -- void (*pre_persist)(void *persist_mgr); -- bool (*checkPersistSpace)(void *persist_mgr, __u32 persist_threshold); -- int (*flushInBlock)(void *persist_mgr, __u8 *buf, __u32 len); -- int (*flushOne)(void *persist_mgr, __u8 *buf, __u32 len); -- void (*postPersist)(void *persist_mgr); -- void (*printPersistInfo)(void *persist_mgr); -- int (*forcedExport)(void *persist_mgr); -- int (*forcedSync)(void *persist_mgr); --}log_persist_ops_s; -- --typedef struct log_mgr { -- bool serial_on; -- log_persist_ops_s *persist_ops; -- int file_size; -- bool file_cover_write_flag; -- bool extend; -- int fd; -- int lockfd; -- bool is_ut; --} log_mgr_s; -- --log_mgr_s *log_mgr_point_get(void); -- --int log_init0(bool is_ut); -- --int log_uninit0(void); -- --void log_set_serial_switch(bool on); -- --void log_file_name_set(char *p_name); -- --void log_file_store(const char *p_name); -- --void log_file_not_cover_write(void); -- --void 
log_file_cover_write(void); -- --int log_file_rotate(void); -- --__u64 sxe2_current_time_us_get(void); -- --__u32 log_time_to_str(char *p_buf, __u32 buf_size, struct timeval *p_time); -- --__u32 log_format_prefix(FILE *p_stream, char *p_buf, __u32 buf_size, -- struct timeval *p_time_stamp, const char *bdf, -- __u32 level, const char *p_func_name, __u32 line); -- --__u32 log_buff_fill(FILE *p_stream, char *p_buff, __u32 buff_size, -- struct timeval *p_time_stamp, const char *bdf, __u32 level, -- const char *p_func_name, __u32 line, const char *p_format, -- va_list va_list); -- --void log_store(FILE *p_stream, const char *bdf, -- log_level_e level, const char *p_file, const char *p_func_name, __u32 line, -- const char *p_format, ...) __attribute__((format(printf, 7, 8))); -- --#define SXE2_LOG_STORE(p_stream, bdf, level, fmt, ...) \ -- do { \ -- log_store(p_stream, bdf, level, \ -- (strrchr(__FILE__, '/') ? (strrchr(__FILE__, '/') + 1) \ -- : __FILE__), \ -- __func__, __LINE__, fmt, ##__VA_ARGS__); \ -- } while (0) -- --#define SXE2_LOG_DEV_INVALID(p_stream, bdf, fmt, ...) \ -- SXE2_LOG_STORE(p_stream, bdf, LOG_LEVEL_INVALID, fmt, ##__VA_ARGS__) -- --#define SXE2_LOG_DEV_FATAL(p_stream, bdf, fmt, ...) \ -- SXE2_LOG_STORE(p_stream, bdf, LOG_LEVEL_FATAL, fmt, ##__VA_ARGS__) -- --#define SXE2_LOG_DEV_ERROR(p_stream, bdf, fmt, ...) \ -- SXE2_LOG_STORE(p_stream, bdf, LOG_LEVEL_ERROR, fmt, ##__VA_ARGS__) -- --#define SXE2_LOG_DEV_WARN(p_stream, bdf, fmt, ...) \ -- SXE2_LOG_STORE(p_stream, bdf, LOG_LEVEL_WARN, fmt, ##__VA_ARGS__) -- --#define SXE2_LOG_DEV_INFO(p_stream, bdf, fmt, ...) \ -- SXE2_LOG_STORE(p_stream, bdf, LOG_LEVEL_INFO, fmt, ##__VA_ARGS__) -- --#define SXE2_LOG_DEV_DEBUG(p_stream, bdf, fmt, ...) \ -- SXE2_LOG_STORE(p_stream, bdf, LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__) -- --#define SXE2_LOG_DEV_TRACE(p_stream, bdf, fmt, ...) \ -- SXE2_LOG_STORE(p_stream, bdf, LOG_LEVEL_TRACE, fmt, ##__VA_ARGS__) -- --#define SXE2_VERBS_LOG_FATAL(fmt, ...) 
\ -- do { \ -- (void)printf("%s: " fmt, __func__, ##__VA_ARGS__); \ -- (void)printf("\n"); \ -- } while (0) -- --#define SXE2_VERBS_LOG_ERROR(fmt, ...) \ -- do { \ -- (void)printf("%s: " fmt, __func__, ##__VA_ARGS__); \ -- (void)printf("\n"); \ -- } while (0) -- --#define SXE2_VERBS_LOG_WARN(fmt, ...) \ -- do { \ -- (void)printf("%s: " fmt, __func__, ##__VA_ARGS__); \ -- (void)printf("\n"); \ -- } while (0) -- --#define SXE2_VERBS_LOG_INFO(fmt, ...) \ -- do { \ -- (void)printf("%s: " fmt, __func__, ##__VA_ARGS__); \ -- (void)printf("\n"); \ -- } while (0) -- --#define SXE2_VERBS_LOG_DEBUG(fmt, ...) \ -- do { \ -- (void)printf("%s: " fmt, __func__, ##__VA_ARGS__); \ -- (void)printf("\n"); \ -- } while (0) --#define SXE2_VERBS_LOG_TRACE(fmt, ...) \ -- do { \ -- (void)printf("%s: " fmt, __func__, ##__VA_ARGS__); \ -- (void)printf("\n"); \ -- } while (0) -- --#if defined SXE2_CFG_DEBUG -- --#define SXE2_VERBS_LOG_FATAL_BDF(fmt, ...) \ -- do { \ -- if (sctx && sctx->log_level >= LOG_LEVEL_FATAL) { \ -- SXE2_LOG_DEV_FATAL(sctx->dbg_fp, sctx->bdf, fmt, ##__VA_ARGS__); \ -- } \ -- } while (0) -- --#define SXE2_VERBS_LOG_ERROR_BDF(fmt, ...) \ -- do { \ -- if (sctx && sctx->log_level >= LOG_LEVEL_ERROR) { \ -- SXE2_LOG_DEV_ERROR(sctx->dbg_fp, sctx->bdf, fmt, ##__VA_ARGS__); \ -- } \ -- } while (0) -- --#define SXE2_VERBS_LOG_WARN_BDF(fmt, ...) \ -- do { \ -- if (sctx && sctx->log_level >= LOG_LEVEL_WARN) { \ -- SXE2_LOG_DEV_WARN(sctx->dbg_fp, sctx->bdf, fmt, ##__VA_ARGS__); \ -- } \ -- } while (0) -- --#define SXE2_VERBS_LOG_INFO_BDF(fmt, ...) \ -- do { \ -- if (sctx && sctx->log_level >= LOG_LEVEL_INFO) { \ -- SXE2_LOG_DEV_INFO(sctx->dbg_fp, sctx->bdf, fmt, ##__VA_ARGS__); \ -- } \ -- } while (0) -- --#define SXE2_VERBS_LOG_DEBUG_BDF(fmt, ...) \ -- do { \ -- if (sctx && sctx->log_level >= LOG_LEVEL_DEBUG) { \ -- SXE2_LOG_DEV_DEBUG(sctx->dbg_fp, sctx->bdf, fmt, ##__VA_ARGS__); \ -- } \ -- } while (0) -- --#define SXE2_VERBS_LOG_TRACE_BDF(fmt, ...) 
\ -- do { \ -- if (sctx && sctx->log_level >= LOG_LEVEL_TRACE) { \ -- SXE2_LOG_DEV_TRACE(sctx->dbg_fp, sctx->bdf, fmt, ##__VA_ARGS__); \ -- } \ -- } while (0) -- --#else -- --#define SXE2_VERBS_LOG_FATAL_BDF(fmt, ...) \ -- do { \ -- if (sctx && sctx->log_level >= LOG_LEVEL_FATAL) \ -- SXE2_VERBS_LOG_FATAL(fmt, ##__VA_ARGS__); \ -- } while (0) -- --#define SXE2_VERBS_LOG_ERROR_BDF(fmt, ...) \ -- do { \ -- if (sctx && sctx->log_level >= LOG_LEVEL_ERROR) \ -- SXE2_VERBS_LOG_ERROR(fmt, ##__VA_ARGS__); \ -- } while (0) -- --#define SXE2_VERBS_LOG_WARN_BDF(fmt, ...) \ -- do { \ -- if (sctx && sctx->log_level >= LOG_LEVEL_WARN) \ -- SXE2_VERBS_LOG_WARN(fmt, ##__VA_ARGS__); \ -- } while (0) -- --#define SXE2_VERBS_LOG_INFO_BDF(fmt, ...) \ -- do { \ -- if (sctx && sctx->log_level >= LOG_LEVEL_INFO) \ -- SXE2_VERBS_LOG_INFO(fmt, ##__VA_ARGS__); \ -- } while (0) -- --#define SXE2_VERBS_LOG_DEBUG_BDF(fmt, ...) \ -- do { \ -- if (sctx && sctx->log_level >= LOG_LEVEL_DEBUG) \ -- SXE2_VERBS_LOG_DEBUG(fmt, ##__VA_ARGS__); \ -- } while (0) -- --#define SXE2_VERBS_LOG_TRACE_BDF(fmt, ...) \ -- do { \ -- if (sctx && sctx->log_level >= LOG_LEVEL_TRACE) \ -- SXE2_VERBS_LOG_TRACE(fmt, ##__VA_ARGS__); \ -- } while (0) -- --#endif -- --#endif -diff -Naur rdma-core-48.0/providers/sxe2/Makefile rdma-core-48.0.bak/providers/sxe2/Makefile ---- rdma-core-48.0/providers/sxe2/Makefile 2026-05-26 10:42:01.853075201 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/Makefile 1970-01-01 08:00:00.000000000 +0800 -@@ -1,80 +0,0 @@ --################################################################################ --# Copyright (C) --################################################################################ --#(1) 固定模式. 定义路径信息 --################################################################################ --#注: MD_TOPDIR在Makefile.define将被使用, 因此需要预先定义 --#考虑到内核模块编译, Makefile的调用路径与存储路径不一致, 因此不可以$(shell pwd)方式获取路径. --TOPDIR_MP :=. 
--MAKEPATH := $(abspath $(lastword $(MAKEFILE_LIST))) --CURDIR := $(shell dirname $(MAKEPATH)) --TOPDIR := $(abspath $(CURDIR)/$(TOPDIR_MP)) --MD_TOPDIR := $(TOPDIR) --MT_TOPDIR := $(TOPDIR) -- --################################################################################# --#(2) 自定义模式 --################################################################################# -- --###################################### --#(2.1) 定义模块基本信息 --####################################### -- --SRCS := $(wildcard *.c) --OBJS := $(patsubst %.c,%.o,$(SRCS)) --OBJ_S := libsxe2-rdmav34.a --#CMR_INCS := -I$(INCS_ARCH) --#CMR_INCS += -I$(INCS_ARCH)/cmr/ --CMR_INCS += -I/usr/include/libnl3 --CMR_INCS += -I/usr/include/drm --CMR_INCS += -I$(TOPDIR)/../rdma-core-46.0/build/include/ --LIBS_DIR := $(TOPDIR)/../rdma-core-46.0/build/lib/libibverbs.so --CCAN_DIR := $(TOPDIR)/../rdma-core-46.0/build/ccan/libccan_pic.a --UTIL_DIR := $(TOPDIR)/../rdma-core-46.0/build/util/librdma_util_pic.a --CMR_LIBS := -lpthread --#LIBS_DIR += $(TOPDIR_MP)/lib/lib_shca_libbase_x86_64.a -- --CFLAGS := -g -ggdb #支持调试信息 --CFLAGS += -Wall --#CFLAGS += -Werror #打开gcc所有告警选项,并将告警当作错误信息 --CFLAGS += -Wextra #打开gcc额外告警选项 --CFLAGS += -frecord-gcc-switches #后续可通过命令“readelf -p .GCC.command.line”查看编译选项参数 --CFLAGS += -fprofile-arcs -ftest-coverage #启用gcover统计代码覆盖率 -- --VERBS_CFLAGS := $(CFLAGS) #verbs使用的cflags --VERBS_CFLAGS += -std=gnu11 #使用gnu11编译 --VERBS_CFLAGS += -Wno-sign-compare #临时关闭 --VERBS_CFLAGS += -Wno-unused-parameter #临时关闭 --VERBS_CFLAGS += -Wno-missing-field-initializers #临时关闭 --####################################### --#(2.2) 定义执行目标 --####################################### -- --all:$(OBJS) -- ar -cr -o $(OBJ_S) $^ -- ranlib $(OBJ_S) -- --$(OBJS):%.o:%.c -- gcc $(VERBS_CFLAGS) $(CMR_INCS) -c -o $@ $< $(CMR_LIBS) -- --clean: -- @rm -rf *.o ut *.gcno *.gcda *.a -- @rm -rf $(TOPDIR)/rdma-core-46.0/build/* -- @rm -rf $(TOPDIR)/bin/verbs/* --# @-$(MAKE) clean -C $(TOPDIR)/lib -- @rm -rf logfile.txt result.txt -- 
@echo all files made removed -- --#makefile帮助文档 --help: -- @echo Usage: -- @echo "The following are some of the valid targets for this Makefile:" -- @echo " all: Compile the whole project" -- @echo " clean: Clean Compile file" -- @echo "" --.PHONY:all clean help liba --%: -- @-$(warning MAKE target needed, please get help from command 'make help') -- @$(MAKE) help -- --.DEFAULT_GOAL=all -diff -Naur rdma-core-48.0/providers/sxe2/mc.c rdma-core-48.0.bak/providers/sxe2/mc.c ---- rdma-core-48.0/providers/sxe2/mc.c 2026-05-26 10:42:01.854075208 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/mc.c 1970-01-01 08:00:00.000000000 +0800 -@@ -1,13 +0,0 @@ -- --#include "verbs.h" --#include "sxe2_common.h" -- --int sxe2_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, __u16 lid) --{ -- return ibv_cmd_attach_mcast(qp, gid, lid); --} -- --int sxe2_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, __u16 lid) --{ -- return ibv_cmd_detach_mcast(qp, gid, lid); --} -diff -Naur rdma-core-48.0/providers/sxe2/mr.c rdma-core-48.0.bak/providers/sxe2/mr.c ---- rdma-core-48.0/providers/sxe2/mr.c 2026-05-26 10:42:01.853075201 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/mr.c 1970-01-01 08:00:00.000000000 +0800 -@@ -1,98 +0,0 @@ -- --#include --#include -- --#include "sxe2_common.h" --#include "sxe2_abi.h" --#include "log.h" -- --struct ibv_mr *sxe2_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, -- uint64_t hca_va, int acc) --{ -- struct sxe2_umr *mr; -- struct ibv_reg_mr cmd; -- int ret; -- struct ib_uverbs_reg_mr_resp resp; -- struct sxe2_rdma_ucontext *sctx = to_sctx(pd->context); -- -- mr = calloc(1, sizeof(*mr)); -- if (!mr) { -- SXE2_VERBS_LOG_ERROR_BDF("user alloc mr mem failed\n"); -- return NULL; -- } -- -- ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, acc, &mr->vmr, &cmd, -- sizeof(cmd), &resp, sizeof(resp)); -- if (ret) { -- free(mr); -- errno = ret; -- SXE2_VERBS_LOG_ERROR_BDF("ibv cmd reg mr err(%d)\n", ret); -- return NULL; -- } -- mr->acc_flags = acc; -- 
SXE2_VERBS_LOG_DEBUG_BDF("addr:%#lx, length:%zu, hca_va:%#lx,acc:%#x,handle:%#x, lkey:%#x, rkey:%#x", -- (uint64_t)addr, length, hca_va, acc, mr->vmr.ibv_mr.handle, -- mr->vmr.ibv_mr.lkey, mr->vmr.ibv_mr.rkey); -- -- return &mr->vmr.ibv_mr; --} -- --int sxe2_udereg_mr(struct verbs_mr *vmr) --{ -- int ret; -- -- ret = ibv_cmd_dereg_mr(vmr); -- if (ret) { -- SXE2_VERBS_LOG_ERROR("ibv cmd dereg mr err(%d)\n", ret); -- return ret; -- } -- -- free(vmr); -- return 0; --} -- --int sxe2_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, -- void *addr, size_t length, int access) --{ -- int ret; -- struct ibv_rereg_mr cmd; -- struct ib_uverbs_rereg_mr_resp resp; -- -- ret = ibv_cmd_rereg_mr(vmr, flags, addr, length, (uintptr_t)addr, -- access, pd, &cmd, sizeof(cmd), &resp, -- sizeof(resp)); -- if (ret) { -- SXE2_VERBS_LOG_ERROR("ibv cmd rereg mr err(%d)\n", ret); -- } -- SXE2_VERBS_LOG_DEBUG("addr:%#lx, length:%zu, acc:%#x,handle:%#x, lkey:%#x, rkey:%#x, flags:%#x", -- (uint64_t)addr, length, access,vmr->ibv_mr.handle, vmr->ibv_mr.lkey, -- vmr->ibv_mr.rkey, flags); -- return ret; --} -- --struct ibv_mr *sxe2_ureg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, -- size_t length, uint64_t iova, int fd, -- int acc) --{ -- struct sxe2_umr *mr; -- int ret; -- struct sxe2_rdma_ucontext *sctx = to_sctx(pd->context); -- -- mr = calloc(1, sizeof(*mr)); -- if (!mr) { -- SXE2_VERBS_LOG_ERROR_BDF("user alloc dma buf mr failed\n"); -- return NULL; -- } -- -- ret = ibv_cmd_reg_dmabuf_mr(pd, offset, length, iova, fd, acc, -- &mr->vmr); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("ibv cmd reg dma buf mr err(%d)\n", ret); -- free(mr); -- errno = ret; -- return NULL; -- } -- mr->acc_flags = acc; -- -- return &mr->vmr.ibv_mr; --} -diff -Naur rdma-core-48.0/providers/sxe2/pd.c rdma-core-48.0.bak/providers/sxe2/pd.c ---- rdma-core-48.0/providers/sxe2/pd.c 2026-05-26 10:42:01.853075201 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/pd.c 1970-01-01 08:00:00.000000000 +0800 -@@ -1,87 +0,0 @@ 
-- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include "ccan/array_size.h" --#include "util/compiler.h" --#include "util/mmio.h" --#include "util/util.h" --#include --#include --#include "sxe2_common.h" --#include "log.h" -- --struct ibv_pd *sxe2_ualloc_pd(struct ibv_context *context) --{ -- struct ibv_alloc_pd cmd; -- struct sxe2_ualloc_pd_resp resp; -- struct sxe2_upd *pd; -- struct ibv_pd *ibv_pd = NULL; -- struct sxe2_rdma_ucontext *sctx; -- int ret; -- -- memset(&resp, 0, sizeof(resp)); -- if (context == NULL) { -- SXE2_VERBS_LOG_ERROR("user alloc pd context err\n"); -- errno = EINVAL; -- goto end; -- } -- -- sctx = container_of(context, struct sxe2_rdma_ucontext, ibv_ctx.context); -- -- pd = calloc(1, sizeof(*pd)); -- if (!pd) { -- SXE2_VERBS_LOG_ERROR_BDF("user pd mem alloc failed\n"); -- goto end; -- } -- -- ret = ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd), -- &resp.ibv_resp, sizeof(resp)); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("ibv cmd alloc pd err(%d)\n", ret); -- free(pd); -- errno = ret; -- goto end; -- } -- -- pd->pd_id = resp.pd_id; -- ibv_pd = &pd->ibv_pd; -- -- SXE2_VERBS_LOG_DEBUG_BDF("user pd alloc resp.pdn:%u\n", pd->pd_id); -- --end: -- return ibv_pd; --} -- --int sxe2_ufree_pd(struct ibv_pd *pd) --{ -- int ret; -- struct sxe2_upd *upd; -- -- if (pd == NULL) { -- SXE2_VERBS_LOG_ERROR("user free pd is null\n"); -- ret = EINVAL; -- goto end; -- } -- -- upd = container_of(pd, struct sxe2_upd, ibv_pd); -- ret = ibv_cmd_dealloc_pd(pd); -- if (ret) { -- SXE2_VERBS_LOG_ERROR("ibv cmd dealloc pd err(%d)\n", ret); -- goto end; -- } -- free(upd); --end: -- return ret; --} -- -diff -Naur rdma-core-48.0/providers/sxe2/qp.c rdma-core-48.0.bak/providers/sxe2/qp.c ---- rdma-core-48.0/providers/sxe2/qp.c 2026-05-26 10:42:01.855075215 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/qp.c 1970-01-01 08:00:00.000000000 +0800 -@@ -1,782 +0,0 @@ 
--#include --#include -- --#include "log.h" --#include "verbs.h" --#include "sxe2_common.h" --#include "sxe2_abi.h" --#include "io.h" --#include "ring.h" --#include "buf.h" -- --#define MIN_SQ_SIZE (8u) --#define MIN_RQ_SIZE (8u) --#define SQ_RSV_SIZE (8) --#define RQ_RSV_SIZE (8) --#define QP_QUANTA_SIZE (32) --#define QP_DB_NOTE_SIZE (16) --#define QP_MEMALIGN_1K (1024) -- --enum { -- SXE2_SUPPORTED_SEND_OPS_FLAGS_RC = -- IBV_QP_EX_WITH_SEND | -- IBV_QP_EX_WITH_SEND_WITH_INV | -- IBV_QP_EX_WITH_SEND_WITH_IMM | -- IBV_QP_EX_WITH_RDMA_WRITE | -- IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM | -- IBV_QP_EX_WITH_RDMA_READ | -- IBV_QP_EX_WITH_LOCAL_INV | -- IBV_QP_EX_WITH_BIND_MW, -- SXE2_SUPPORTED_SEND_OPS_FLAGS_UD = -- IBV_QP_EX_WITH_SEND | -- IBV_QP_EX_WITH_SEND_WITH_IMM, --}; -- --static void sxe2_calc_sq_wqe_shift(__u32 max_sq_sge, __u32 max_inline_data, -- __u8 *wqe_shift) --{ -- *wqe_shift = WQE_SIZE_32BYTE; -- -- if (max_sq_sge > MAX_SGE_SIZE_1 || max_inline_data > MAX_INLINE_DATA_8) { -- if (max_sq_sge < MAX_SGE_SIZE_4 && max_inline_data <= MAX_INLINE_DATA_39) { -- *wqe_shift = WQE_SIZE_64BYTE; -- } else if (max_sq_sge < MAX_SGE_SIZE_8 && max_inline_data <= MAX_INLINE_DATA_101) { -- *wqe_shift = WQE_SIZE_128BYTE; -- } else { -- *wqe_shift = WQE_SIZE_256BYTE; -- } -- } --} --static void sxe2_calc_rq_wqe_shift(__u32 max_rq_sge, __u8 *wqe_shift) --{ -- *wqe_shift = WQE_SIZE_32BYTE; -- -- if (max_rq_sge > MAX_SGE_SIZE_1) { -- if (max_rq_sge < MAX_SGE_SIZE_4) { -- *wqe_shift = WQE_SIZE_64BYTE; -- } else if (max_rq_sge < MAX_SGE_SIZE_8) { -- *wqe_shift = WQE_SIZE_128BYTE; -- } else { -- *wqe_shift = WQE_SIZE_256BYTE; -- } -- } --} --static int sxe2_calc_sqdepth(struct sxe2_rdma_ucontext *sctx, -- struct sxe2_common_attrs *common_attrs, -- __u32 max_wr, __u8 wqe_shift, -- __u32 *sq_depth) --{ -- int ret = 0; -- __u32 depth; -- -- depth = sxe2_round_up_pow_2((max_wr << wqe_shift) + SQ_RSV_SIZE); -- -- depth = max(depth, MIN_SQ_SIZE); -- *sq_depth = depth; -- if (depth > 
common_attrs->max_hw_wq_quanta) { -- SXE2_VERBS_LOG_ERROR_BDF("sq depth illegal, sq depth(%u)\n", depth); -- ret = EINVAL; -- } -- -- return ret; --} --static int sxe2_calc_rqdepth(struct sxe2_rdma_ucontext *sctx, -- struct sxe2_common_attrs *common_attrs, -- __u32 max_wr, __u8 wqe_shift, -- __u32 *rq_depth) --{ -- int ret = 0; -- __u32 depth; -- depth = sxe2_round_up_pow_2((max_wr << wqe_shift) + RQ_RSV_SIZE); -- -- depth = max(depth, MIN_RQ_SIZE); -- *rq_depth = depth; -- if (depth > common_attrs->max_hw_rq_quanta) { -- SXE2_VERBS_LOG_ERROR_BDF("rq depth illegal, rq depth(%u)\n", depth); -- ret = EINVAL; -- } -- return ret; --} --static int sxe2_calc_sq_depth_shift(struct sxe2_rdma_ucontext *sctx, -- struct sxe2_qp_common_init_info *init_info, -- __u32 *sq_depth, __u8 *sq_shift) --{ -- int ret; -- -- sxe2_calc_sq_wqe_shift(init_info->max_sq_sge_cnt + 1, init_info->max_inline_data, -- sq_shift); -- ret = sxe2_calc_sqdepth(sctx, init_info->common_attrs, init_info->sq_size, -- *sq_shift, sq_depth); -- return ret; --} -- --static int sxe2_calc_rq_depth_shift(struct sxe2_rdma_ucontext *sctx, -- struct sxe2_qp_common_init_info *init_info, -- __u32 *rq_depth, __u8 *rq_shift) --{ -- int ret; -- -- if ((init_info->srq) || (init_info->rq_size == 0)) { -- *rq_depth = 0; -- *rq_shift = 0; -- ret = 0; -- goto end; -- } -- -- sxe2_calc_rq_wqe_shift(init_info->max_rq_sge_cnt, rq_shift); -- ret = sxe2_calc_rqdepth(sctx, init_info->common_attrs, init_info->rq_size, -- *rq_shift, rq_depth); --end: -- return ret; --} -- --static void sxe2_fill_qp(struct sxe2_uqp *uqp, -- struct sxe2_qp_common_init_info *init_info, -- struct sxe2_ucreate_qp_resp resp, -- struct ibv_qp_init_attr_ex *attr_ex) --{ -- __u32 sq_ring_size; -- uqp->sq_sig_all = (__u32)attr_ex->sq_sig_all; -- uqp->qp_type = attr_ex->qp_type; -- uqp->qp.back_qp = uqp; -- uqp->qp.lock = &uqp->lock; -- uqp->verbs_qp.qp.qp_num = resp.qpn; -- uqp->send_cq = container_of(attr_ex->send_cq, struct sxe2_ucq, verbs_cq.cq); -- 
uqp->recv_cq = container_of(attr_ex->recv_cq, struct sxe2_ucq, verbs_cq.cq); -- uqp->send_cq->uqp = uqp; -- uqp->recv_cq->uqp = uqp; -- -- uqp->qp.common_attrs = init_info->common_attrs; -- uqp->qp.sq_base = init_info->sq; -- uqp->qp.rq_base = init_info->rq; -- uqp->qp.srq = init_info->srq ? init_info->srq : NULL; -- uqp->qp.doorbell_note = init_info->doorbell_note; -- uqp->qp.sq_wrtrk_array = init_info->sq_wrtrk_array; -- uqp->qp.rq_wrid_array = init_info->rq_wrid_array; -- uqp->qp.qp_db_no_llwqe = init_info->qp_db_no_llwqe; -- uqp->qp.qp_caps = resp.qp_caps; -- uqp->qp.qpn = resp.qpn; -- uqp->qp.llwqe_mode = false; -- uqp->qp.max_sq_sge_cnt = init_info->max_sq_sge_cnt; -- uqp->qp.max_rq_sge_cnt = init_info->max_rq_sge_cnt; -- uqp->qp.sq_size = init_info->sq_size; -- uqp->qp.rq_size = init_info->rq_size; -- sq_ring_size = uqp->qp.sq_size << init_info->sq_shift; -- SXE2_RING_INIT(uqp->qp.sq_ring, sq_ring_size); -- SXE2_RING_INIT(uqp->qp.initial_ring, sq_ring_size); -- uqp->qp.swqe_polarity = 0; -- uqp->qp.rwqe_polarity = 0; -- uqp->qp.max_inline_data = init_info->max_inline_data; -- uqp->qp.rq_wqe_size = init_info->rq_shift; -- SXE2_RING_INIT(uqp->qp.rq_ring, uqp->qp.rq_size); -- uqp->qp.rq_wqe_size_multiplier = init_info->srq ? 
0 : 1 << init_info->rq_shift; --} -- --static void sxe2_fill_wr_builders_rc_xrc_dc(struct ibv_qp_ex *ibqp) --{ -- ibqp->wr_send = sxe2_wr_send_rc_ud; -- ibqp->wr_send_imm = sxe2_wr_send_imm_rc_ud; -- ibqp->wr_send_inv = sxe2_wr_send_inv_rc; -- ibqp->wr_rdma_write = sxe2_wr_rdma_write_rc; -- ibqp->wr_rdma_write_imm = sxe2_wr_rdma_write_imm_rc; -- ibqp->wr_rdma_read = sxe2_wr_rdma_read_rc; -- ibqp->wr_bind_mw = sxe2_wr_bind_mw_rc; -- ibqp->wr_local_inv = sxe2_wr_local_inv_rc; --} -- --static void sxe2_fill_wr_setters_rc_ud(struct ibv_qp_ex *ibqp) --{ -- ibqp->wr_set_sge = sxe2_wr_set_sge_rc_ud; -- ibqp->wr_set_sge_list = sxe2_wr_set_sge_list_rc_ud; -- ibqp->wr_set_inline_data = sxe2_wr_set_inline_data_rc_ud; -- ibqp->wr_set_inline_data_list = sxe2_wr_set_inline_data_list_rc_ud; --} -- --static void sxe2_fill_wr_builders_ud(struct ibv_qp_ex *ibqp) --{ -- ibqp->wr_send = sxe2_wr_send_rc_ud; -- ibqp->wr_send_imm = sxe2_wr_send_imm_rc_ud; --} -- --static int sxe2_qp_fill_wr_func(struct sxe2_rdma_ucontext *sctx, struct sxe2_uqp *uqp, -- const struct ibv_qp_init_attr_ex *attr) --{ -- struct ibv_qp_ex *ibqp = &uqp->verbs_qp.qp_ex; -- uint64_t ib_ops = attr->send_ops_flags; -- int ret = 0; -- -- SXE2_VERBS_LOG_DEBUG_BDF("qp_type:%u, ops:%#llx\n", attr->qp_type, (__u64)ib_ops); -- -- ibqp->wr_start = sxe2_wr_start; -- ibqp->wr_complete = sxe2_wr_complete; -- ibqp->wr_abort = sxe2_wr_abort; -- -- if (ib_ops & IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP || -- ib_ops & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD) { -- ret = EOPNOTSUPP; -- SXE2_VERBS_LOG_ERROR_BDF("atomics not support\n"); -- goto end; -- } -- -- switch (attr->qp_type) { -- case IBV_QPT_RC: -- if (ib_ops & ~SXE2_SUPPORTED_SEND_OPS_FLAGS_RC) { -- SXE2_VERBS_LOG_ERROR_BDF("not support ops:%#llx", (__u64)ib_ops); -- ret = EOPNOTSUPP; -- goto end; -- } -- -- sxe2_fill_wr_builders_rc_xrc_dc(ibqp); -- sxe2_fill_wr_setters_rc_ud(ibqp); -- -- break; -- -- case IBV_QPT_UD: -- if (ib_ops & ~SXE2_SUPPORTED_SEND_OPS_FLAGS_UD) { -- 
SXE2_VERBS_LOG_ERROR_BDF("not support ops:%#llx", (__u64)ib_ops); -- ret = EOPNOTSUPP; -- goto end; -- } -- -- sxe2_fill_wr_builders_ud(ibqp); -- sxe2_fill_wr_setters_rc_ud(ibqp); -- ibqp->wr_set_ud_addr = sxe2_wr_set_ud_addr; -- break; -- -- default: -- ret = EOPNOTSUPP; -- break; -- } -- --end: -- return ret; --} -- --static const char *sxe2_qptype2key(enum ibv_qp_type type) --{ -- const char *key = "HUGE_NA"; -- -- switch (type) { -- case IBV_QPT_RC: -- key = "HUGE_RC"; -- break; -- case IBV_QPT_UD: -- key = "HUGE_UD"; -- break; -- default: -- key = "HUGE_NA"; -- } -- -- return key; --} -- --static bool sxe2_is_huge_buf(const char *key) --{ -- bool ret = false; -- char *env = getenv(key); -- -- if (env && (!strncmp(env, "y", strlen(env)))) { -- ret = true; -- } -- -- return ret; --} --static int sxe2_alloc_qp_buf(struct sxe2_rdma_ucontext *sctx, sxe2_buf_s *buf, size_t size, -- enum ibv_qp_type qp_type) --{ -- int ret; -- sxe2_alloc_type_e type; -- sxe2_alloc_type_e default_type = SXE2_ALLOC_TYPE_ANON; -- const char *qp_huge_key; -- long page_size = 0; -- -- qp_huge_key = sxe2_qptype2key(qp_type); -- SXE2_VERBS_LOG_DEBUG_BDF("in sxe2_alloc_qp_buf qp_huge_key(%s)", qp_huge_key); -- if (sxe2_is_huge_buf(qp_huge_key)) { -- default_type = SXE2_ALLOC_TYPE_HUGE; -- } -- -- sxe2_alloc_type_get(SXE2_QP_PREFIX, &type, default_type); -- -- page_size = sysconf(_SC_PAGE_SIZE); -- if (page_size < 0) { -- SXE2_VERBS_LOG_ERROR_BDF("get system page size failed."); -- ret = EPERM; -- goto end; -- } -- ret = sxe2_prefered_buf_alloc( -- sctx, buf, size, (size_t)page_size, type); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("qp buf alloc err ret (%d)", ret); -- goto end; -- } -- -- memset(buf->buf, 0, buf->length); -- --end: -- return ret; --} -- --static void sxe2_free_qp_buf(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf) --{ -- sxe2_actual_buf_free(ctx, buf); --} -- --static struct ibv_qp *ucreate_qp(struct ibv_context *context, struct ibv_qp_init_attr_ex *attr_ex) --{ -- struct 
sxe2_qp_common_init_info init_info = {}; -- struct sxe2_ucreate_qp cmd = {}; -- struct sxe2_ucreate_qp_resp resp = {}; -- struct sxe2_uqp *uqp; -- int err; -- struct ibv_qp *ibqp = NULL; -- struct sxe2_rdma_ucontext *sctx = to_sctx(context); -- struct sxe2_common_attrs *common_attrs = &sctx->uk_attrs; -- -- if (attr_ex->qp_type != IBV_QPT_RC && attr_ex->qp_type != IBV_QPT_UD) { -- errno = EOPNOTSUPP; -- SXE2_VERBS_LOG_ERROR_BDF("qp type illegal!, qp_type:%d\n", attr_ex->qp_type); -- goto end; -- } -- if (attr_ex->cap.max_send_sge > common_attrs->max_hw_wq_frags || -- attr_ex->cap.max_recv_sge > common_attrs->max_hw_wq_frags || -- attr_ex->cap.max_send_wr > common_attrs->max_hw_wq_quanta || -- attr_ex->cap.max_recv_wr > common_attrs->max_hw_rq_quanta || -- attr_ex->cap.max_inline_data > common_attrs->max_hw_inline) { -- errno = EINVAL; -- SXE2_VERBS_LOG_ERROR_BDF("qp request caps illegal!, max_send_sge(%u) max_recv_sge(%u)" -- "max_send_wr(%u) max_recv_wr(%u) max_inline_data(%u)" -- "dev hw_attrs: max_hw_inline(%d), max_hw_wq_frags(%d)," -- "max_hw_wq_quanta(%d), max_hw_rq_quanta(%d),\n", attr_ex->cap.max_send_sge, -- attr_ex->cap.max_recv_sge, attr_ex->cap.max_send_wr, attr_ex->cap.max_recv_wr, -- attr_ex->cap.max_inline_data, common_attrs->max_hw_inline, -- common_attrs->max_hw_wq_frags, common_attrs->max_hw_wq_quanta, -- common_attrs->max_hw_rq_quanta); -- goto end; -- } -- -- SXE2_VERBS_LOG_INFO_BDF("qp caps, max_send_sge(%u) max_recv_sge(%u)" -- "max_send_wr(%u) max_recv_wr(%u) max_inline_data(%u)\n", attr_ex->cap.max_send_sge, -- attr_ex->cap.max_recv_sge, attr_ex->cap.max_send_wr, attr_ex->cap.max_recv_wr, -- attr_ex->cap.max_inline_data); -- init_info.sq_size = attr_ex->cap.max_send_wr; -- init_info.rq_size = attr_ex->cap.max_recv_wr; -- -- init_info.max_sq_sge_cnt = attr_ex->cap.max_send_sge; -- init_info.max_rq_sge_cnt = attr_ex->cap.max_recv_sge; -- init_info.max_inline_data = attr_ex->cap.max_inline_data; -- init_info.common_attrs = common_attrs; -- if 
(attr_ex->srq) { -- init_info.srq = &((to_usrq(attr_ex->srq))->srq_verbs); -- } -- -- err = sxe2_calc_sq_depth_shift(sctx, &init_info, &init_info.sq_depth, &init_info.sq_shift); -- if (err) { -- errno = err; -- SXE2_VERBS_LOG_ERROR_BDF("sxe2_calc_sq_depth_shift fail, err(%d)\n", err); -- goto end; -- } -- err = sxe2_calc_rq_depth_shift(sctx, &init_info, &init_info.rq_depth, &init_info.rq_shift); -- if (err) { -- errno = err; -- SXE2_VERBS_LOG_ERROR_BDF("sxe2_calc_rq_depth_shift fail, err(%d)\n", err); -- goto end; -- } -- uqp = memalign(QP_MEMALIGN_1K, sizeof(*uqp)); -- if (!uqp) { -- errno = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("memalign uqp fail!\n"); -- goto end; -- } -- memset(uqp, 0, sizeof(*uqp)); -- if (pthread_spin_init(&uqp->lock, PTHREAD_PROCESS_PRIVATE)) { -- errno = EBUSY; -- SXE2_VERBS_LOG_ERROR_BDF("pthread_spin_init fail!\n"); -- goto err_free_qp; -- } -- uqp->qp.common_attrs = common_attrs; -- init_info.sq_size = init_info.sq_depth >> init_info.sq_shift; -- init_info.rq_size = init_info.rq_depth >> init_info.rq_shift; -- -- init_info.qp_db_no_llwqe = (__u32 *)sctx->qp_db_no_llwqe; -- init_info.sq_wrtrk_array = calloc(init_info.sq_depth, sizeof(*init_info.sq_wrtrk_array)); -- if (!init_info.sq_wrtrk_array) { -- errno = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("calloc sq_wrtrk_array fail!\n"); -- goto err_destroy_lock; -- } -- -- if (init_info.rq_depth != 0) { -- init_info.rq_wrid_array = calloc(init_info.rq_depth, sizeof(*init_info.rq_wrid_array)); -- if (!init_info.rq_wrid_array) { -- errno = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("calloc rq_wrid_array fail!\n"); -- goto err_free_sq_wrtrk; -- } -- } else { -- init_info.rq_wrid_array = NULL; -- } -- -- uqp->buf_size = (init_info.sq_depth + init_info.rq_depth) * QP_QUANTA_SIZE; -- -- if (sxe2_alloc_qp_buf(sctx, &uqp->buf, uqp->buf_size, attr_ex->qp_type)) { -- errno = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("total_size %zu", uqp->buf_size); -- goto err_free_rq_wrid; -- } -- init_info.rq = uqp->buf.buf; -- if 
(!init_info.rq) { -- errno = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("alloc qp buffer fail!\n"); -- goto err_free_rq_wrid; -- } -- memset(init_info.rq, 0, uqp->buf_size); -- -- init_info.doorbell_note = sxe2_alloc_hw_buf(QP_DB_NOTE_SIZE); -- if (!init_info.doorbell_note) { -- errno = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("alloc shadow area buffer fail!\n"); -- goto err_free_vmap_qp; -- } -- memset(init_info.doorbell_note, 0, QP_DB_NOTE_SIZE); -- -- init_info.sq = &init_info.rq[init_info.rq_depth]; -- -- if (attr_ex->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) { -- err = sxe2_qp_fill_wr_func(sctx, uqp, attr_ex); -- if (err) { -- errno = err; -- SXE2_VERBS_LOG_ERROR_BDF("fill_wr_func fail, err:%d", err); -- goto err_free_vmap_doorbell_note; -- } -- } -- -- SXE2_VERBS_LOG_INFO_BDF("rq depth %u shift %u", init_info.rq_depth, init_info.rq_shift); -- cmd.sq_depth = init_info.sq_depth; -- cmd.rq_depth = init_info.rq_depth; -- cmd.sq_shift = init_info.sq_shift; -- cmd.rq_shift = init_info.rq_shift; -- cmd.user_wqe_bufs = (__u64)(init_info.rq); -- cmd.doorbell_note = (__u64)(init_info.doorbell_note); -- cmd.user_compl_ctx = (__u64)(&uqp->qp); -- err = ibv_cmd_create_qp_ex(context, &uqp->verbs_qp, attr_ex, &(cmd.ibv_cmd), -- sizeof(cmd), &(resp.ibv_resp), -- sizeof(struct sxe2_ucreate_qp_resp)); -- if (err) { -- errno = err; -- SXE2_VERBS_LOG_ERROR_BDF("ibv_cmd_create_qp_ex fail!, err(%d)\n", err); -- goto err_free_vmap_doorbell_note; -- } -- -- sxe2_fill_qp(uqp, &init_info, resp, attr_ex); -- -- attr_ex->cap.max_send_wr = (init_info.sq_depth - SQ_RSV_SIZE) >> init_info.sq_shift; -- if ((init_info.srq) || (init_info.rq_depth == 0)) { -- attr_ex->cap.max_recv_wr = 0; -- } else { -- attr_ex->cap.max_recv_wr = (init_info.rq_depth - RQ_RSV_SIZE) >> init_info.rq_shift; -- } -- -- if (attr_ex->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) { -- uqp->verbs_qp.comp_mask |= VERBS_QP_EX; -- } -- -- ibqp = &uqp->verbs_qp.qp; -- goto end; --err_free_vmap_doorbell_note: -- 
sxe2_free_hw_buf(init_info.doorbell_note, QP_DB_NOTE_SIZE); --err_free_vmap_qp: -- sxe2_free_qp_buf(sctx, &uqp->buf); --err_free_rq_wrid: -- if (init_info.rq_wrid_array) { -- free(init_info.rq_wrid_array); -- init_info.rq_wrid_array = NULL; -- } --err_free_sq_wrtrk: -- free(init_info.sq_wrtrk_array); -- init_info.sq_wrtrk_array = NULL; --err_destroy_lock: -- pthread_spin_destroy(&uqp->lock); --err_free_qp: -- if (uqp) { -- free(uqp); -- uqp = NULL; -- } --end: -- return ibqp; --} -- --struct ibv_qp *sxe2_ucreate_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) --{ -- struct ibv_qp *qp = NULL; -- struct ibv_qp_init_attr_ex attr_ex; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- if (!pd || !attr) { -- errno = EINVAL; -- SXE2_VERBS_LOG_ERROR_BDF("inval param, pd(%p), attr(%p)", pd, attr); -- goto out; -- } -- -- sctx = to_sctx(pd->context); -- -- SXE2_VERBS_LOG_DEBUG_BDF( -- "type:%u, max_inline_data:%u, max_send_wr:%u" -- "max_send_sge:%u, max_send_recv_wr:%u, max_recv_sge:%u, pd_handle:%u", -- attr->qp_type, attr->cap.max_inline_data, attr->cap.max_send_wr, -- attr->cap.max_send_sge, attr->cap.max_recv_wr, attr->cap.max_recv_sge, -- pd->handle); -- -- memset(&attr_ex, 0, sizeof(attr_ex)); -- memcpy(&attr_ex, attr, sizeof(*attr)); -- -- attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD; -- attr_ex.pd = pd; -- qp = ucreate_qp(pd->context, &attr_ex); -- if (qp) { -- memcpy(attr, &attr_ex, sizeof(*attr)); -- } -- --out: -- return qp; --} -- --struct ibv_qp *sxe2_ucreate_qp_ex(struct ibv_context *context, -- struct ibv_qp_init_attr_ex *attr_ex) --{ -- struct ibv_qp *qp = NULL; -- struct sxe2_rdma_ucontext *sctx = NULL; -- -- if (!context || !attr_ex) { -- errno = EINVAL; -- SXE2_VERBS_LOG_ERROR_BDF("inval param, context(%p), attr_ex(%p)", context, attr_ex); -- goto out; -- } -- -- sctx = to_sctx(context); -- -- SXE2_VERBS_LOG_DEBUG_BDF( -- "type:%u, max_inline_data:%u, max_send_wr:%u, " -- "max_send_sge:%u, max_send_recv_wr:%u, " -- "max_recv_sge:%u, pd_handle:%u, 
comp_mask:%#x," -- "create_flags:%#x, send_flags:%#lx", -- attr_ex->qp_type, attr_ex->cap.max_inline_data, -- attr_ex->cap.max_send_wr, attr_ex->cap.max_send_sge, -- attr_ex->cap.max_recv_wr, attr_ex->cap.max_recv_sge, -- attr_ex->pd->handle, attr_ex->comp_mask, attr_ex->create_flags, -- attr_ex->send_ops_flags); -- qp = ucreate_qp(context, attr_ex); -- --out: -- return qp; --} -- --int sxe2_udestroy_qp(struct ibv_qp *ibqp) --{ -- struct sxe2_uqp *uqp; --#ifdef SXE2_CFG_DEBUG -- struct sxe2_qp_common *qp; --#endif -- struct sxe2_rdma_ucontext *sctx = container_of(ibqp->context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- int ret = 0; -- -- uqp = container_of(ibqp, struct sxe2_uqp, verbs_qp.qp); --#ifdef SXE2_CFG_DEBUG -- qp = &uqp->qp; --#endif -- ret = pthread_spin_destroy(&uqp->lock); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("pthread_spin_destroy fail!\n"); -- } -- -- ret = ibv_cmd_destroy_qp(ibqp); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("ibv_cmd_destroy_qp fail!, ret(%d)\n", ret); -- } -- -- if (uqp->qp.verbs_llwqe) { -- db_uput_qp_llwqe(sctx, uqp->qp.verbs_llwqe); -- } -- if (uqp->send_cq) { -- sxe2_clean_cqes(&uqp->qp, uqp->send_cq, SQ_CQ); -- } -- if (uqp->recv_cq && uqp->recv_cq != uqp->send_cq) { -- sxe2_clean_cqes(&uqp->qp, uqp->recv_cq, RQ_CQ); -- } -- if (uqp->qp.sq_wrtrk_array) { -- free(uqp->qp.sq_wrtrk_array); -- uqp->qp.sq_wrtrk_array = NULL; -- } -- if (uqp->qp.rq_wrid_array) { -- free(uqp->qp.rq_wrid_array); -- uqp->qp.rq_wrid_array = NULL; -- } -- sxe2_free_qp_buf(sctx, &uqp->buf); -- sxe2_free_hw_buf(uqp->qp.doorbell_note, QP_DB_NOTE_SIZE); -- uqp->qp.doorbell_note = NULL; --#ifdef SXE2_CFG_DEBUG -- SXE2_VERBS_LOG_DEBUG_BDF("qp [%d] io statistics:\n"\ -- "total_sqe_cnt : %u\n"\ -- "total_rqe_cnt : %u\n"\ -- "finished_sqe_cnt : %u\n"\ -- "finished_rqe_cnt : %u\n"\ -- "finished_rqe_insrq_cnt : %u\n"\ -- "flushed_sq_cnt : %u\n"\ -- "flushed_rq_cnt : %u\n"\ -- "cleaned_sq_cnt : %u\n"\ -- "cleaned_rq_cnt : %u\n"\ -- "cleaned_flushsq_cnt : 
%u\n"\ -- "cleaned_flushrq_cnt : %u\n"\ -- "total_signal_cnt : %u\n"\ -- "send_cnt : %u\n"\ -- "send_inv_cnt : %u\n"\ -- "read_cnt : %u\n"\ -- "write_cnt : %u\n"\ -- "local_inv_cnt : %u\n"\ -- "bind_mw_cnt : %u\n"\ -- "fast_regmr_cnt : %u\n"\ -- "last_send_sqwrid : %llu\n"\ -- "last_rcvd_sqwrid : %llu\n"\ -- "last_send_rqwrid : %llu\n"\ -- "last_rcvd_rqwrid : %llu\n", -- qp->qpn, -- qp->statistics.total_sqe_cnt, -- qp->statistics.total_rqe_cnt, -- qp->statistics.finished_sqe_cnt, -- qp->statistics.finished_rqe_cnt, -- qp->statistics.finished_rqe_insrq_cnt, -- qp->statistics.flushed_sq_cnt, -- qp->statistics.flushed_rq_cnt, -- qp->statistics.cleaned_sq_cnt, -- qp->statistics.cleaned_rq_cnt, -- qp->statistics.cleaned_flushsq_cnt, -- qp->statistics.cleaned_flushrq_cnt, -- qp->statistics.total_signal_cnt, -- qp->statistics.send_cnt, -- qp->statistics.send_inv_cnt, -- qp->statistics.read_cnt, -- qp->statistics.write_cnt, -- qp->statistics.local_inv_cnt, -- qp->statistics.bind_mw_cnt, -- qp->statistics.fast_regmr_cnt, -- qp->statistics.last_send_sqwrid, -- qp->statistics.last_rcvd_sqwrid, -- qp->statistics.last_send_rqwrid, -- qp->statistics.last_rcvd_rqwrid); --#endif -- if (uqp) { -- free(uqp); -- uqp = NULL; -- } -- return ret; --} -- --int sxe2_umodify_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, int attr_mask) --{ -- struct sxe2_umodify_qp cmd = {}; -- struct sxe2_umodify_qp_resp resp = {}; -- struct sxe2_uqp *uqp; -- struct sxe2_verbs_llwqe *ll_wqe = NULL; -- struct sxe2_rdma_ucontext *sctx = container_of(ibqp->context, struct sxe2_rdma_ucontext, -- ibv_ctx.context); -- bool need_alloc_page = false; -- __u8 alloc_page_type = 0; -- __u32 db_mmap_size; -- __u32 db_page_id; -- __u64 db_mmap_offset; -- int ret; -- SXE2_VERBS_LOG_DEBUG_BDF("uqp:umodify qp %u start\n", ibqp->qp_num); -- if (attr_mask & IBV_QP_STATE) { -- SXE2_VERBS_LOG_DEBUG_BDF("uqp %u :umodify qp cur start %u next state %u\n", -- ibqp->qp_num, ibqp->state, attr->qp_state); -- } -- uqp = 
container_of(ibqp, struct sxe2_uqp, verbs_qp.qp); -- if (sctx->ll_mode && -- attr_mask & IBV_QP_STATE && attr->qp_state == IBV_QPS_RTS) { -- ll_wqe = db_uget_qp_llwqe(ibqp->context, &need_alloc_page, &alloc_page_type); -- if (ll_wqe) { -- uqp->qp.verbs_llwqe = ll_wqe; -- uqp->qp.push_wqe = ll_wqe->wqe_addr; -- uqp->qp.push_db = ll_wqe->db_addr; -- uqp->qp.llwqe_enable = true; -- uqp->qp.llwqe_mode = true; -- cmd.llwqe_enable = true; -- cmd.llwqe_page_index = ll_wqe->db_page_id; -- } -- if (need_alloc_page) { -- cmd.new_page_alloc = true; -- } -- } else if (attr_mask & IBV_QP_STATE && attr->qp_state == IBV_QPS_RESET) { -- if (uqp->send_cq) { -- sxe2_clean_cqes(&uqp->qp, uqp->send_cq, SQ_CQ); -- } -- if (uqp->recv_cq && uqp->recv_cq != uqp->send_cq) { -- sxe2_clean_cqes(&uqp->qp, uqp->recv_cq, RQ_CQ); -- } -- -- uqp->qp.sq_ring.head = 0; -- uqp->qp.sq_ring.tail = 0; -- uqp->qp.rq_ring.head = 0; -- uqp->qp.rq_ring.tail = 0; -- uqp->qp.initial_ring.head = 0; -- uqp->qp.initial_ring.tail = 0; -- if (uqp->qp.verbs_llwqe) { -- db_uput_qp_llwqe(sctx, uqp->qp.verbs_llwqe); -- uqp->qp.verbs_llwqe = NULL; -- } -- uqp->qp.swqe_polarity = 0; -- uqp->qp.rwqe_polarity = 0; -- uqp->qp.llwqe_mode = false; -- memset(uqp->qp.rq_base, 0, uqp->buf_size); -- } -- if ((attr_mask & IBV_QP_STATE) && -- (ibqp->state == IBV_QPS_SQD) -- &&(attr->qp_state == IBV_QPS_RTS)) { -- if (uqp->qp.sq_ring.head == uqp->qp.sq_ring.tail) { -- sxe2_push_nop_wqe(&uqp->qp); -- SXE2_VERBS_LOG_DEBUG_BDF("uqp %u:before modify sqd to rts push nop head=%u tail=%u\n", -- ibqp->qp_num, uqp->qp.sq_ring.head, uqp->qp.sq_ring.tail); -- } -- } -- -- ret = ibv_cmd_modify_qp_ex(ibqp, attr, attr_mask, &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp)); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("ibv_modify_qp ret(%d)\n", ret); -- } -- if (sctx->ll_mode && need_alloc_page && -- attr_mask & IBV_QP_STATE && attr->qp_state == IBV_QPS_RTS) { -- db_mmap_size = resp.db_mmap_size; -- db_page_id = resp.db_page_id; -- 
db_mmap_offset = resp.db_mmap_offset; -- if (!ll_wqe) { -- ll_wqe = alloc_db_page_and_get_qp_llwqe(ibqp->context, db_mmap_size, db_page_id, db_mmap_offset, alloc_page_type); -- if (ll_wqe) { -- uqp->qp.verbs_llwqe = ll_wqe; -- uqp->qp.push_wqe = ll_wqe->wqe_addr; -- uqp->qp.push_db = ll_wqe->db_addr; -- uqp->qp.llwqe_enable = true; -- uqp->qp.llwqe_mode = true; -- } else { -- uqp->qp.llwqe_enable = false; -- uqp->qp.llwqe_mode = false; -- } -- } else { -- if (!db_ualloc_page_and_llwqes(ibqp->context, false, db_mmap_size, db_page_id, db_mmap_offset)) { -- SXE2_VERBS_LOG_WARN_BDF("LLWQE:Alloc new shared db page failed"); -- } -- } -- } -- -- uqp->qp.rd_fence_rate = resp.rd_fence_rate; -- SXE2_VERBS_LOG_DEBUG_BDF("uqp %u:modify qp finish\n", ibqp->qp_num); -- return ret; --} -- --int sxe2_uquery_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, int attr_mask, -- struct ibv_qp_init_attr *init_attr) --{ -- struct ibv_query_qp cmd; -- -- return ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof(cmd)); --} -diff -Naur rdma-core-48.0/providers/sxe2/readme.txt rdma-core-48.0.bak/providers/sxe2/readme.txt ---- rdma-core-48.0/providers/sxe2/readme.txt 2026-05-26 10:42:01.853075201 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/readme.txt 1970-01-01 08:00:00.000000000 +0800 -@@ -1,15 +0,0 @@ --ah.c:ah用户态接口实现 --ah.h:ah用户态接口声明 --cq.c:cq用户态创建、删除、查询、修改接口实现 --db.c:doorbell资源管理 --sxe2_abi.h:基于ibv标准req、resp结构体扩展sxe2厂商字段 --sxe2_private_verbs.h:厂商私有接口声明 --qp.c :qp用户态创建、删除、查询、修改接口实现 --srq.c :srq用户态创建、删除、查询、修改接口、post_srq_recv实现 --verbs.c :用户态厂商驱动注册到libibverbs的接口函数,qp.c、srq.c是各个接口的具体实现 --io.h:数据面相关结构体声明,如wqe结构体 --io.c:post_send实现、post_recv实现、post_srq_recv实现、poll_cq实现、arm_cq实现、cq_event实现 --mr.c:mr用户态接口实现 --mw.c:mw用户态接口实现 --pd.c:pd用户态接口实现 --mc.c:组播用户态接口实现 -diff -Naur rdma-core-48.0/providers/sxe2/ring.h rdma-core-48.0.bak/providers/sxe2/ring.h ---- rdma-core-48.0/providers/sxe2/ring.h 2026-05-26 10:42:01.854075208 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/ring.h 1970-01-01 
08:00:00.000000000 +0800 -@@ -1,127 +0,0 @@ --#ifndef __RING_H_ --#define __RING_H_ -- --#define SXE2_RING_INIT(_ring, _size) \ -- { \ -- (_ring).head = 0; \ -- (_ring).tail = 0; \ -- (_ring).size = (_size); \ -- } --#define SXE2_RING_SIZE(_ring) ((_ring).size) --#define SXE2_RING_CURRENT_HEAD(_ring) ((_ring).head) --#define SXE2_RING_CURRENT_TAIL(_ring) ((_ring).tail) -- --#define SXE2_RING_USED_QUANTA(_ring) \ -- ( \ -- (((_ring).head + (_ring).size - (_ring).tail) % (_ring).size) \ -- ) --#define SXE2_RING_USED_QUANTA_PAD(_ring) \ -- ( \ -- (((_ring).head + (_ring).size - (_ring).tail - 1) % (_ring).size) \ -- ) -- --#define SXE2_RING_FREE_QUANTA(_ring) \ -- ( \ -- ((_ring).size - SXE2_RING_USED_QUANTA(_ring) - 1) \ -- ) -- --#define SXE2_SQ_RING_FREE_QUANTA(_ring) \ -- ( \ -- ((_ring).size - SXE2_RING_USED_QUANTA(_ring) - 1) \ -- ) -- --#define SXE2_RING_FULL_ERR(_ring) \ -- ( \ -- (SXE2_RING_USED_QUANTA(_ring) == ((_ring).size - 1)) \ -- ) -- --#define SXE2_SQ_RING_FULL_ERR(_ring) \ -- ( \ -- (SXE2_RING_USED_QUANTA(_ring) == ((_ring).size - 1)) \ -- ) -- --#define SXE2_RING_MORE_WORK(_ring) \ -- ( \ -- (SXE2_RING_USED_QUANTA(_ring) != 0) \ -- ) --#define SXE2_RING_MORE_WORK_PAD(_ring) \ -- ( \ -- (SXE2_RING_USED_QUANTA_PAD(_ring) != 0) \ -- ) -- --#define SXE2_RING_MOVE_HEAD(_ring, _retcode) \ -- { \ -- register __u32 size; \ -- size = (_ring).size; \ -- if (!SXE2_RING_FULL_ERR(_ring)) { \ -- (_ring).head = ((_ring).head + 1) % size; \ -- (_retcode) = 0; \ -- } else { \ -- (_retcode) = ENOMEM; \ -- } \ -- } -- --#define SXE2_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ -- { \ -- register __u32 size; \ -- size = (_ring).size; \ -- if ((SXE2_RING_USED_QUANTA(_ring) + (_count)) < size) { \ -- (_ring).head = ((_ring).head + (_count)) % size; \ -- (_retcode) = 0; \ -- } else { \ -- (_retcode) = ENOMEM; \ -- } \ -- } -- --#define SXE2_SQ_RING_MOVE_HEAD(_ring, _retcode) \ -- { \ -- register __u32 size; \ -- size = (_ring).size; \ -- if 
(!SXE2_SQ_RING_FULL_ERR(_ring)) { \ -- (_ring).head = ((_ring).head + 1) % size; \ -- (_retcode) = 0; \ -- } else { \ -- (_retcode) = ENOMEM; \ -- } \ -- } -- --#define SXE2_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ -- { \ -- register __u32 size; \ -- size = (_ring).size; \ -- if ((SXE2_RING_USED_QUANTA(_ring) + (_count)) < (size - 1)) { \ -- (_ring).head = ((_ring).head + (_count)) % size; \ -- (_retcode) = 0; \ -- } else { \ -- (_retcode) = ENOMEM; \ -- } \ -- } -- --#define SXE2_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \ -- (_ring).head = ((_ring).head + (_count)) % (_ring).size -- --#define SXE2_RING_MOVE_TAIL(_ring) \ -- (_ring).tail = ((_ring).tail + 1) % (_ring).size -- --#define SXE2_RING_MOVE_HEAD_NOCHECK(_ring) \ -- (_ring).head = ((_ring).head + 1) % (_ring).size -- --#define SXE2_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \ -- (_ring).tail = ((_ring).tail + (_count)) % (_ring).size -- --#define SXE2_RING_SET_TAIL(_ring, _pos) \ -- (_ring).tail = (_pos) % (_ring).size -- --#define SXE2_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \ -- { \ -- index = SXE2_RING_CURRENT_HEAD(_ring); \ -- SXE2_RING_MOVE_HEAD(_ring, _retcode); \ -- } -- --#define SXE2_CQP_INIT_WQE(wqe) memset(wqe, 0, 64) -- --#define SXE2_GET_CURRENT_CQ_ELEM(_cq) \ -- ( \ -- (_cq)->cq_base[SXE2_RING_CURRENT_HEAD((_cq)->cq_ring)].buf \ -- ) --#endif -diff -Naur rdma-core-48.0/providers/sxe2/srq.c rdma-core-48.0.bak/providers/sxe2/srq.c ---- rdma-core-48.0/providers/sxe2/srq.c 2026-05-26 10:42:01.853075201 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/srq.c 1970-01-01 08:00:00.000000000 +0800 -@@ -1,451 +0,0 @@ -- --#include --#include --#include "log.h" --#include "verbs.h" --#include "sxe2_common.h" --#include "sxe2_abi.h" --#include "ring.h" --#include "io.h" --#include "buf.h" -- --#define SXE2_VERBS_SRQE_MIN_SIZE (32) --#define SXE2_SRQE_MAX_FRAG_CNT_SHIFT \ -- (2) --#define SXE2_SRQE_MAX_FRAG_CNT_SUB (1) --#define SXE2_SRQ_SIZE_MUL_SHIFT (1) --#define SXE2_SRQ_POLARITY_INIT 
(0) --#define SXE2_SRQ_DB_NOTE_SIZE (16) --#define SXE2_SRQ_MAX_BUF_SIZE (2 * 1024 * 1024) -- --static void srq_ucal_wqe_shift(__u32 max_srq_sge, __u8 *srqe_shift) --{ -- *srqe_shift = WQE_SIZE_32BYTE; -- -- if (max_srq_sge > MAX_SGE_SIZE_1) { -- if (max_srq_sge < MAX_SGE_SIZE_4) { -- *srqe_shift = WQE_SIZE_64BYTE; -- } else if (max_srq_sge < MAX_SGE_SIZE_8) { -- *srqe_shift = WQE_SIZE_128BYTE; -- } else { -- *srqe_shift = WQE_SIZE_256BYTE; -- } -- } -- return; --} -- --static int srq_uget_depth(struct sxe2_common_attrs *common_attrs, -- __u32 srq_size, __u8 shift, __u32 *srq_depth) --{ -- int ret = 0; -- -- *srq_depth = sxe2_round_up_pow_2((srq_size << shift) + SXE2_RQ_RSVD); -- -- if (*srq_depth > common_attrs->max_hw_srq_quanta) { -- ret = EINVAL; -- } -- -- return ret; --} -- --static void srq_uinit_verbs_common(struct sxe2_srq_verbs *srq, -- struct sxe2_srq_verbs_init_info *info) --{ -- __u8 srqshift; -- -- srq->common_attrs = info->common_attrs; -- -- srq_ucal_wqe_shift(info->max_srq_frag_cnt, &srqshift); -- -- srq->srq_base = info->srq; -- srq->db_note = info->db_note; -- srq->srq_wrid_array = info->srq_wrid_array; -- srq->srqe_array = info->srqe_array; -- -- srq->srq_id = info->srq_id; -- -- srq->srq_polarity = SXE2_SRQ_POLARITY_INIT; -- -- srq->srq_size = info->srq_size; -- -- srq->wqe_size = srqshift; -- -- srq->max_srq_frag_cnt = info->max_srq_frag_cnt; -- -- SXE2_RING_INIT(srq->srq_ring, srq->srq_size); -- -- srq->wqe_size_multiplier = SXE2_SRQ_SIZE_MUL_SHIFT << srqshift; -- -- srq->srq_buf_size = info->srq_buf_size; -- -- return; --} -- --int sxe2_uget_srq_num(struct ibv_srq *ibv_srq, uint32_t *srqn) --{ -- int ret = 0; -- struct sxe2_usrq *usrq; -- struct sxe2_rdma_ucontext *sctx; -- -- if (!ibv_srq || !srqn) { -- ret = EINVAL; -- SXE2_VERBS_LOG_ERROR("inv param, ibv_srq %p, srqn addr %p", -- ibv_srq, srqn); -- goto end; -- } -- -- usrq = container_of(ibv_srq, struct sxe2_usrq, vsrq.srq); -- sctx = to_sctx(ibv_srq->context); -- if (!usrq || !sctx) { 
-- ret = EINVAL; -- SXE2_VERBS_LOG_ERROR_BDF( -- "SRQ:destroy srq attr illegal:usrq:%p, sctx:%p, ret:%d\n", usrq, -- sctx, ret); -- goto end; -- } -- -- *srqn = usrq->srq_verbs.srq_id; -- --end: -- return ret; --} -- --static int sxe2_alloc_srq_buf(struct sxe2_rdma_ucontext *sctx, sxe2_buf_s *buf, size_t size) --{ -- int ret; -- sxe2_alloc_type_e type; -- sxe2_alloc_type_e default_type = SXE2_ALLOC_TYPE_ANON; -- long page_size; -- -- sxe2_alloc_type_get(SXE2_SRQ_PREFIX, &type, default_type); -- -- page_size = sysconf(_SC_PAGE_SIZE); -- if (page_size < 0) { -- SXE2_VERBS_LOG_ERROR_BDF("get system page size failed."); -- ret = EPERM; -- goto end; -- } -- ret = sxe2_prefered_buf_alloc( -- sctx, buf, size, page_size, type); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("cq buf alloc err ret (%d)", ret); -- goto end; -- } -- -- memset(buf->buf, 0, buf->length); -- --end: -- return ret; --} -- --static void sxe2_free_srq_buf(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf) --{ -- sxe2_actual_buf_free(ctx, buf); --} -- --struct ibv_srq *sxe2_ucreate_srq(struct ibv_pd *pd, -- struct ibv_srq_init_attr *attr) --{ -- struct sxe2_usrq *usrq; -- struct ibv_srq *ibvsrq = NULL; -- struct sxe2_rdma_ucontext *sctx; -- struct sxe2_ucreate_srq cmd; -- struct sxe2_ucreate_srq_resp resp; -- struct sxe2_srq_verbs_init_info info = { }; -- __u32 depth = 0; -- __u8 shift = 0; -- int ret = 0; -- -- if (!pd || !attr) { -- errno = EINVAL; -- SXE2_VERBS_LOG_ERROR( -- "SRQ:inv create srq input param, pd:%p, attr:%p, ret:%d", pd, attr, -- errno); -- goto end; -- } -- -- sctx = to_sctx(pd->context); -- if (attr->attr.max_wr > sctx->uk_attrs.max_hw_srq_wr || -- attr->attr.max_sge > sctx->uk_attrs.max_hw_wq_frags) { -- errno = EINVAL; -- SXE2_VERBS_LOG_ERROR_BDF( -- "SRQ:create srq attr illegal:attr max_wr:%#x, max_sge:%#x, " -- "srq_limit:%#x, ret:%d\n", -- attr->attr.max_wr, attr->attr.max_sge, attr->attr.srq_limit, errno); -- goto end; -- } -- SXE2_VERBS_LOG_INFO_BDF( -- "SRQ:create srq start, attr 
max_wr:%#x, max_sge:%#x, srq_limit:%#x", -- attr->attr.max_wr, attr->attr.max_sge, attr->attr.srq_limit); -- -- usrq = calloc(1, sizeof(*usrq)); -- if (!usrq) { -- errno = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF( -- "SRQ:user mem alloc failed, mem size:%zu, ret:%d\n", sizeof(*usrq), -- errno); -- goto end; -- } -- -- if (pthread_spin_init(&usrq->lock, PTHREAD_PROCESS_SHARED)) { -- errno = EBUSY; -- SXE2_VERBS_LOG_ERROR_BDF("SRQ:lock init failed, ret:%d\n", errno); -- goto free_usrq; -- } -- -- memset(&cmd, 0, sizeof(cmd)); -- memset(&resp, 0, sizeof(resp)); -- memset(&info, 0, sizeof(info)); -- ibvsrq = &usrq->vsrq.srq; -- -- info.common_attrs = &sctx->uk_attrs; -- info.max_srq_frag_cnt = attr->attr.max_sge; -- -- srq_ucal_wqe_shift(info.max_srq_frag_cnt, &shift); -- ret = srq_uget_depth(info.common_attrs, attr->attr.max_wr, shift, &depth); -- if (ret) { -- errno = ret; -- SXE2_VERBS_LOG_ERROR_BDF("SRQ:calculate srq depth failed, ret:%d\n", -- ret); -- goto free_mutex; -- } -- -- info.srq_buf_size = depth * SXE2_VERBS_SRQE_MIN_SIZE; -- -- info.srq_size = depth >> shift; -- -- info.srq_wrid_array = calloc(info.srq_size, sizeof(*info.srq_wrid_array)); -- if (!info.srq_wrid_array) { -- errno = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("SRQ:calloc srq_wrid_array failed, ret: %d\n", -- errno); -- goto free_mutex; -- } -- -- info.srqe_array = calloc(info.srq_size, sizeof(*info.srqe_array)); -- if (!info.srqe_array) { -- errno = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("SRQ:calloc srqe_array failed, ret: %d\n", -- errno); -- goto free_wrid_array; -- } -- -- if (sxe2_alloc_srq_buf(sctx, &usrq->buf, info.srq_buf_size)) { -- errno = ENOMEM; -- goto free_srqe_array; -- } -- -- info.srq = usrq->buf.buf; -- if (!info.srq) { -- errno = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF("SRQ:buf alloc failed, size:%#x, ret:%d\n", -- info.srq_buf_size, errno); -- goto free_srqe_array; -- } -- memset(info.srq, 0, info.srq_buf_size); -- -- info.db_note = (__u64 *)sxe2_alloc_hw_buf(SXE2_SRQ_DB_NOTE_SIZE); -- if 
(!info.db_note) { -- errno = ENOMEM; -- SXE2_VERBS_LOG_ERROR_BDF( -- "SRQ:DB Note buf alloc failed, size:%#x, ret %d\n", -- SXE2_SRQ_DB_NOTE_SIZE, errno); -- goto free_srq_buf; -- } -- memset(info.db_note, 0, SXE2_SRQ_DB_NOTE_SIZE); -- -- pthread_spin_lock(&usrq->lock); -- cmd.user_srq_buf = (__u64)((uintptr_t)info.srq); -- cmd.user_srq_db_note = (__u64)((uintptr_t)info.db_note); -- cmd.srq_cmpl_ctx = (__u64)((uintptr_t)(&usrq->srq_verbs)); -- cmd.srq_buf_size = info.srq_buf_size; -- cmd.srq_size = info.srq_size; -- cmd.max_wr_cal = ((depth - SXE2_RQ_RSVD) >> shift); -- ret = ibv_cmd_create_srq(pd, ibvsrq, attr, &cmd.ibv_cmd, sizeof(cmd), -- &resp.ibv_resp, sizeof(resp)); -- pthread_spin_unlock(&usrq->lock); -- if (ret) { -- errno = ret; -- SXE2_VERBS_LOG_ERROR_BDF("SRQ:ibv cmd create srq failed, ret:%d\n", -- ret); -- goto free_srq_db_note; -- } -- -- info.srq_id = resp.srq_id; -- -- srq_uinit_verbs_common(&usrq->srq_verbs, &info); -- -- SXE2_VERBS_LOG_INFO_BDF( -- "SRQ(%#x):Create over, srq_buf_base addr:%p, srq_db_note addr:%p" -- "srq_size:%#x, max_srq_frag_cnt:%#x, srq_buf_size:%#x\n", -- usrq->srq_verbs.srq_id, usrq->srq_verbs.srq_base, -- usrq->srq_verbs.db_note, usrq->srq_verbs.srq_size, -- usrq->srq_verbs.max_srq_frag_cnt, usrq->srq_verbs.srq_buf_size); -- -- usrq->max_wr = (depth - SXE2_RQ_RSVD) >> shift; -- usrq->max_sge = usrq->srq_verbs.max_srq_frag_cnt; -- attr->attr.max_wr = usrq->max_wr; -- attr->attr.max_sge = usrq->max_sge; -- -- goto end; -- --free_srq_db_note: -- sxe2_free_hw_buf(info.db_note, SXE2_SRQ_DB_NOTE_SIZE); -- info.db_note = NULL; --free_srq_buf: -- sxe2_free_srq_buf(sctx, &usrq->buf); -- info.srq = NULL; --free_srqe_array: -- free(info.srqe_array); -- info.srqe_array = NULL; --free_wrid_array: -- free(info.srq_wrid_array); -- info.srq_wrid_array = NULL; --free_mutex: -- pthread_spin_destroy(&usrq->lock); --free_usrq: -- free(usrq); -- usrq = NULL; -- ibvsrq = NULL; --end: -- return ibvsrq; --} -- --struct ibv_srq 
*sxe2_ucreate_srq_ex(struct ibv_context *context, -- struct ibv_srq_init_attr_ex *attr) --{ -- struct ibv_srq *srq = NULL; -- struct sxe2_rdma_ucontext *sctx; -- -- if (!context || !attr) { -- SXE2_VERBS_LOG_ERROR("SRQ:Create inv param, context(%p) attr(%p)", -- context, attr); -- goto end; -- } -- -- sctx = to_sctx(context); -- SXE2_VERBS_LOG_INFO_BDF("SRQ:Create ex in, mask(%#x) srq_type(%#x) " -- "max_sge(%#x) max_wr(%#x) srq_limit(%#x)", -- attr->comp_mask, (__u32)attr->srq_type, -- attr->attr.max_sge, attr->attr.max_wr, -- attr->attr.srq_limit); -- -- if ((attr->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) && -- (attr->srq_type == IBV_SRQT_BASIC)) { -- srq = sxe2_ucreate_srq(attr->pd, (struct ibv_srq_init_attr *)attr); -- } else { -- SXE2_VERBS_LOG_ERROR_BDF("SRQ:Create inv mask, comp_mask(%d) srq_type(%d)", -- attr->comp_mask, attr->srq_type); -- } -- --end: -- return srq; --} -- --int sxe2_umodify_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr, -- int attr_mask) --{ -- int ret = 0; -- struct sxe2_rdma_ucontext *sctx; -- struct ibv_modify_srq cmd; -- -- if (!srq || !attr) { -- ret = EINVAL; -- SXE2_VERBS_LOG_ERROR( -- "SRQ:inv modify srq input param, srq:%p, attr:%p, ret:%d", srq, -- attr, ret); -- goto end; -- } -- sctx = to_sctx(srq->context); -- -- ret = ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof(cmd)); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("SRQ:ibv modify srq kernel failed, ret:%d", ret); -- goto end; -- } -- -- SXE2_VERBS_LOG_INFO_BDF("SRQ:Modify attr_mask:%#x, srq_limit:%#x\n", -- (__u32)attr_mask, attr->srq_limit); -- --end: -- return ret; --} -- --int sxe2_uquery_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr) --{ -- int ret = 0; -- struct sxe2_rdma_ucontext *sctx; -- struct ibv_query_srq cmd; -- -- if (!srq || !attr) { -- ret = EINVAL; -- SXE2_VERBS_LOG_ERROR( -- "SRQ:inv modify srq input param, srq:%p, attr:%p, ret:%d", srq, -- attr, ret); -- goto end; -- } -- sctx = to_sctx(srq->context); -- -- ret = ibv_cmd_query_srq(srq, attr, &cmd, 
sizeof(cmd)); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("SRQ:ibv query srq kernel failed, ret:%d", -- ret); -- goto end; -- } -- -- SXE2_VERBS_LOG_INFO_BDF("SRQ:Query max_wr:%#x, max_sge:%#x, limit:%#x\n", -- attr->max_wr, attr->max_sge, attr->srq_limit); -- --end: -- return ret; --} -- --int sxe2_udestroy_srq(struct ibv_srq *ibv_srq) --{ -- struct sxe2_usrq *usrq; -- struct sxe2_rdma_ucontext *sctx; -- int ret = 0; -- -- if (!ibv_srq) { -- ret = EINVAL; -- SXE2_VERBS_LOG_ERROR( -- "SRQ:inv destroy srq input param, ibv_srq:%p, ret:%d", ibv_srq, -- ret); -- goto end; -- } -- -- usrq = container_of(ibv_srq, struct sxe2_usrq, vsrq.srq); -- sctx = to_sctx(ibv_srq->context); -- if (!usrq || !sctx) { -- ret = EINVAL; -- SXE2_VERBS_LOG_ERROR_BDF( -- "SRQ:destroy srq attr illegal:usrq:%p, sctx:%p, ret:%d\n", usrq, -- sctx, ret); -- goto end; -- } -- -- ret = ibv_cmd_destroy_srq(ibv_srq); -- if (ret) { -- SXE2_VERBS_LOG_ERROR_BDF("SRQ(%#x):destroy srq kernel failed: " -- "srq_size:%#x, ret:%d\n", -- usrq->srq_verbs.srq_id, -- usrq->srq_verbs.srq_size, ret); -- goto end; -- } -- -- sxe2_free_srq_buf(sctx, &usrq->buf); -- sxe2_free_hw_buf(usrq->srq_verbs.db_note, SXE2_DB_NOTE_SIZE); -- usrq->srq_verbs.db_note = NULL; -- free(usrq->srq_verbs.srqe_array); -- usrq->srq_verbs.srqe_array = NULL; -- free(usrq->srq_verbs.srq_wrid_array); -- usrq->srq_verbs.srq_wrid_array = NULL; -- -- pthread_spin_destroy(&usrq->lock); -- -- free(usrq); -- usrq = NULL; -- --end: -- return ret; --} -diff -Naur rdma-core-48.0/providers/sxe2/sxe2-abi.h rdma-core-48.0.bak/providers/sxe2/sxe2-abi.h ---- rdma-core-48.0/providers/sxe2/sxe2-abi.h 2026-05-26 10:42:01.854075208 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/sxe2-abi.h 1970-01-01 08:00:00.000000000 +0800 -@@ -1,108 +0,0 @@ --#ifndef __SXE2_ABI_STRUCT_H__ --#define __SXE2_ABI_STRUCT_H__ -- --#include -- --#define SXE2_RDMA_ABI_VER (5) -- --struct sxe2_create_ah_resp { -- __u32 ah_id; -- __u8 rsvd[4]; --}; -- --struct sxe2_modify_qp_req { -- __u8 
sq_flush; -- __u8 rq_flush; -- __u8 llwqe_enable; -- __u8 new_page_alloc; -- __u32 llwqe_page_index; --}; -- --struct sxe2_modify_qp_resp { -- __u8 rd_fence_rate; -- __u8 rsvd[3]; -- __u32 db_mmap_size; -- __u32 db_page_id; -- __u32 rsvd1; -- __u64 db_mmap_offset; --}; -- --struct sxe2_create_qp_req { -- __u32 sq_depth; -- __u32 rq_depth; -- __u8 sq_shift; -- __u8 rq_shift; -- __u8 rsvd[6]; -- __u64 user_wqe_bufs; -- __u64 doorbell_note; -- __u64 user_compl_ctx; --}; -- --struct sxe2_create_cq_req { -- __aligned_u64 user_cq_buf; -- __aligned_u64 user_cq_db_note; --}; -- --struct sxe2_create_cq_resp { -- __u32 cq_id; -- __u32 ncqe; --}; -- --struct sxe2_alloc_pd_resp { -- __u32 pd_id; -- __u8 rsvd[4]; --}; -- --struct sxe2_create_qp_resp { -- __u32 qpn; -- __u32 qp_caps; --}; -- --struct sxe2_alloc_ucontext_req { -- __u32 rsvd32; -- __u8 userspace_ver; -- __u8 rsvd8[3]; -- __aligned_u64 comp_mask; --}; -- --struct sxe2_alloc_ucontext_resp { -- __u32 max_pds; -- __u32 max_qps; -- __u32 wq_size; -- __u8 kernel_ver; -- __u8 rsvd[3]; -- __aligned_u64 feature_flags; -- __aligned_u64 db_mmap_key; -- __u32 max_hw_wq_frags; -- __u32 max_hw_read_sges; -- __u32 max_hw_inline; -- __u32 max_hw_rq_quanta; -- __u32 max_hw_wq_quanta; -- __u32 min_hw_cq_size; -- __u32 max_hw_cq_size; -- __u16 max_hw_sq_chunk; -- __u8 hw_rev; -- __u8 is_pf; -- __aligned_u64 comp_mask; -- __u16 min_hw_wq_size; -- __u32 max_db; -- __u8 rsvd3[2]; -- __u8 bdf[16]; -- __u32 max_hw_srq_quanta; -- __u32 max_hw_srq_wr; --}; -- --struct sxe2_create_srq_req { -- __aligned_u64 user_srq_buf; -- __aligned_u64 user_srq_db_note; -- __aligned_u64 srq_cmpl_ctx; -- __u32 srq_buf_size; -- __u32 srq_size; -- __u32 max_wr_cal; --}; -- --struct sxe2_create_srq_resp { -- __u32 srq_id; --}; -- --#endif -diff -Naur rdma-core-48.0/providers/sxe2/sxe2_abi.h rdma-core-48.0.bak/providers/sxe2/sxe2_abi.h ---- rdma-core-48.0/providers/sxe2/sxe2_abi.h 2026-05-26 10:42:01.854075208 +0800 -+++ 
rdma-core-48.0.bak/providers/sxe2/sxe2_abi.h 1970-01-01 08:00:00.000000000 +0800 -@@ -1,29 +0,0 @@ --#ifndef __SXE2_ABI_H__ --#define __SXE2_ABI_H__ -- --#include --#include --#include --#include "sxe2_common.h" -- --#define SXE2_IB_UVERBS_ABI_VERSION 1 --DECLARE_DRV_CMD(sxe2_ucreate_ah, IB_USER_VERBS_CMD_CREATE_AH, -- empty, sxe2_create_ah_resp); --DECLARE_DRV_CMD(sxe2_umodify_qp, IB_USER_VERBS_EX_CMD_MODIFY_QP, -- sxe2_modify_qp_req, sxe2_modify_qp_resp); --DECLARE_DRV_CMD(sxe2_ucreate_cq, IB_USER_VERBS_CMD_CREATE_CQ, -- sxe2_create_cq_req, sxe2_create_cq_resp); --DECLARE_DRV_CMD(sxe2_ucreate_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ, -- sxe2_create_cq_req, sxe2_create_cq_resp); --DECLARE_DRV_CMD(sxe2_ualloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, -- empty, sxe2_alloc_pd_resp); --DECLARE_DRV_CMD(sxe2_ucreate_qp, IB_USER_VERBS_CMD_CREATE_QP, -- sxe2_create_qp_req, sxe2_create_qp_resp); --DECLARE_DRV_CMD(sxe2_get_context, IB_USER_VERBS_CMD_GET_CONTEXT, -- sxe2_alloc_ucontext_req, sxe2_alloc_ucontext_resp); --DECLARE_DRV_CMD(sxe2_ucreate_srq, IB_USER_VERBS_CMD_CREATE_SRQ, -- sxe2_create_srq_req, sxe2_create_srq_resp); --DECLARE_DRV_CMD(sxe2_ucreate_srq_ex, IB_USER_VERBS_CMD_CREATE_XSRQ, -- sxe2_create_srq_req, sxe2_create_srq_resp); -- --#endif -diff -Naur rdma-core-48.0/providers/sxe2/sxe2_common.c rdma-core-48.0.bak/providers/sxe2/sxe2_common.c ---- rdma-core-48.0/providers/sxe2/sxe2_common.c 2026-05-26 10:42:01.854075208 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/sxe2_common.c 1970-01-01 08:00:00.000000000 +0800 -@@ -1,37 +0,0 @@ --#include "sxe2_common.h" -- --void *sxe2_alloc_hw_buf(size_t size) --{ -- void *buf; -- long page_size = sysconf(_SC_PAGE_SIZE); -- -- buf = memalign(page_size, size); -- if (!buf) { -- goto end; -- } -- -- if (ibv_dontfork_range(buf, size)) { -- free(buf); -- buf = NULL; -- goto end; -- } --end: -- return buf; --} -- --void sxe2_free_hw_buf(void *buf, size_t size) --{ -- ibv_dofork_range(buf, size); -- free(buf); --} -- --__u32 
sxe2_round_up_pow_2(__u32 value) --{ -- int count = 1; -- -- for (value--; count <= 16; count *= 2) { -- value |= value >> count; -- } -- return ++value; --} -- -diff -Naur rdma-core-48.0/providers/sxe2/sxe2_common.h rdma-core-48.0.bak/providers/sxe2/sxe2_common.h ---- rdma-core-48.0/providers/sxe2/sxe2_common.h 2026-05-26 10:42:01.855075215 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/sxe2_common.h 1970-01-01 08:00:00.000000000 +0800 -@@ -1,717 +0,0 @@ --#ifndef __SXE2_COMMON_H__ --#define __SXE2_COMMON_H__ -- --#include --#include --#include --#include --#include --#include --#include --#include -- --#include "util/compiler.h" --#include --#include "util/udma_barrier.h" --#include "util/util.h" --#include "ccan/minmax.h" --#include --#include "sxe2_abi.h" -- --#define SXE2_RDMA_BDF_BUFF_LEN 16 --#define SXE2_WQE_SIZE 4 --#define SXE2_QP_WQE_MAX_QUANTA 8 --#define SXE2_QP_WQE_MIN_QUANTA 1 --#define SXE2_CQE_SIZE 8 --#define SXE2_U_MINCQ_SIZE (4) --#define SXE2_DB_NOTE_SIZE (64) --#define SXE2_DB_PAGE_SIZE (4096) --#define SXE2_FEATURE_CQ_RESIZE (2ULL) --#define SXE2_SRQE_BUSY 1 --#define SXE2_SRQE_FREE 0 -- --#define SQ_CQ 1 --#define RQ_CQ 2 -- --#define SXE2_WQE_QUANTA_ODD_NUMBER 0x1 -- --#define SXE2_RQ_WQE_HEAD_OFFSET 32 --#define SXE2_RQ_WQE_FRAG_OFFSET 16 -- --#define SXE2_SINGLE_THREADED "SXE2_SINGLE_THREADED" --#define SXE2_LL_MODE "SXE2_LL_MODE" --#define SXE2_TOTAL_LL_WQE "SXE2_TOTAL_LL_WQE" --#define SXE2_DEDICATED_LL_WQE "SXE2_DEDICATED_LL_WQE" -- --#define sxe2_handle void* --#define sxe2_qp_handle sxe2_handle -- --#define SXE2_CQ_PREFIX "SXE2_CQ" --#define SXE2_QP_PREFIX "SXE2_QP" --#define SXE2_SRQ_PREFIX "SXE2_SRQ" -- --#if defined(SXE2_CFG_DEBUG) && defined(SXE2_SUPPORT_INJECT) --#define SXE2_INJECT_LLWQE_ERR "SXE2_INJECT_LLWQE_ERR" --#endif -- --enum sxe2_supported_wc_flags { -- SXE2_CQ_SUPPORTED_WC_FLAGS = IBV_WC_EX_WITH_BYTE_LEN -- | IBV_WC_EX_WITH_IMM -- | IBV_WC_EX_WITH_QP_NUM -- | IBV_WC_EX_WITH_SRC_QP -- | IBV_WC_EX_WITH_SLID -- | 
IBV_WC_EX_WITH_SL -- | IBV_WC_EX_WITH_DLID_PATH_BITS -- | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK -- | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP, --}; -- --enum inline_data_size { -- MAX_INLINE_DATA_8 = 8, -- MAX_INLINE_DATA_39 = 39, -- MAX_INLINE_DATA_101 = 101, --}; -- --enum sge_size { -- MAX_SGE_SIZE_0 = 0, -- MAX_SGE_SIZE_1, -- MAX_SGE_SIZE_2, -- MAX_SGE_SIZE_3, -- MAX_SGE_SIZE_4, -- MAX_SGE_SIZE_5, -- MAX_SGE_SIZE_6, -- MAX_SGE_SIZE_7, -- MAX_SGE_SIZE_8, -- MAX_SGE_SIZE_9, -- MAX_SGE_SIZE_10, -- MAX_SGE_SIZE_11, -- MAX_SGE_SIZE_12, -- MAX_SGE_SIZE_13, -- MAX_SGE_SIZE_14, --}; -- --enum wqe_size_shift { -- WQE_SIZE_32BYTE = 0, -- WQE_SIZE_64BYTE, -- WQE_SIZE_128BYTE, -- WQE_SIZE_256BYTE, --}; -- --enum sxe2_verbs_db_page_type { -- SXE2_VERBS_DB_PAGE_TYPE_LLWQE = 0x0, -- SXE2_VERBS_DB_PAGE_TYPE_NO_LLWQE = 0x1, --}; -- --enum sxe2_verbs_db_llwqe_page_type { -- SXE2_VERBS_DB_PAGE_TYPE_DEDICATED = 0x0, -- SXE2_VERBS_DB_PAGE_TYPE_SHARED = 0x1, --}; -- --enum sxe2_disp_id { -- SXE2_RDMA_SEND = 0, -- SXE2_RDMA_SEND_INLINE = 1, -- SXE2_RDMA_WRITE = 2, -- SXE2_RDMA_WRITE_INLINE = 3, -- SXE2_RDMA_READ = 4, -- SXE2_RDMA_BIND_MW = 5, -- SXE2_RDMA_LOCAL_INV = 6, -- SXE2_RDMA_FAST_REG_MR = 7, -- SXE2_RDMA_MAX_ID --}; -- --typedef enum sxe2_alloc_type { -- SXE2_ALLOC_TYPE_ANON, -- SXE2_ALLOC_TYPE_HUGE, -- SXE2_ALLOC_TYPE_PREFER_HUGE, --} sxe2_alloc_type_e; -- --typedef struct sxe2_hugetlb_mem { -- int shmid; -- __u32 reserved; -- void *shmaddr; -- unsigned long *bitmap; -- unsigned long bmp_size; -- struct list_node entry; --} sxe2_hugetlb_mem_s; -- --typedef struct sxe2_buf { -- void *buf; -- size_t length; -- __u64 base; -- __u32 reserved; -- sxe2_hugetlb_mem_s *hmem; -- sxe2_alloc_type_e type; -- __u64 resource_type; -- size_t req_alignment; --} sxe2_buf_s; -- --#define SXE2_RING_INIT(_ring, _size) \ -- { \ -- (_ring).head = 0; \ -- (_ring).tail = 0; \ -- (_ring).size = (_size); \ -- } -- --struct sxe2_ring { -- __u32 head; -- __u32 tail; -- __u32 size; --}; -- --struct 
sxe2_sq_common_wr_trk_info { -- __u64 wrid; -- __u32 wr_len; -- __u16 quanta; -- __u8 reserved[2]; --}; -- --struct sxe2_qp_quanta { -- __le64 elem[SXE2_WQE_SIZE]; --}; -- --struct sxe2_common_attrs { -- __u64 feature_flags; -- __u32 max_hw_wq_frags; -- __u32 max_hw_read_sges; -- __u32 max_hw_inline; -- __u32 max_hw_rq_quanta; -- __u32 max_hw_wq_quanta; -- __u32 min_hw_cq_size; -- __u32 max_hw_cq_size; -- __u16 max_hw_push_len; -- __u16 max_hw_sq_chunk; -- __u16 min_hw_wq_size; -- __u8 hw_rev; -- __u8 rsv; -- __u32 max_hw_srq_quanta; -- __u32 max_hw_srq_wr; --}; -- --typedef struct sxe2_qp_common_init_info { -- struct sxe2_qp_quanta *sq; -- struct sxe2_qp_quanta *rq; -- struct sxe2_srq_verbs *srq; -- struct sxe2_common_attrs *common_attrs; -- __u32 *qp_db_no_llwqe; -- __u32 *doorbell_note; -- struct sxe2_sq_common_wr_trk_info *sq_wrtrk_array; -- __u64 *rq_wrid_array; -- __u32 qpn; -- __u32 qp_caps; -- __u32 sq_size; -- __u32 rq_size; -- __u32 max_sq_sge_cnt; -- __u32 max_rq_sge_cnt; -- __u32 max_inline_data; -- __u32 sq_depth; -- __u32 rq_depth; -- __u8 qp_type; -- __u8 sq_shift; -- __u8 rq_shift; -- bool legacy_mode; --} sxe2_qp_common_init_info_s; -- --struct sxe2_io_info { -- __u32 total_sqe_cnt; -- __u32 total_rqe_cnt; -- __u32 finished_sqe_cnt; -- __u32 finished_rqe_cnt; -- __u32 finished_rqe_insrq_cnt; -- __u32 flushed_sq_cnt; -- __u32 flushed_rq_cnt; -- __u32 cleaned_sq_cnt; -- __u32 cleaned_rq_cnt; -- __u32 cleaned_flushsq_cnt; -- __u32 cleaned_flushrq_cnt; -- __u32 total_signal_cnt; -- __u32 send_cnt; -- __u32 send_inv_cnt; -- __u32 read_cnt; -- __u32 write_cnt; -- __u32 local_inv_cnt; -- __u32 bind_mw_cnt; -- __u32 fast_regmr_cnt; -- __u64 last_send_sqwrid; -- __u64 last_rcvd_sqwrid; -- __u64 last_send_rqwrid; -- __u64 last_rcvd_rqwrid; --}; -- --typedef struct sxe2_qp_common { -- struct sxe2_qp_quanta *sq_base; -- struct sxe2_qp_quanta *rq_base; -- struct sxe2_srq_verbs *srq; -- struct sxe2_common_attrs *common_attrs; -- __u32 *qp_db_no_llwqe; -- struct 
sxe2_sq_common_wr_trk_info *sq_wrtrk_array; -- __u64 *rq_wrid_array; -- __u32 *doorbell_note; -- struct sxe2_verbs_llwqe *verbs_llwqe; -- __le32 *push_db; -- __u64 *push_wqe; -- struct sxe2_ring sq_ring; -- struct sxe2_ring rq_ring; -- struct sxe2_ring initial_ring; -- __u32 qpn; -- __u32 qp_caps; -- __u32 sq_size; -- __u32 rq_size; -- __u32 max_sq_sge_cnt; -- __u32 max_rq_sge_cnt; -- __u32 max_inline_data; -- __u8 swqe_polarity; -- __u8 rwqe_polarity; -- __u8 rq_wqe_size; -- __u8 rq_wqe_size_multiplier; -- __u8 rd_fence_rate; -- __u16 ord_cnt; -- bool llwqe_enable : 1; -- bool llwqe_mode : 1; -- bool push_dropped : 1; -- bool destroy_pending : 1; -- void *back_qp; -- pthread_spinlock_t *lock; -- struct sxe2_io_info statistics; --} sxe2_qp_common_s; -- --typedef struct sxe2_uqp { -- struct verbs_qp verbs_qp; -- struct sxe2_ucq *send_cq; -- struct sxe2_ucq *recv_cq; -- size_t buf_size; -- pthread_spinlock_t lock; -- __u32 sq_sig_all; -- sxe2_qp_common_s qp; -- enum ibv_qp_type qp_type; -- int err; -- __u32 wqe_idx; -- __le64 *cur_wqe; -- __u16 quanta; -- __le64 *wqe_hdr; -- unsigned int rb_sq_head; -- enum sxe2_disp_id funid; -- struct sxe2_qp_quanta wqebuf[SXE2_QP_WQE_MIN_QUANTA]; -- sxe2_buf_s buf; --} sxe2_uqp_s; -- --struct sxe2_cqe { -- __le64 buf[SXE2_CQE_SIZE]; --}; -- --struct sxe2_cq_db_note { -- __le64 buf[SXE2_CQE_SIZE]; --}; -- --struct sxe2_cqe_info { -- __u64 wr_id; -- __u32 bytes; -- union { -- struct { -- __u64 payload_len : 32; -- __u64 packet_seq : 24; -- __u64 rsvd1 : 8; -- __u64 qpc; -- __u64 l_r_key : 32; -- __u64 qp_id : 18; -- __u64 rsvd2 : 14; -- __u64 minor_err : 16; -- __u64 major_err : 16; -- __u64 wq_desc_idx : 15; -- __u64 rsvd3 : 3; -- __u64 extended_cqe : 1; -- __u64 push_dropped : 1; -- __u64 ipv4 : 1; -- __u64 stag_or_lrkey : 1; -- __u64 solicited_evt : 1; -- __u64 error : 1; -- __u64 op : 6; -- __u64 qp_type : 1; -- __u64 rsvd4 : 1; -- __u64 imme_data : 32; -- __u64 srqn : 18; -- __u64 is_srq : 1; -- __u64 rsvd5 : 13; -- __u64 
cqe_timestamp; -- __u64 ud_smac : 48; -- __u64 ud_vlan_tag : 16; -- __u64 ud_src_qpn : 24; -- __u64 rsvd6 : 8; -- __u64 rsvd7 : 6; -- __u64 vsi_index : 10; -- __u64 rsvd8 : 12; -- __u64 vlan_tag_flag : 1; -- __u64 ud_smac_valid : 1; -- __u64 imm_data_flag : 1; -- __u64 cqe_valid : 1; -- } field; -- __u64 buf[SXE2_CQE_SIZE]; -- }info; --}; -- --struct sxe2_cq_uk_init_info { -- __le64 *cqe_alloc_db; -- struct sxe2_cqe *cq_base; -- __le32 *doorbell_note; -- __u32 cq_id; -- __u32 ncqe; --}; -- --struct sxe2_cq_uk { -- struct sxe2_cqe *cq_base; -- __le64 *cqe_alloc_db; -- __le32 *doorbell_note; -- __u32 arm_sn; -- __u32 cq_id; -- __u32 ncqe; -- struct sxe2_ring cq_ring; -- __u8 polarity; --}; -- --struct sxe2_ucq { -- struct verbs_cq verbs_cq; -- pthread_spinlock_t lock; -- sxe2_buf_s buf; -- size_t buf_size; -- uint32_t comp_vector; -- uint32_t report_rtt; -- struct sxe2_uqp *uqp; -- struct sxe2_cq_uk cq; -- struct sxe2_cqe_info cur_cqe; --}; -- --struct sxe2_upd { -- struct ibv_pd ibv_pd; -- void *arm_cq_page; -- void *arm_cq; -- uint32_t pd_id; --}; -- --struct sxe2_srq_verbs { -- struct sxe2_qp_quanta *srq_base; -- struct sxe2_common_attrs *common_attrs; -- __u64 *db_note; -- struct sxe2_ring srq_ring; -- __u32 srq_id; -- __u32 srq_size; -- __u32 max_srq_frag_cnt; -- __u8 srq_polarity; -- __u8 wqe_size; -- __u8 wqe_size_multiplier; -- __u32 srq_buf_size; -- __u64 *srq_wrid_array; -- __u8 *srqe_array; --}; -- --struct sxe2_srq_verbs_init_info { -- struct sxe2_qp_quanta *srq; -- struct sxe2_common_attrs *common_attrs; -- __le64 *db_note; -- __u64 *srq_wrid_array; -- __u8 *srqe_array; -- __u32 srq_id; -- __u32 srq_size; -- __u32 max_srq_frag_cnt; -- __u32 srq_buf_size; --}; -- --struct sxe2_usrq { -- struct verbs_srq vsrq; -- struct sxe2_srq_verbs srq_verbs; -- pthread_spinlock_t lock; -- __u32 max_wr; -- __u32 max_sge; -- sxe2_buf_s buf; --}; -- --struct sxe2_rdma_hw_attrs { -- struct sxe2_common_attrs uk_attrs; -- __u64 max_hw_outbound_msg_size; -- __u64 
max_hw_inbound_msg_size; -- __u64 max_mr_size; -- __u32 min_hw_qp_id; -- __u32 min_hw_aeq_size; -- __u32 max_hw_aeq_size; -- __u32 min_hw_ceq_size; -- __u32 max_hw_ceq_size; -- __u32 max_hw_device_pages; -- __u32 max_hw_vf_fpm_id; -- __u32 first_hw_vf_fpm_id; -- __u32 max_rra; -- __u32 max_sra; -- __u32 max_hw_wqes; -- __u32 max_hw_pds; -- __u32 max_hw_ena_vf_count; -- __u32 max_qp_wr; -- __u32 max_pe_ready_count; -- __u32 max_done_count; -- __u32 max_sleep_count; -- __u32 max_mq_compl_wait_time_ms; --}; -- --typedef struct sxe2_spinlock { -- pthread_spinlock_t lock; -- int in_use; -- int need_lock; --} sxe2_spinlock_s; -- --struct sxe2_rdma_ucontext { -- struct verbs_context ibv_ctx; -- int abi_ver; -- struct sxe2_upd *sxe2_upd; -- struct sxe2_common_attrs uk_attrs; -- FILE *dbg_fp; -- char bdf[SXE2_RDMA_BDF_BUFF_LEN]; -- int ll_mode; -- __u32 tot_ll_wqes; -- __u32 ded_ll_wqes; -- __u32 shared_ll_wqes; -- struct list_head shared_llwqe_list; -- struct list_head dedicated_llwqe_list; -- struct list_head mmap_page_addr_list; -- __u16 alloc_dedicated_llwqes; -- __u16 alloc_shared_llwqes; -- pthread_mutex_t alloc_llwqe_mutex; -- void *qp_db_no_llwqe; -- void *cq_db_arm; -- void *cq_db_info; -- __u32 enable_io_log; -- __u32 log_level; -- sxe2_spinlock_s hugetlb_lock; -- struct list_head hugetlb_list; --}; -- --struct sxe2_umr { -- struct verbs_mr vmr; -- int acc_flags; --}; -- --struct sxe2_uah { -- struct ibv_ah ibv_ah; -- __u32 ah_id; --}; -- --struct sxe2_verbs_llwqe { -- void *wqe_addr; -- void *db_addr; -- pthread_spinlock_t lock; -- __u32 wqe_buf_size; -- __u32 num_llwqe; -- int need_lock; -- void *db_page_addr; -- off_t db_mmap_offset; -- __u8 mmaped_entry : 1; -- __u8 no_ll_mode : 1; -- __u8 qp_dedicated : 1; -- __u8 qp_shared : 1; -- __u32 count; -- struct list_node list_entry; -- __u32 db_handle; -- __u32 db_mmap_size; -- __u32 db_page_id; --}; -- --struct sxe2_db_mmap_db_page_addr { -- void* db_page_addr; -- __u32 mmap_size; -- __u32 db_page_id; -- struct 
list_node list_entry; --}; --struct ibv_cq *sxe2_ucreate_cq(struct ibv_context *context, int cqe, -- struct ibv_comp_channel *channel, int comp_vector); --struct ibv_cq_ex *sxe2_ucreate_cq_ex(struct ibv_context *context, -- struct ibv_cq_init_attr_ex *cq_attr); --int sxe2_umodify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr); --int sxe2_udestroy_cq(struct ibv_cq *cq); --struct ibv_pd *sxe2_ualloc_pd(struct ibv_context *context); --int sxe2_ufree_pd(struct ibv_pd *pd); --struct ibv_mr *sxe2_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, -- uint64_t hca_va, int acc); --int sxe2_udereg_mr(struct verbs_mr *vmr); --int sxe2_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, -- void *addr, size_t length, int access); --struct ibv_mr *sxe2_ureg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, -- size_t length, uint64_t iova, int fd, -- int acc); --struct ibv_mw *sxe2_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type); --struct ibv_ah *sxe2_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr); --int sxe2_udestroy_ah(struct ibv_ah *ibah); --int sxe2_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, -- struct ibv_send_wr **bad_wr); --int sxe2_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, -- struct ibv_recv_wr **bad_wr); --int sxe2_upoll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc); --int sxe2_uarm_cq(struct ibv_cq *cq, int solicited); --void sxe2_ucq_event(struct ibv_cq *cq); --int sxe2_upost_srq_recv(struct ibv_srq *ibv_srq, -- struct ibv_recv_wr *ibv_wr, struct ibv_recv_wr **bad_wr); --void sxe2_wr_start(struct ibv_qp_ex *qp_ex); --int sxe2_wr_complete(struct ibv_qp_ex *qp_ex); --void sxe2_wr_abort(struct ibv_qp_ex *qp_ex); --void sxe2_wr_send_rc_ud(struct ibv_qp_ex *qp_ex); --void sxe2_wr_send_imm_rc_ud(struct ibv_qp_ex *qp_ex, __be32 imm_data); --void sxe2_wr_send_inv_rc(struct ibv_qp_ex *qp_ex, uint32_t invalidate_rkey); --void sxe2_wr_rdma_read_rc(struct ibv_qp_ex *qp_ex, uint32_t rkey, -- uint64_t remote_addr); 
--void sxe2_wr_rdma_write_rc(struct ibv_qp_ex *qp_ex, uint32_t rkey, -- uint64_t remote_addr); --void sxe2_wr_rdma_write_imm_rc(struct ibv_qp_ex *qp_ex, uint32_t rkey, -- uint64_t remote_addr, __be32 imm_data); --void sxe2_wr_set_ud_addr(struct ibv_qp_ex *qp_ex, struct ibv_ah *ah, -- uint32_t remote_qpn, uint32_t remote_qkey); --void sxe2_wr_set_inline_data_rc_ud(struct ibv_qp_ex *qp_ex, -- void *addr, size_t length); --void sxe2_wr_set_inline_data_list_rc_ud(struct ibv_qp_ex *qp_ex, -- size_t num_buf, const struct ibv_data_buf *buf_list); --void sxe2_wr_set_sge_rc_ud(struct ibv_qp_ex *qp_ex, uint32_t lkey, -- uint64_t addr, uint32_t length); --void sxe2_wr_set_sge_list_rc_ud(struct ibv_qp_ex *qp_ex, -- size_t num_sge, const struct ibv_sge *sg_list); --void sxe2_wr_bind_mw_rc(struct ibv_qp_ex *qp_ex, struct ibv_mw *mw, -- uint32_t rkey, const struct ibv_mw_bind_info *bind_info); --void sxe2_wr_local_inv_rc(struct ibv_qp_ex *qp_ex, uint32_t invalidate_rkey); -- --static inline struct sxe2_rdma_ucontext *to_sctx(struct ibv_context *ibctx) --{ -- return container_of(ibctx, struct sxe2_rdma_ucontext, ibv_ctx.context); --} -- --int sxe2_uget_single_threaded_env(void); -- --int sxe2_uget_ll_mode(void); -- --int sxe2_uget_tot_llwqe(struct sxe2_rdma_ucontext *sctx, -- struct sxe2_get_context_resp *resp); -- --int sxe2_uget_ded_llwqe(struct sxe2_rdma_ucontext *sctx, int total_llwqe); -- --int sxe2_uinit_doorbell(struct sxe2_rdma_ucontext *sctx, -- struct sxe2_get_context_resp *resp); -- --void sxe2_ufree_doorbell(struct ibv_context *context); -- -- --struct sxe2_verbs_llwqe *alloc_db_page_and_get_qp_llwqe(struct ibv_context *context, -- __u32 db_mmap_size, -- __u32 db_page_id, -- __u64 db_mmap_offset, -- __u8 alloc_page_type); --struct sxe2_verbs_llwqe *db_uget_qp_llwqe(struct ibv_context *context, -- bool *need_alloc_page, -- __u8 *alloc_page_type); -- --struct sxe2_verbs_llwqe * --db_ualloc_page_and_llwqes(struct ibv_context *context, bool dedicated, -- __u32 
db_mmap_size, -- __u32 db_page_id, -- __u64 db_mmap_offset); -- --void db_uput_qp_llwqe(struct sxe2_rdma_ucontext *sctx, -- struct sxe2_verbs_llwqe *llwqe); -- --void *sxe2_alloc_hw_buf(size_t size); -- --void sxe2_free_hw_buf(void *buf, size_t size); -- --__u32 sxe2_round_up_pow_2(__u32 value); -- --struct ibv_qp *sxe2_ucreate_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr); -- --struct ibv_qp *sxe2_ucreate_qp_ex(struct ibv_context *context, -- struct ibv_qp_init_attr_ex *attr_ex); -- --int sxe2_udestroy_qp(struct ibv_qp *ibqp); -- --int sxe2_umodify_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, int attr_mask); -- --int sxe2_uquery_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, int attr_mask, -- struct ibv_qp_init_attr *init_attr); --int sxe2_start_poll(struct ibv_cq_ex *ibvcq_ex, struct ibv_poll_cq_attr *attr); -- --int sxe2_next_poll(struct ibv_cq_ex *ibvcq_ex); --void sxe2_end_poll(struct ibv_cq_ex *ibvcq_ex); --uint64_t sxe2_wc_read_completion_ts(struct ibv_cq_ex *ibvcq_ex); --uint64_t sxe2_wc_read_completion_wallclock_ns(struct ibv_cq_ex *ibvcq_ex); --enum ibv_wc_opcode sxe2_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex); --uint32_t sxe2_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex); --unsigned int sxe2_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex); --uint32_t sxe2_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex); --__be32 sxe2_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex); --uint32_t sxe2_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex); --uint32_t sxe2_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex); --uint32_t sxe2_wc_read_slid(struct ibv_cq_ex *ibvcq_ex); --uint8_t sxe2_wc_read_sl(struct ibv_cq_ex *ibvcq_ex); --uint8_t sxe2_wc_read_dlid_path_bits(struct ibv_cq_ex *ibvcq_ex); -- --int sxe2_uget_srq_num(struct ibv_srq *ibv_srq, uint32_t *srqn); -- --struct ibv_srq *sxe2_ucreate_srq(struct ibv_pd *pd, -- struct ibv_srq_init_attr *attr); -- --struct ibv_srq *sxe2_ucreate_srq_ex(struct ibv_context *context, -- struct ibv_srq_init_attr_ex *attr); -- --int 
sxe2_umodify_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr, -- int attr_mask); -- --int sxe2_uquery_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr); -- --int sxe2_udestroy_srq(struct ibv_srq *ibv_srq); -- --static inline struct sxe2_usrq *to_usrq(struct ibv_srq *ibv_srq) --{ -- return container_of(ibv_srq, struct sxe2_usrq, vsrq.srq); --} -- --int sxe2_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, __u16 lid); -- --int sxe2_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, __u16 lid); -- --static inline int sxe2_spin_lock(sxe2_spinlock_s *lock) --{ -- int ret = 0; -- if (lock->need_lock) { -- ret = pthread_spin_lock(&lock->lock); -- goto end; -- } -- -- if (unlikely(lock->in_use)) { -- fprintf(stderr, "*** ERROR: multithreading violation ***\n" -- "You are running a multithreaded application but\n" -- "you set SXE2_SINGLE_THREADED=1. Please unset it.\n"); -- abort(); -- } else { -- lock->in_use = 1; -- atomic_thread_fence(memory_order_acq_rel); -- } --end: -- return ret; --} -- --static inline int sxe2_spin_unlock(sxe2_spinlock_s *lock) --{ -- int ret = 0; -- -- if (lock->need_lock) { -- ret = pthread_spin_unlock(&lock->lock); -- goto end; -- } -- -- lock->in_use = 0; --end: -- return ret; --} -- --static inline int sxe2_spinlock_init(sxe2_spinlock_s *lock, int need_lock) --{ -- lock->in_use = 0; -- lock->need_lock = need_lock; -- -- return pthread_spin_init(&lock->lock, PTHREAD_PROCESS_SHARED); --} -- --static inline int sxe2_spinlock_destroy(sxe2_spinlock_s *lock) --{ -- return pthread_spin_destroy(&lock->lock); --} -- --#endif -diff -Naur rdma-core-48.0/providers/sxe2/verbs.c rdma-core-48.0.bak/providers/sxe2/verbs.c ---- rdma-core-48.0/providers/sxe2/verbs.c 2026-05-26 10:42:01.854075208 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/verbs.c 1970-01-01 08:00:00.000000000 +0800 -@@ -1 +0,0 @@ -- -diff -Naur rdma-core-48.0/providers/sxe2/verbs.h rdma-core-48.0.bak/providers/sxe2/verbs.h ---- rdma-core-48.0/providers/sxe2/verbs.h 
2026-05-26 10:42:01.853075201 +0800 -+++ rdma-core-48.0.bak/providers/sxe2/verbs.h 1970-01-01 08:00:00.000000000 +0800 -@@ -1,6 +0,0 @@ --#ifndef __SXE2_USER_VERBS_H__ --#define __SXE2_USER_VERBS_H__ -- --#include "sxe2_common.h" -- --#endif +diff -Naur rdma-core-48.0.bak/providers/sxe2/ah.c rdma-core-48.0/providers/sxe2/ah.c +--- rdma-core-48.0.bak/providers/sxe2/ah.c 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/ah.c 2026-05-27 17:08:05.581104746 +0800 +@@ -0,0 +1,62 @@ ++ ++#include ++#include ++ ++#include "sxe2_common.h" ++#include "sxe2_abi.h" ++#include "log.h" ++ ++struct ibv_ah *sxe2_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr) ++{ ++ struct sxe2_uah *ah; ++ union ibv_gid sgid; ++ struct sxe2_ucreate_ah_resp resp; ++ int err; ++ struct sxe2_rdma_ucontext *sctx = to_sctx(ibpd->context); ++ ++ memset(&resp, 0, sizeof(resp)); ++ err = ibv_query_gid(ibpd->context, attr->port_num, attr->grh.sgid_index, ++ &sgid); ++ if (err) { ++ SXE2_VERBS_LOG_ERROR_BDF("ibv query gid failed, ret %d.\n", err); ++ errno = err; ++ return NULL; ++ } ++ ++ ah = calloc(1, sizeof(*ah)); ++ if (!ah) { ++ SXE2_VERBS_LOG_ERROR_BDF("calloc ah buf failed.\n"); ++ return NULL; ++ } ++ ++ err = ibv_cmd_create_ah(ibpd, &ah->ibv_ah, attr, &resp.ibv_resp, ++ sizeof(resp)); ++ if (err) { ++ free(ah); ++ SXE2_VERBS_LOG_ERROR_BDF("ibv create ah failed, ret %d.\n", err); ++ errno = err; ++ return NULL; ++ } ++ ++ ah->ah_id = resp.ah_id; ++ ++ return &ah->ibv_ah; ++} ++ ++int sxe2_udestroy_ah(struct ibv_ah *ibah) ++{ ++ struct sxe2_uah *ah; ++ int ret; ++ ++ ah = container_of(ibah, struct sxe2_uah, ibv_ah); ++ ++ ret = ibv_cmd_destroy_ah(ibah); ++ if (ret) { ++ SXE2_VERBS_LOG_ERROR("ibv destroy ah failed, ret %d.\n", ret); ++ return ret; ++ } ++ ++ free(ah); ++ ++ return 0; ++} +diff -Naur rdma-core-48.0.bak/providers/sxe2/buf.c rdma-core-48.0/providers/sxe2/buf.c +--- rdma-core-48.0.bak/providers/sxe2/buf.c 1970-01-01 08:00:00.000000000 +0800 ++++ 
rdma-core-48.0/providers/sxe2/buf.c 2026-05-27 17:08:05.580104739 +0800 +@@ -0,0 +1,313 @@ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "util/bitmap.h" ++ ++#include "buf.h" ++#include "sxe2_common.h" ++#include "sxe2_abi.h" ++#include "log.h" ++ ++ void sxe2_huge_mem_free(struct sxe2_rdma_ucontext *ctx, sxe2_hugetlb_mem_s *hmem) ++{ ++ struct sxe2_rdma_ucontext *sctx = ctx; ++ ++ if (hmem->bitmap) { ++ free(hmem->bitmap); ++ } ++ ++ if (shmdt(hmem->shmaddr) == -1) { ++ SXE2_VERBS_LOG_ERROR_BDF("shmdt fail, err(%s)", strerror(errno)); ++ } ++ shmctl(hmem->shmid, IPC_RMID, NULL); ++ free(hmem); ++ ++ return; ++} ++ ++ sxe2_hugetlb_mem_s *sxe2_huge_mem_alloc(struct sxe2_rdma_ucontext *ctx, size_t size) ++{ ++ sxe2_hugetlb_mem_s *hmem; ++ size_t shm_len; ++ struct sxe2_rdma_ucontext *sctx = ctx; ++ ++ hmem = malloc(sizeof(*hmem)); ++ if (hmem == NULL) { ++ SXE2_VERBS_LOG_ERROR_BDF("hmem malloc error"); ++ goto end; ++ } ++ memset(hmem, 0, sizeof(*hmem)); ++ ++ shm_len = align(size, SXE2_SHM_LENGTH); ++ hmem->shmid = shmget(IPC_PRIVATE, shm_len, SHM_HUGETLB | SHM_R | SHM_W); ++ if (hmem->shmid == -1) { ++ SXE2_VERBS_LOG_ERROR_BDF("shmget fail, err(%s)", strerror(errno)); ++ goto out_free; ++ } ++ ++ hmem->shmaddr = shmat(hmem->shmid, SXE2_SHM_ADDR, SXE2_SHMAT_FLAGS); ++ if (hmem->shmaddr == (void *)-1) { ++ SXE2_VERBS_LOG_ERROR_BDF("shmat fail, err(%s)", strerror(errno)); ++ goto out_rmid; ++ } ++ ++ hmem->bitmap = bitmap_alloc0(shm_len / SXE2_SHM_SINGLE_CHUNK_SIZE); ++ if (!hmem->bitmap) { ++ SXE2_VERBS_LOG_ERROR_BDF("bitmap_alloc0 fail, err(%s)", strerror(errno)); ++ goto out_shmdt; ++ } ++ ++ hmem->bmp_size = shm_len / SXE2_SHM_SINGLE_CHUNK_SIZE; ++ ++ shmctl(hmem->shmid, IPC_RMID, NULL); ++ ++ goto end; ++ ++out_shmdt: ++ if (shmdt(hmem->shmaddr) == -1) { ++ SXE2_VERBS_LOG_ERROR_BDF("shmdt fail, err(%s)", strerror(errno)); ++ } ++out_rmid: ++ shmctl(hmem->shmid, IPC_RMID, NULL); ++out_free: ++ free(hmem); ++ hmem = 
NULL; ++end: ++ return hmem; ++} ++ ++ void sxe2_huge_buf_free(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf, bool dofork_range) ++{ ++ size_t nchunk; ++ ++ nchunk = buf->length / SXE2_SHM_SINGLE_CHUNK_SIZE; ++ ++ if (buf->hmem == NULL) { ++ goto end; ++ } ++ ++ if (dofork_range == true) { ++ ibv_dofork_range(buf->buf, buf->length); ++ } ++ ++ sxe2_spin_lock(&ctx->hugetlb_lock); ++ bitmap_zero_region(buf->hmem->bitmap, buf->base, buf->base + nchunk); ++ if (bitmap_empty(buf->hmem->bitmap, buf->hmem->bmp_size)) { ++ list_del(&buf->hmem->entry); ++ sxe2_spin_unlock(&ctx->hugetlb_lock); ++ sxe2_huge_mem_free(ctx, buf->hmem); ++ buf->hmem = NULL; ++ goto free; ++ } ++ ++ sxe2_spin_unlock(&ctx->hugetlb_lock); ++free: ++ buf->buf = NULL; ++end: ++ return; ++} ++ ++ int sxe2_huge_buf_alloc(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf, size_t size) ++{ ++ int found = 0; ++ size_t nchunk; ++ sxe2_hugetlb_mem_s *hmem = NULL; ++ int ret; ++ struct sxe2_rdma_ucontext *sctx = ctx; ++ ++ buf->length = align(size, SXE2_SHM_SINGLE_CHUNK_SIZE); ++ nchunk = buf->length / SXE2_SHM_SINGLE_CHUNK_SIZE; ++ ++ if (!nchunk) { ++ ret = 0; ++ SXE2_VERBS_LOG_WARN_BDF("not need huge"); ++ goto end; ++ } ++ ++ sxe2_spin_lock(&ctx->hugetlb_lock); ++ list_for_each(&ctx->hugetlb_list, hmem, entry) { ++ if (!bitmap_full(hmem->bitmap, hmem->bmp_size)) { ++ buf->base = bitmap_find_free_region(hmem->bitmap, hmem->bmp_size, nchunk); ++ if (buf->base != hmem->bmp_size) { ++ bitmap_fill_region(hmem->bitmap, buf->base, buf->base + nchunk); ++ buf->hmem = hmem; ++ found = true; ++ break; ++ } ++ } ++ } ++ sxe2_spin_unlock(&ctx->hugetlb_lock); ++ ++ if (!found) { ++ hmem = sxe2_huge_mem_alloc(ctx, buf->length); ++ if (NULL == hmem) { ++ SXE2_VERBS_LOG_ERROR_BDF("sxe2_huge_mem_alloc error"); ++ ret = ENOMEM; ++ goto end; ++ } ++ ++ buf->base = 0; ++ assert(nchunk <= hmem->bmp_size); ++ bitmap_fill_region(hmem->bitmap, 0, nchunk); ++ ++ buf->hmem = hmem; ++ ++ sxe2_spin_lock(&ctx->hugetlb_lock); ++ if 
(nchunk != hmem->bmp_size) { ++ list_add(&ctx->hugetlb_list, &hmem->entry); ++ } else { ++ list_add_tail(&ctx->hugetlb_list, &hmem->entry); ++ } ++ sxe2_spin_unlock(&ctx->hugetlb_lock); ++ } ++ ++ buf->buf = hmem->shmaddr + buf->base * SXE2_SHM_SINGLE_CHUNK_SIZE; ++ ++ ret = ibv_dontfork_range(buf->buf, buf->length); ++ if (ret) { ++ SXE2_VERBS_LOG_ERROR_BDF("dontfork_range error,buf:%p,length:%zu,ret:%d", buf->buf, buf->length, ret); ++ ret = EPERM; ++ goto out_fork; ++ } ++ buf->type = SXE2_ALLOC_TYPE_HUGE; ++ ++ goto end; ++ ++out_fork: ++ sxe2_huge_buf_free(ctx, buf, false); ++end: ++ return ret; ++} ++ ++int sxe2_prefered_buf_alloc(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf, size_t size, size_t page_size, ++ sxe2_alloc_type_e type) ++{ ++ int ret; ++ struct sxe2_rdma_ucontext *sctx = ctx; ++ ++ if (type == SXE2_ALLOC_TYPE_HUGE || ++ type == SXE2_ALLOC_TYPE_PREFER_HUGE ) { ++ ret = sxe2_huge_buf_alloc(ctx, buf, size); ++ if (!ret) { ++ goto end; ++ } ++ ++ if (type == SXE2_ALLOC_TYPE_HUGE) { ++ SXE2_VERBS_LOG_ERROR_BDF("Huge mode allocation fail, page_size:%zu, size:%zu", ++ page_size, size); ++ goto end; ++ } ++ ++ SXE2_VERBS_LOG_ERROR_BDF("Huge mode allocation fail, fallback to default mode, type:%d, \ ++ page_size:%zu, size:%zu", type, page_size, size); ++ } ++ ++ ret = sxe2_buf_alloc(ctx, buf, size, page_size); ++ if (ret) { ++ SXE2_VERBS_LOG_ERROR_BDF("cmr_buf_alloc error, page_size:%zu, size:%zu,ret:%d", ++ page_size, size, ret); ++ } ++ ++end: ++ return ret; ++ ++} ++ ++void sxe2_actual_buf_free(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf) ++{ ++ struct sxe2_rdma_ucontext *sctx = ctx; ++ ++ if ((buf == NULL) || ++ (buf->buf == NULL) || ++ (ctx == NULL)) { ++ SXE2_VERBS_LOG_ERROR_BDF( "context or buf or buf->buf is NULL"); ++ errno = EINVAL; ++ goto end; ++ } ++ ++ switch (buf->type) { ++ case SXE2_ALLOC_TYPE_ANON: ++ sxe2_buf_free(buf); ++ break; ++ ++ case SXE2_ALLOC_TYPE_HUGE: ++ sxe2_huge_buf_free(ctx, buf, true); ++ break; ++ ++ default: ++ 
SXE2_VERBS_LOG_ERROR_BDF("Bad allocation type:%d", buf->type); ++ } ++end: ++ return; ++} ++ ++void sxe2_alloc_type_get(const char *component, sxe2_alloc_type_e *alloc_type, sxe2_alloc_type_e default_type) ++{ ++ char *env_value; ++ char name[SXE2_ALLOC_ENV_NAME]; ++ ++ snprintf(name, sizeof(name), "%s_ALLOC_TYPE", component); ++ ++ *alloc_type = default_type; ++ ++ env_value = getenv(name); ++ if (env_value) { ++ if (!strcasecmp(env_value, "ANON")) { ++ *alloc_type = SXE2_ALLOC_TYPE_ANON; ++ } else if (!strcasecmp(env_value, "HUGE")){ ++ *alloc_type = SXE2_ALLOC_TYPE_HUGE; ++ } else if (!strcasecmp(env_value, "PREFER_HUGE")){ ++ *alloc_type = SXE2_ALLOC_TYPE_PREFER_HUGE; ++ } ++ } ++} ++ ++int sxe2_buf_alloc(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf, size_t size, size_t page_size) ++{ ++ int ret; ++ size_t al_size; ++ struct sxe2_rdma_ucontext *sctx = ctx; ++ ++ if ((buf == NULL) || (ctx == NULL)) { ++ SXE2_VERBS_LOG_ERROR_BDF("context of buf is NULL"); ++ ret = EPERM; ++ goto end; ++ } ++ ++ al_size = align(size, page_size); ++ ret = posix_memalign(&buf->buf, page_size, al_size); ++ if (ret) { ++ SXE2_VERBS_LOG_ERROR_BDF("posix_memalign error,page_size:%zu,al_size:%zu, ret:%d", page_size, al_size, ret); ++ goto end; ++ } ++ ++ ret = ibv_dontfork_range(buf->buf, al_size); ++ if (ret) { ++ SXE2_VERBS_LOG_ERROR_BDF("dontfork_range error,buf:%p,al_size:%zu, ret:%d", buf->buf, al_size, ret); ++ free(buf->buf); ++ buf->buf = NULL; ++ ret = EPERM; ++ goto end; ++ } ++ ++ buf->length = al_size; ++ buf->type = SXE2_ALLOC_TYPE_ANON; ++ ++end: ++ return ret; ++} ++ ++void sxe2_buf_free(sxe2_buf_s *buf) ++{ ++ ibv_dofork_range(buf->buf, buf->length); ++ free(buf->buf); ++ buf->buf = NULL; ++ ++ return; ++} ++ +diff -Naur rdma-core-48.0.bak/providers/sxe2/buf.h rdma-core-48.0/providers/sxe2/buf.h +--- rdma-core-48.0.bak/providers/sxe2/buf.h 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/buf.h 2026-05-27 17:08:05.579104733 +0800 +@@ -0,0 +1,42 @@ 
++ ++#ifndef __BUF_H__ ++#define __BUF_H__ ++ ++#include "sxe2_common.h" ++#include ++#include ++#include ++ ++#define SXE2_SHM_ADDR NULL ++#define SXE2_SHMAT_FLAGS 0 ++#define SXE2_ALLOC_ENV_NAME (128) ++ ++#ifndef HPAGE_SIZE ++#define HPAGE_SIZE (2UL * 1024 * 1024) ++#endif ++ ++#define SXE2_SHM_LENGTH HPAGE_SIZE ++#define SXE2_SHM_SINGLE_CHUNK_SIZE 32768 ++ ++int sxe2_prefered_buf_alloc(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf, size_t size, size_t page_size, ++ sxe2_alloc_type_e type); ++ ++void sxe2_actual_buf_free(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf); ++ ++void sxe2_alloc_type_get(const char *component, sxe2_alloc_type_e *alloc_type, ++ sxe2_alloc_type_e default_type); ++ ++int sxe2_buf_alloc(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf, size_t size, size_t page_size); ++ ++void sxe2_buf_free(sxe2_buf_s *buf); ++ ++ sxe2_hugetlb_mem_s *sxe2_huge_mem_alloc(struct sxe2_rdma_ucontext *ctx, size_t size); ++ ++ void sxe2_huge_buf_free(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf, bool dofork_range); ++ ++ void sxe2_huge_mem_free(struct sxe2_rdma_ucontext *ctx, sxe2_hugetlb_mem_s *hmem); ++ ++ int sxe2_huge_buf_alloc(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf, size_t size); ++ ++#endif ++ +diff -Naur rdma-core-48.0.bak/providers/sxe2/CMakeLists.txt rdma-core-48.0/providers/sxe2/CMakeLists.txt +--- rdma-core-48.0.bak/providers/sxe2/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/CMakeLists.txt 2026-05-27 17:08:05.580104739 +0800 +@@ -0,0 +1,51 @@ ++# SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) ++if(DEFINED ENV{MD_MAKEMODE}) ++ set(makemode $ENV{MD_MAKEMODE}) ++else() ++ set(makemode release) ++endif() ++if(makemode STREQUAL "release") ++ add_definitions(-DSXE2_CFG_RELEASE) ++else() ++ add_definitions(-DSXE2_CFG_DEBUG) ++endif() ++if(DEFINED ENV{ASAN}) ++ set(asan $ENV{ASAN}) ++else() ++ set(asan no) ++endif() ++if(asan STREQUAL "yes") ++ add_definitions(-DSXE2_SUPPORT_ASAN) ++endif() 
++if(DEFINED ENV{INJECT}) ++ set(inject $ENV{INJECT}) ++else() ++ set(inject no) ++endif() ++if(inject STREQUAL "yes") ++ add_definitions(-DSXE2_SUPPORT_INJECT) ++endif() ++if(DEFINED ENV{IO_STAT}) ++ set(iostat $ENV{IO_STAT}) ++else() ++ set(iostat no) ++endif() ++if(iostat STREQUAL "yes") ++ add_definitions(-DSXE2_SUPPORT_IO_STAT) ++endif() ++add_compile_options(-Werror) ++rdma_provider(sxe2 ++ sxe2_common.c ++ mr.c ++ io.c ++ ah.c ++ cq.c ++ pd.c ++ qp.c ++ db.c ++ device_port.c ++ log.c ++ srq.c ++ mc.c ++ buf.c ++) +diff -Naur rdma-core-48.0.bak/providers/sxe2/cq.c rdma-core-48.0/providers/sxe2/cq.c +--- rdma-core-48.0.bak/providers/sxe2/cq.c 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/cq.c 2026-05-27 17:08:05.579104733 +0800 +@@ -0,0 +1,405 @@ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "sxe2_common.h" ++#include "sxe2_abi.h" ++#include "sxe2-abi.h" ++#include "log.h" ++#include "io.h" ++#include "buf.h" ++ ++#define SXE2_HCA_CORE_CLOCK_800_MHZ (800) ++ ++#define SXE2_VERBS_MIN_CQ_SIZE 4 ++#define SXE2_VERBS_MAX_CQ_SIZE 2097152 ++ ++#define SXE2_UCQ_DB_NOTE_CMD_SN_SHIFT 29 ++#define SXE2_UCQ_DB_NOTE_CMD_SN 3 ++ ++enum { ++ UCREATE_CQ_SUPPORTED_FLAGS = ++ IBV_CREATE_CQ_ATTR_SINGLE_THREADED | ++ IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN ++}; ++ ++enum { ++ UCREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS | ++ IBV_CQ_INIT_ATTR_MASK_PD ++}; ++ ++static inline __u32 get_cqe_count(__u32 ncqe) ++{ ++ ncqe++; ++ ++ ncqe = (__u32)roundup_pow_of_two(ncqe); ++ ++ if (ncqe < SXE2_U_MINCQ_SIZE) { ++ ncqe = SXE2_U_MINCQ_SIZE; ++ } ++ return ncqe; ++} ++ ++static inline size_t get_cq_total_bytes(unsigned int ncqe) ++{ ++ const long page_size = sysconf(_SC_PAGE_SIZE); ++ 
return roundup(ncqe * sizeof(struct sxe2_cqe), page_size); ++} ++ ++static void sxe2_uk_cq_init(struct sxe2_cq_uk *cq, struct sxe2_cq_uk_init_info *info) ++{ ++ cq->cq_base = info->cq_base; ++ cq->cqe_alloc_db = info->cqe_alloc_db; ++ cq->cq_id = info->cq_id; ++ cq->ncqe = info->ncqe; ++ cq->doorbell_note = info->doorbell_note; ++ SXE2_RING_INIT(cq->cq_ring, cq->ncqe); ++ cq->polarity = 1; ++ cq->arm_sn = 0; ++ return; ++} ++ ++static void sxe2_ibvcq_ex_fill_priv_funcs(struct sxe2_ucq *cq, struct ibv_cq_init_attr_ex *cq_attr) ++{ ++ struct ibv_cq_ex *ibvcq_ex = &cq->verbs_cq.cq_ex; ++ ++ ibvcq_ex->start_poll = sxe2_start_poll; ++ ibvcq_ex->end_poll = sxe2_end_poll; ++ ibvcq_ex->next_poll = sxe2_next_poll; ++ ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) { ++ ibvcq_ex->read_completion_ts = sxe2_wc_read_completion_ts; ++ cq->report_rtt = true; ++ } ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) { ++ ibvcq_ex->read_completion_wallclock_ns = sxe2_wc_read_completion_wallclock_ns; ++ cq->report_rtt = true; ++ } ++ ++ ibvcq_ex->read_opcode = sxe2_wc_read_opcode; ++ ibvcq_ex->read_vendor_err = sxe2_wc_read_vendor_err; ++ ibvcq_ex->read_wc_flags = sxe2_wc_read_wc_flags; ++ ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) { ++ ibvcq_ex->read_byte_len = sxe2_wc_read_byte_len; ++ } ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM) { ++ ibvcq_ex->read_imm_data = sxe2_wc_read_imm_data; ++ } ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM) { ++ ibvcq_ex->read_qp_num = sxe2_wc_read_qp_num; ++ } ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP) { ++ ibvcq_ex->read_src_qp = sxe2_wc_read_src_qp; ++ } ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID) { ++ ibvcq_ex->read_slid = sxe2_wc_read_slid; ++ } ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL) { ++ ibvcq_ex->read_sl = sxe2_wc_read_sl; ++ } ++ if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS) { ++ ibvcq_ex->read_dlid_path_bits = sxe2_wc_read_dlid_path_bits; ++ } ++} ++ ++static bool 
ucreate_cq_attr_check(struct sxe2_rdma_ucontext *sctx, ++ const struct ibv_cq_init_attr_ex *cq_attr) ++{ ++ bool isFail = true; ++ ++ if (cq_attr->comp_mask & (~UCREATE_CQ_SUPPORTED_COMP_MASK)) { ++ SXE2_VERBS_LOG_ERROR_BDF("Unsupported comp_mask(%u) for create cq\n", cq_attr->comp_mask); ++ goto end; ++ } ++ ++ if ((cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS) && ++ (cq_attr->flags & (~UCREATE_CQ_SUPPORTED_FLAGS))) { ++ SXE2_VERBS_LOG_ERROR_BDF("Unsupported creation flags(%u) requested for create cq\n", cq_attr->flags); ++ goto end; ++ } ++ ++ isFail = false; ++end: ++ return isFail; ++} ++static int sxe2_alloc_cq_buf(struct sxe2_rdma_ucontext *sctx, sxe2_buf_s *buf, size_t size) ++{ ++ int ret; ++ sxe2_alloc_type_e type; ++ sxe2_alloc_type_e default_type = SXE2_ALLOC_TYPE_ANON; ++ long page_size = 0; ++ ++ page_size = sysconf(_SC_PAGE_SIZE); ++ if (page_size < 0) { ++ SXE2_VERBS_LOG_ERROR_BDF("get system page size failed."); ++ ret = EPERM; ++ goto end; ++ } ++ ++ sxe2_alloc_type_get(SXE2_CQ_PREFIX, &type, default_type); ++ ++ ret = sxe2_prefered_buf_alloc( ++ sctx, buf, size, page_size, type); ++ if (ret) { ++ SXE2_VERBS_LOG_ERROR_BDF("cq buf alloc err ret (%d)", ret); ++ goto end; ++ } ++ ++ memset(buf->buf, 0, buf->length); ++ ++end: ++ return ret; ++} ++static void sxe2_free_cq_buf(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf) ++{ ++ sxe2_actual_buf_free(ctx, buf); ++} ++ ++static struct ibv_cq_ex *ucreate_cq(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *cq_attr, ++ bool ext_cq) ++{ ++ int ret; ++ unsigned int ncqe; ++ struct sxe2_ucreate_cq_ex cmd_ex; ++ struct sxe2_ucreate_cq_ex_resp resp_ex; ++ struct sxe2_rdma_ucontext *sctx; ++ struct sxe2_ucq *ucq; ++ struct sxe2_cq_uk_init_info info; ++ size_t total_size = 0; ++ ++ sctx = container_of(context, struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ SXE2_VERBS_LOG_INFO_BDF("ucreate_cq start \n"); ++ ++ if ((cq_attr->cqe <= 0) || (cq_attr->cqe > sctx->uk_attrs.max_hw_cq_size - 1)) { ++ 
errno = EINVAL; ++ SXE2_VERBS_LOG_ERROR_BDF("inv cqe:%d or vector:%d is invalid\n", cq_attr->cqe, cq_attr->comp_vector); ++ goto null; ++ } ++ ++ if (ucreate_cq_attr_check(sctx, cq_attr)) { ++ errno = EINVAL; ++ SXE2_VERBS_LOG_ERROR_BDF("invalid param\n"); ++ goto null; ++ } ++ ++ ncqe = cq_attr->cqe; ++ ucq = calloc(1, sizeof(*ucq)); ++ if (!ucq) { ++ errno = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF("cq user mem alloc failed, mem size:%ld\n", sizeof(*ucq)); ++ goto null; ++ } ++ ++ if (pthread_spin_init(&ucq->lock, PTHREAD_PROCESS_SHARED)) { ++ errno = EFAULT; ++ SXE2_VERBS_LOG_ERROR_BDF("cq lock init failed\n"); ++ goto err_lock_init; ++ } ++ ++ memset(&cmd_ex, 0, sizeof(cmd_ex)); ++ memset(&resp_ex, 0, sizeof(resp_ex)); ++ memset(&info, 0, sizeof(info)); ++ ++ ucq->comp_vector = cq_attr->comp_vector; ++ info.ncqe = get_cqe_count(cq_attr->cqe); ++ if ((info.ncqe < SXE2_VERBS_MIN_CQ_SIZE) || (info.ncqe > SXE2_VERBS_MAX_CQ_SIZE)) { ++ errno = EINVAL; ++ SXE2_VERBS_LOG_ERROR_BDF("cq size(%d)", info.ncqe); ++ goto err_cq_base; ++ } ++ ++ total_size = get_cq_total_bytes(info.ncqe); ++ ++ ucq->buf_size = total_size; ++ if (sxe2_alloc_cq_buf(sctx, &ucq->buf, total_size)) { ++ errno = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF("ncqe %#x total_size %zu", ncqe, total_size); ++ goto err_cq_base; ++ } ++ ++ info.cq_base = ucq->buf.buf; ++ if (!info.cq_base) { ++ errno = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF("cq buf alloc failed, ncqe:%d size:%ld\n", info.ncqe, total_size); ++ goto err_cq_base; ++ } ++ memset(info.cq_base, 0, total_size); ++ ++ info.doorbell_note = sxe2_alloc_hw_buf(SXE2_DB_NOTE_SIZE); ++ if (!info.doorbell_note) { ++ errno = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF("cq db note buf alloc failed, ncqe:%d size:%d\n", info.ncqe, SXE2_DB_NOTE_SIZE); ++ goto err_alloc_db; ++ } ++ memset(info.doorbell_note, 0, SXE2_DB_NOTE_SIZE); ++ set_32bit_val(info.doorbell_note, 4, (__u32)(SXE2_UCQ_DB_NOTE_CMD_SN) << SXE2_UCQ_DB_NOTE_CMD_SN_SHIFT); ++ ++ cq_attr->cqe = info.ncqe; ++ 
cmd_ex.user_cq_buf = (__u64)((uintptr_t)info.cq_base); ++ cmd_ex.user_cq_db_note = (__u64)((uintptr_t)info.doorbell_note); ++ ++ SXE2_VERBS_LOG_INFO_BDF("create cq comp_vector%d cqe:%d \n", cq_attr->comp_vector, cq_attr->cqe); ++ ++ ret = ibv_cmd_create_cq_ex(context, cq_attr, &ucq->verbs_cq, &cmd_ex.ibv_cmd, ++ sizeof(cmd_ex), &resp_ex.ibv_resp, sizeof(resp_ex), 0); ++ if (ret != 0) { ++ errno = ret; ++ SXE2_VERBS_LOG_ERROR_BDF("ibv cmd create cq err(%d)\n", ret); ++ goto err_create_cq; ++ } ++ ++ if (ext_cq) { ++ sxe2_ibvcq_ex_fill_priv_funcs(ucq, cq_attr); ++ } ++ ++ info.cq_id = resp_ex.cq_id; ++ ucq->verbs_cq.cq.cqe = (int)ncqe; ++ SXE2_VERBS_LOG_INFO_BDF("create cq cqn(%#x) cqsize %d\n", info.cq_id, info.ncqe); ++ ++ info.cqe_alloc_db = (__le64 *)sctx->cq_db_arm; ++ sxe2_uk_cq_init(&ucq->cq, &info); ++ ++ return &ucq->verbs_cq.cq_ex; ++ ++err_create_cq: ++ sxe2_free_hw_buf(info.doorbell_note, SXE2_DB_NOTE_SIZE); ++ ++err_alloc_db: ++ sxe2_free_cq_buf(sctx, &ucq->buf); ++ ++err_cq_base: ++ pthread_spin_destroy(&ucq->lock); ++ ++err_lock_init: ++ free(ucq); ++ ++null: ++ return NULL; ++} ++ ++struct ibv_cq *sxe2_ucreate_cq(struct ibv_context *context, int cqe, ++ struct ibv_comp_channel *channel, int comp_vector) ++{ ++ struct ibv_cq_ex *cq; ++ struct ibv_cq_init_attr_ex cq_attr; ++ struct sxe2_rdma_ucontext *sctx; ++ ++ sctx = container_of(context, struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ SXE2_VERBS_LOG_INFO_BDF("sxe2_ucreate_cq comp_vector%d cqe:%d \n", comp_vector, cqe); ++ SXE2_VERBS_LOG_INFO_BDF("sxe2_ucreate_cq ctx num_comp_vectors:%d\n", context->num_comp_vectors); ++ memset(&cq_attr, 0, sizeof(cq_attr)); ++ cq_attr.cqe = cqe; ++ cq_attr.channel = channel; ++ cq_attr.comp_vector = comp_vector; ++ ++ cq = ucreate_cq(context, &cq_attr, false); ++ if (!cq) { ++ SXE2_VERBS_LOG_ERROR_BDF("user create cq failed\n"); ++ } ++ ++ return cq ? 
ibv_cq_ex_to_cq(cq) : NULL; ++} ++ ++struct ibv_cq_ex *sxe2_ucreate_cq_ex(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *cq_attr) ++{ ++ struct ibv_cq_ex *cq; ++ struct sxe2_rdma_ucontext *sctx; ++ ++ sctx = container_of(context, struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ if (cq_attr->wc_flags & ~SXE2_CQ_SUPPORTED_WC_FLAGS) { ++ SXE2_VERBS_LOG_ERROR_BDF("user create cq ex flags err\n"); ++ errno = EOPNOTSUPP; ++ cq = NULL; ++ goto end; ++ } ++ ++ cq = ucreate_cq(context, cq_attr, true); ++ ++end: ++ return cq; ++} ++ ++int sxe2_umodify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr) ++{ ++ int ret; ++ struct ibv_modify_cq cmd; ++ struct sxe2_rdma_ucontext *sctx; ++ ++ if (!cq || !attr) { ++ SXE2_VERBS_LOG_ERROR("user modify cq para err\n"); ++ ret = EINVAL; ++ goto end; ++ } ++ sctx = container_of(cq->context, struct sxe2_rdma_ucontext, ibv_ctx.context); ++ memset(&cmd, 0, sizeof(cmd)); ++ ++ ret = ibv_cmd_modify_cq(cq, attr, &cmd, sizeof(cmd)); ++ if (ret) { ++ SXE2_VERBS_LOG_ERROR_BDF("ibv cmd modify cq err(%d)\n", ret); ++ } ++ ++end: ++ return ret; ++} ++ ++int sxe2_udestroy_cq(struct ibv_cq *cq) ++{ ++ struct sxe2_ucq *ucq; ++ int ret; ++ struct sxe2_rdma_ucontext *sctx; ++ ++ sctx = container_of(cq->context, struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ucq = container_of(cq, struct sxe2_ucq, verbs_cq.cq); ++ ++ ret = pthread_spin_destroy(&ucq->lock); ++ if (ret != 0) { ++ SXE2_VERBS_LOG_ERROR("user destroy cq lock failed\n"); ++ goto end; ++ } ++ ++ ret = ibv_cmd_destroy_cq(cq); ++ if (ret != 0) { ++ SXE2_VERBS_LOG_ERROR("ibv cmd destory cq err(%d)\n", ret); ++ goto end; ++ } ++ ++ sxe2_free_cq_buf(sctx, &ucq->buf); ++ sxe2_free_hw_buf(ucq->cq.doorbell_note, SXE2_DB_NOTE_SIZE); ++ ucq->cq.doorbell_note = NULL; ++ free(ucq); ++end: ++ return ret; ++} ++ +diff -Naur rdma-core-48.0.bak/providers/sxe2/db.c rdma-core-48.0/providers/sxe2/db.c +--- rdma-core-48.0.bak/providers/sxe2/db.c 1970-01-01 08:00:00.000000000 +0800 ++++ 
rdma-core-48.0/providers/sxe2/db.c 2026-05-27 17:08:05.579104733 +0800 +@@ -0,0 +1,622 @@ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "log.h" ++#include "sxe2_common.h" ++ ++int gsingle_threaded = 0; ++ ++#define SXE2_VERBS_STRTOL_BASE 0 ++#define SXE2_VERBS_LLWQE_PER_DB 15 ++#define SXE2_VERBS_PF_DB_PAGE 2 ++#define SXE2_VERBS_VF_DB_PAGE 1 ++#define SXE2_VERBS_DED_LLWQE_PERCENT \ ++ 3 ++#define SXE2_VERBS_LLWQE_OFFSET 0x100 ++#define SXE2_VERBS_LLWQE_DB_OFFSET \ ++ 0x10 ++#define SXE2_VERBS_LLWQE_SIZE 256 ++#define SXE2_VERBS_LLWQE_DB_SIZE 16 ++#define SXE2_VERBS_CQ_ARM_OFFSET 0x8 ++#define SXE2_VERBS_CQ_INFO_OFFSET 0xC ++ ++enum sxe2_verbs_cmd_attr_num { ++ SXE2_VERBS_ONE_CMD_ATTR = 1, ++ SXE2_VERBS_TWO_CMD_ATTRS = 2, ++ SXE2_VERBS_THREE_CMD_ATTRS = 3, ++ SXE2_VERBS_FOUR_CMD_ATTRS = 4, ++ SXE2_VERBS_FIVE_CMD_ATTRS = 5, ++}; ++ ++ ++static struct sxe2_verbs_llwqe *db_ualloc_page(struct ibv_context *context, ++ __u32 flags, ++ __u32 db_mmap_size, ++ __u32 db_page_id, ++ __u64 db_mmap_offset) ++{ ++ struct sxe2_verbs_llwqe *llwqe = NULL; ++ struct sxe2_rdma_ucontext *sctx = to_sctx(context); ++ struct sxe2_db_mmap_db_page_addr *page_addr_entry = NULL; ++ struct sxe2_db_mmap_db_page_addr *mmap_addr_entry = NULL; ++ struct sxe2_db_mmap_db_page_addr *tmp_mmap_addr_entry = NULL; ++ __u32 db_page_id_allign; ++ __u32 page_id_mod; ++ long page_size = sysconf(_SC_PAGE_SIZE); ++ __u32 db_page_multiplier = page_size/SXE2_DB_PAGE_SIZE; ++ ++ llwqe = calloc(1, sizeof(*llwqe)); ++ if (!llwqe) { ++ SXE2_VERBS_LOG_ERROR_BDF("llwqe calloc fail"); ++ goto end; ++ } ++ llwqe->db_mmap_size = db_mmap_size; ++ llwqe->db_page_id = db_page_id; ++ llwqe->db_mmap_offset = db_mmap_offset; ++ llwqe->db_page_addr = NULL; ++ ++ page_id_mod = llwqe->db_page_id % db_page_multiplier; ++ if (page_id_mod != 0) { ++ list_for_each_safe(&sctx->mmap_page_addr_list, mmap_addr_entry, tmp_mmap_addr_entry, ++ list_entry) ++ { ++ db_page_id_allign = 
llwqe->db_page_id - page_id_mod; ++ if (mmap_addr_entry->db_page_id == db_page_id_allign) { ++ llwqe->db_page_addr = (void*)((__u64)mmap_addr_entry->db_page_addr + page_id_mod * SXE2_DB_PAGE_SIZE); ++ llwqe->db_page_id = mmap_addr_entry->db_page_id + page_id_mod; ++ break; ++ } ++ } ++ } ++ if (!llwqe->db_page_addr) { ++ llwqe->db_page_addr = ++ mmap(NULL, db_mmap_size, PROT_WRITE | PROT_READ, MAP_SHARED, ++ context->cmd_fd, db_mmap_offset); ++ if (llwqe->db_page_addr == MAP_FAILED) { ++ SXE2_VERBS_LOG_ERROR_BDF("mmap db fail"); ++ goto free_llwqe; ++ } ++ page_addr_entry = calloc(1, sizeof(*page_addr_entry)); ++ if (!page_addr_entry) { ++ SXE2_VERBS_LOG_ERROR_BDF("page addr entry alloc fail"); ++ goto unmap_page; ++ } ++ page_addr_entry->db_page_id = llwqe->db_page_id - page_id_mod; ++ page_addr_entry->db_page_addr = llwqe->db_page_addr; ++ page_addr_entry->mmap_size = db_mmap_size; ++ list_add_tail(&sctx->mmap_page_addr_list, &page_addr_entry->list_entry); ++ ++ if (page_id_mod != 0) { ++ llwqe->db_page_addr = (void*)((__u64)llwqe->db_page_addr + page_id_mod * SXE2_DB_PAGE_SIZE); ++ } ++ } ++ llwqe->mmaped_entry = true; ++ ++ SXE2_VERBS_LOG_DEBUG_BDF( ++ "DB ALLOC:llwqe->db_handle:%#x, db_mmap_offset:%#llx, length:%#x, " ++ "pageid:%#x, over", ++ llwqe->db_handle, (__u64)llwqe->db_mmap_offset, llwqe->db_mmap_size, ++ llwqe->db_page_id); ++ ++end: ++ return llwqe; ++unmap_page: ++ munmap(llwqe->db_page_addr, db_mmap_size); ++free_llwqe: ++ free(llwqe); ++ llwqe = NULL; ++ goto end; ++} ++ ++static void db_uinsert_llwqe(struct sxe2_rdma_ucontext *sctx, ++ struct sxe2_verbs_llwqe *llwqe_first) ++{ ++ struct list_head *head; ++ struct sxe2_verbs_llwqe *llwqe = llwqe_first; ++ __u64 idx; ++ ++ if (llwqe_first->qp_dedicated) { ++ head = &sctx->dedicated_llwqe_list; ++ } else { ++ head = &sctx->shared_llwqe_list; ++ } ++ ++ for (idx = 0; idx < SXE2_VERBS_LLWQE_PER_DB; idx++) { ++ if (idx != 0) { ++ llwqe = calloc(1, sizeof(*llwqe)); ++ } ++ if (!llwqe) { ++ 
SXE2_VERBS_LOG_ERROR_BDF("llwqe is NULL"); ++ goto end; ++ } ++ ++ llwqe->wqe_addr = llwqe_first->db_page_addr + SXE2_VERBS_LLWQE_OFFSET + ++ (idx * SXE2_VERBS_LLWQE_SIZE); ++ llwqe->db_addr = llwqe_first->db_page_addr + ++ SXE2_VERBS_LLWQE_DB_OFFSET + ++ (idx * SXE2_VERBS_LLWQE_DB_SIZE); ++ llwqe->wqe_buf_size = ++ llwqe_first->no_ll_mode ? 0 : SXE2_VERBS_LLWQE_SIZE; ++ llwqe->num_llwqe = llwqe_first->no_ll_mode ? 0 : 1; ++ list_node_init(&llwqe->list_entry); ++ list_add_tail(head, &llwqe->list_entry); ++ pthread_spin_init(&llwqe->lock, PTHREAD_PROCESS_SHARED); ++ if (idx != 0) { ++ llwqe->db_page_addr = llwqe_first->db_page_addr; ++ llwqe->db_page_id = llwqe_first->db_page_id; ++ llwqe->db_handle = llwqe_first->db_handle; ++ llwqe->no_ll_mode = llwqe_first->no_ll_mode; ++ llwqe->db_mmap_offset = llwqe_first->db_mmap_offset; ++ } ++ if (llwqe_first->qp_dedicated) { ++ sctx->alloc_dedicated_llwqes++; ++ llwqe->qp_dedicated = true; ++ } else { ++ sctx->alloc_shared_llwqes++; ++ llwqe->qp_shared = true; ++ } ++ llwqe->need_lock = llwqe_first->qp_shared && (!gsingle_threaded); ++ } ++ ++end: ++ return; ++} ++ ++int sxe2_uget_single_threaded_env(void) ++{ ++ char *env; ++ int ret_code = 0; ++ ++ env = getenv(SXE2_SINGLE_THREADED); ++ if (env) { ++ ret_code = strncmp(env, "1", 1) ? 0 : 1; ++ } ++ return ret_code; ++} ++ ++int sxe2_uget_ll_mode(void) ++{ ++ char *env; ++ int ret_code = 0; ++ ++ env = getenv(SXE2_LL_MODE); ++ if (env) { ++ ret_code = strncmp(env, "0", 1) ? 1 : 0; ++ } ++ return ret_code; ++} ++ ++int sxe2_uget_tot_llwqe(struct sxe2_rdma_ucontext *sctx, ++ struct sxe2_get_context_resp *resp) ++{ ++ errno = 0; ++ char *env; ++ int total_llwqe = 0; ++ int config_tot_llwqe = 0; ++ int max_tot_llwqe = 0; ++ int total_db_page = 0; ++ int cal_db_page = 0; ++ ++ max_tot_llwqe = (int)(resp->max_db * SXE2_VERBS_LLWQE_PER_DB); ++ ++ if (resp->is_pf) { ++ cal_db_page = (SXE2_VERBS_PF_DB_PAGE < resp->max_db) ++ ? 
SXE2_VERBS_PF_DB_PAGE ++ : (int)resp->max_db; ++ } else { ++ cal_db_page = (SXE2_VERBS_VF_DB_PAGE < resp->max_db) ++ ? SXE2_VERBS_VF_DB_PAGE ++ : (int)resp->max_db; ++ } ++ ++ env = getenv(SXE2_TOTAL_LL_WQE); ++ if (env) { ++ config_tot_llwqe = (int)strtol(env, NULL, SXE2_VERBS_STRTOL_BASE); ++ if (errno == ERANGE || config_tot_llwqe < 1) { ++ total_db_page = cal_db_page; ++ SXE2_VERBS_LOG_WARN_BDF( ++ "Config total_llwqe %d invalid, modify total_db_page %d", ++ config_tot_llwqe, total_db_page); ++ } else if (config_tot_llwqe > max_tot_llwqe) { ++ total_db_page = (int)resp->max_db; ++ SXE2_VERBS_LOG_WARN_BDF("Config total_llwqe %d > max_tot_llwqe " ++ "%d, modify total_db_page %d", ++ config_tot_llwqe, max_tot_llwqe, ++ total_db_page); ++ } else { ++ total_db_page = (config_tot_llwqe + SXE2_VERBS_LLWQE_PER_DB - 1) / ++ SXE2_VERBS_LLWQE_PER_DB; ++ SXE2_VERBS_LOG_INFO_BDF( ++ "Config and align total_llwqe %d, total_db_page %d", ++ config_tot_llwqe, total_db_page); ++ } ++ } else { ++ total_db_page = cal_db_page; ++ SXE2_VERBS_LOG_WARN_BDF( ++ "No config total_llwqe, modify total_db_page %d", total_db_page); ++ } ++ ++ total_llwqe = total_db_page * SXE2_VERBS_LLWQE_PER_DB; ++ return total_llwqe; ++} ++ ++int sxe2_uget_ded_llwqe(struct sxe2_rdma_ucontext *sctx, int total_llwqe) ++{ ++ errno = 0; ++ char *env; ++ int cal_ded_llwqe = 0; ++ int config_ded_llwqe = 0; ++ int ded_llwqe = 0; ++ ++ cal_ded_llwqe = ((total_llwqe / SXE2_VERBS_LLWQE_PER_DB) / ++ SXE2_VERBS_DED_LLWQE_PERCENT) * ++ SXE2_VERBS_LLWQE_PER_DB; ++ ++ env = getenv(SXE2_DEDICATED_LL_WQE); ++ if (env) { ++ config_ded_llwqe = (int)strtol(env, NULL, SXE2_VERBS_STRTOL_BASE); ++ if (errno == ERANGE || config_ded_llwqe < 0) { ++ ded_llwqe = cal_ded_llwqe; ++ SXE2_VERBS_LOG_WARN_BDF( ++ "Config ded_llwqe %d invalid, modify ded_llwqe %d", ++ config_ded_llwqe, ded_llwqe); ++ } else if (config_ded_llwqe > total_llwqe) { ++ ded_llwqe = total_llwqe; ++ SXE2_VERBS_LOG_WARN_BDF( ++ "Config ded_llwqe %d > total_llwqe 
%d, modify ded_llwqe %d", ++ config_ded_llwqe, total_llwqe, total_llwqe); ++ } else { ++ ded_llwqe = ((config_ded_llwqe + SXE2_VERBS_LLWQE_PER_DB - 1) / ++ SXE2_VERBS_LLWQE_PER_DB) * ++ SXE2_VERBS_LLWQE_PER_DB; ++ SXE2_VERBS_LOG_INFO_BDF("Config and align ded_llwqe %d", ++ config_ded_llwqe); ++ } ++ } else { ++ ded_llwqe = cal_ded_llwqe; ++ SXE2_VERBS_LOG_WARN_BDF("No config ded_llwqe, modify ded_llwqe %d", ++ ded_llwqe); ++ } ++ ++ return ded_llwqe; ++} ++ ++struct sxe2_verbs_llwqe * ++db_ualloc_page_and_llwqes(struct ibv_context *context, bool dedicated, ++ __u32 db_mmap_size, ++ __u32 db_page_id, ++ __u64 db_mmap_offset) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_verbs_llwqe *llwqe = NULL; ++ ++ sctx = to_sctx(context); ++ if (db_page_id == 0) { ++ SXE2_VERBS_LOG_ERROR_BDF("have no llwqe page"); ++ goto end; ++ } ++ llwqe = db_ualloc_page(context, SXE2_VERBS_DB_PAGE_TYPE_LLWQE, db_mmap_size, db_page_id, db_mmap_offset); ++ if (!llwqe) { ++ SXE2_VERBS_LOG_ERROR_BDF("alloc db page fail"); ++ goto end; ++ } ++ ++ if (dedicated) { ++ llwqe->qp_dedicated = true; ++ } else { ++ llwqe->qp_shared = true; ++ } ++ ++ db_uinsert_llwqe(sctx, llwqe); ++ ++end: ++ return llwqe; ++} ++ ++static void *db_ummap(int fd, off_t offset) ++{ ++ void *map = NULL; ++ long page_size = sysconf(_SC_PAGE_SIZE); ++ ++ if (page_size < 0) { ++ map = MAP_FAILED; ++ goto end; ++ } ++ map = mmap(NULL, (size_t)page_size, PROT_WRITE | PROT_READ, MAP_SHARED, fd, ++ offset); ++ if (map == MAP_FAILED) { ++ return map; ++ } ++ ++ if (ibv_dontfork_range(map, page_size)) { ++ munmap(map, page_size); ++ return MAP_FAILED; ++ } ++end: ++ return map; ++} ++ ++static void db_umunmap(void *map) ++{ ++ long page_size = sysconf(_SC_PAGE_SIZE); ++ if (page_size < 0) { ++ goto end; ++ } ++ ++ ibv_dofork_range(map, (size_t)page_size); ++ munmap(map, page_size); ++end: ++ return; ++} ++ ++int sxe2_uinit_doorbell(struct sxe2_rdma_ucontext *sctx, ++ struct sxe2_get_context_resp *resp) ++{ ++ int 
ret_code = 0; ++ int config_ll_mode = 0; ++ int total_llwqe = 0; ++ int ded_llwqe = 0; ++ __u64 mmap_key; ++ long page_size = sysconf(_SC_PAGE_SIZE); ++ ++ gsingle_threaded = sxe2_uget_single_threaded_env( ); ++ ++ config_ll_mode = sxe2_uget_ll_mode( ); ++ ++ if (config_ll_mode) { ++ total_llwqe = sxe2_uget_tot_llwqe(sctx, resp); ++ if (total_llwqe) { ++ ded_llwqe = sxe2_uget_ded_llwqe(sctx, total_llwqe); ++ } else { ++ ret_code = EINVAL; ++ SXE2_VERBS_LOG_ERROR_BDF( ++ "config ll_mode but total_llwqe=0, ret_code %d", ret_code); ++ goto end; ++ } ++ } ++ ++ sctx->ll_mode = config_ll_mode; ++ sctx->tot_ll_wqes = (__u32)total_llwqe; ++ sctx->ded_ll_wqes = (__u32)ded_llwqe; ++ sctx->shared_ll_wqes = (__u32)(total_llwqe - ded_llwqe); ++ list_head_init(&sctx->shared_llwqe_list); ++ list_head_init(&sctx->dedicated_llwqe_list); ++ list_head_init(&sctx->mmap_page_addr_list); ++ pthread_mutex_init(&sctx->alloc_llwqe_mutex, NULL); ++ sctx->alloc_dedicated_llwqes = 0; ++ sctx->alloc_shared_llwqes = 0; ++ ++ mmap_key = resp->db_mmap_key; ++ sctx->qp_db_no_llwqe = db_ummap(sctx->ibv_ctx.context.cmd_fd, (off_t)mmap_key); ++ if (sctx->qp_db_no_llwqe == MAP_FAILED) { ++ ret_code = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF("mmap db page fail, ret_code %d", ret_code); ++ goto end; ++ } ++ if (page_size > SXE2_DB_PAGE_SIZE) ++ sctx->qp_db_no_llwqe = (void*)((__u64)sctx->qp_db_no_llwqe + SXE2_DB_PAGE_SIZE); ++ ++ sctx->cq_db_arm = sctx->qp_db_no_llwqe + SXE2_VERBS_CQ_ARM_OFFSET; ++ sctx->cq_db_info = sctx->qp_db_no_llwqe + SXE2_VERBS_CQ_INFO_OFFSET; ++ ++ SXE2_VERBS_LOG_INFO_BDF( ++ "init doorbell over: single_thread %d, ll_mode %d, " ++ "total_llwqes %#x, ded_llwqes %#x, shared_llwqes %#x, " ++ "qp_db_no_llwqe addr %p, cq_db_arm addr %p", ++ gsingle_threaded, sctx->ll_mode, sctx->tot_ll_wqes, sctx->ded_ll_wqes, ++ sctx->shared_ll_wqes, sctx->qp_db_no_llwqe, sctx->cq_db_arm); ++ ++end: ++ return ret_code; ++} ++ ++void sxe2_ufree_doorbell(struct ibv_context *context) ++{ ++ struct 
sxe2_verbs_llwqe *llwqe = NULL; ++ struct sxe2_verbs_llwqe *tmp_llwqe = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_db_mmap_db_page_addr *mmap_addr_entry = NULL; ++ struct sxe2_db_mmap_db_page_addr *tmp_mmap_addr_entry = NULL; ++ long page_size = sysconf(_SC_PAGE_SIZE); ++ ++ if (context == NULL) { ++ SXE2_VERBS_LOG_ERROR_BDF("ibv context is NULL"); ++ goto end; ++ } ++ sctx = to_sctx(context); ++ ++ list_for_each_safe(&sctx->dedicated_llwqe_list, llwqe, tmp_llwqe, ++ list_entry) ++ { ++ SXE2_VERBS_LOG_DEBUG_BDF( ++ "DB FREE:llwqe->db_handle:%#x, db_mmap_offset:%#llx, " ++ "db_mmap_size:%#x, db_page_id:%#x, start(dedicated)", ++ llwqe->db_handle, (__u64)llwqe->db_mmap_offset, llwqe->db_mmap_size, ++ llwqe->db_page_id); ++ list_del(&llwqe->list_entry); ++ free(llwqe); ++ llwqe = NULL; ++ } ++ ++ list_for_each_safe(&sctx->shared_llwqe_list, llwqe, tmp_llwqe, list_entry) ++ { ++ SXE2_VERBS_LOG_DEBUG_BDF( ++ "DB FREE:llwqe->db_handle:%#x, db_mmap_offset:%#llx, " ++ "db_mmap_size:%#x, db_page_id:%#x, start(shared)", ++ llwqe->db_handle, (__u64)llwqe->db_mmap_offset, llwqe->db_mmap_size, ++ llwqe->db_page_id); ++ list_del(&llwqe->list_entry); ++ free(llwqe); ++ llwqe = NULL; ++ } ++ ++ list_for_each_safe(&sctx->mmap_page_addr_list, mmap_addr_entry, tmp_mmap_addr_entry, ++ list_entry) ++ { ++ if (mmap_addr_entry->db_page_addr && ++ munmap(mmap_addr_entry->db_page_addr, mmap_addr_entry->mmap_size)) { ++ SXE2_VERBS_LOG_ERROR_BDF("munmap db fail"); ++ } ++ SXE2_VERBS_LOG_DEBUG_BDF("DB FREE: munmap page_addr(%p) page_size(%u)", ++ mmap_addr_entry->db_page_addr, mmap_addr_entry->mmap_size); ++ list_del(&mmap_addr_entry->list_entry); ++ free(mmap_addr_entry); ++ mmap_addr_entry = NULL; ++ } ++ ++ if (page_size > SXE2_DB_PAGE_SIZE) ++ sctx->qp_db_no_llwqe = (void*)((__u64)sctx->qp_db_no_llwqe - SXE2_DB_PAGE_SIZE); ++ db_umunmap(sctx->qp_db_no_llwqe); ++ sctx->qp_db_no_llwqe = NULL; ++ ++ SXE2_VERBS_LOG_INFO_BDF("free doorbell over"); ++ ++end: ++ return; ++} 
++
++/*
++ * Pick the shared LLWQE with the lowest user count and take a reference on it.
++ * Returns NULL when the shared list is empty. Both callers in this file hold
++ * sctx->alloc_llwqe_mutex around the call.
++ */
++static struct sxe2_verbs_llwqe *get_idle_shared_llwqe(struct sxe2_rdma_ucontext *sctx)
++{
++    struct sxe2_verbs_llwqe *best = NULL;
++    struct sxe2_verbs_llwqe *cur = NULL;
++
++    list_for_each(&sctx->shared_llwqe_list, cur, list_entry)
++    {
++        /* Strict '<' keeps the earliest entry among equally loaded ones. */
++        if (!best || cur->count < best->count) {
++            best = cur;
++        }
++    }
++    if (best) {
++        best->count++;
++    }
++
++    return best;
++}
++
++/*
++ * Map a new doorbell page, split it into LLWQE slots and hand one slot to
++ * the calling QP.
++ *
++ * @alloc_page_type selects the pool: SXE2_VERBS_DB_PAGE_TYPE_DEDICATED pops
++ * a slot from the dedicated list, any other value picks the least used slot
++ * of the shared list.
++ *
++ * Returns the slot, or NULL when @context is NULL or the page could not be
++ * set up.
++ */
++struct sxe2_verbs_llwqe *alloc_db_page_and_get_qp_llwqe(struct ibv_context *context,
++                                                        __u32 db_mmap_size,
++                                                        __u32 db_page_id,
++                                                        __u64 db_mmap_offset,
++                                                        __u8 alloc_page_type)
++{
++    struct sxe2_verbs_llwqe *llwqe = NULL;
++    struct sxe2_rdma_ucontext *sctx = NULL;
++
++    if (context == NULL) {
++        /*
++         * NOTE(review): if the log macro reads sctx (e.g. for the BDF) it is
++         * still invoked with sctx == NULL here - confirm against log.h.
++         */
++        SXE2_VERBS_LOG_ERROR_BDF("ibv context is NULL");
++        /*
++         * Nothing is locked yet and sctx is NULL: leave directly instead of
++         * going through the unlock at 'end', which would dereference sctx.
++         */
++        return NULL;
++    }
++    sctx = to_sctx(context);
++
++    pthread_mutex_lock(&sctx->alloc_llwqe_mutex);
++
++    if (alloc_page_type == SXE2_VERBS_DB_PAGE_TYPE_DEDICATED) {
++        if (!db_ualloc_page_and_llwqes(context, true, db_mmap_size, db_page_id, db_mmap_offset)) {
++            SXE2_VERBS_LOG_ERROR_BDF(
++                "LL_WQE:Alloc new ded db page failed");
++            goto end;
++        }
++        llwqe = list_pop(&sctx->dedicated_llwqe_list, struct sxe2_verbs_llwqe,
++                         list_entry);
++        if (llwqe) {
++            llwqe->count++;
++        }
++    } else {
++        if (!db_ualloc_page_and_llwqes(context, false, db_mmap_size, db_page_id, db_mmap_offset)) {
++            SXE2_VERBS_LOG_ERROR_BDF("LLWQE:Alloc new shared db page failed");
++            goto end;
++        }
++        llwqe = get_idle_shared_llwqe(sctx);
++    }
++
++end:
++    pthread_mutex_unlock(&sctx->alloc_llwqe_mutex);
++    if (llwqe) {
++        SXE2_VERBS_LOG_DEBUG_BDF(
++            "DB:Get LL_WQE:db_page_id %#x, ll_wqe_count %#x, dedicated %#x, "
++            "shared %#x, wqe_addr %p, db_addr %p",
++            llwqe->db_page_id, llwqe->count, llwqe->qp_dedicated,
++            llwqe->qp_shared, llwqe->wqe_addr, llwqe->db_addr);
++    } else {
++        SXE2_VERBS_LOG_ERROR_BDF("DB:Get LL_WQE failed");
++    }
++    return llwqe;
++}
++
++/*
++ * Try to hand out an already mapped LLWQE slot without mapping a new page.
++ *
++ * Order of preference: a free dedicated slot; otherwise, while the dedicated
++ * budget (ded_ll_wqes) is not used up, ask for a new dedicated page;
++ * otherwise the least used shared slot, additionally asking for a new shared
++ * page while the shared budget (shared_ll_wqes) is not used up.
++ *
++ * @need_alloc_page is set to false when a dedicated slot is returned and to
++ * true when a new page is requested (with @alloc_page_type naming the pool).
++ * Neither output is written on the remaining paths, so callers have to
++ * initialise them.
++ *
++ * Returns the slot or NULL; NULL is also returned when @context is NULL.
++ */
++struct sxe2_verbs_llwqe *db_uget_qp_llwqe(struct ibv_context *context,
++                                          bool *need_alloc_page,
++                                          __u8 *alloc_page_type)
++{
++    struct sxe2_verbs_llwqe *llwqe = NULL;
++    struct sxe2_rdma_ucontext *sctx = NULL;
++
++    if (context == NULL) {
++        /*
++         * NOTE(review): if the log macro reads sctx (e.g. for the BDF) it is
++         * still invoked with sctx == NULL here - confirm against log.h.
++         */
++        SXE2_VERBS_LOG_ERROR_BDF("ibv context is NULL");
++        /* The mutex was never taken; do not run the unlock at 'end'. */
++        return NULL;
++    }
++    sctx = to_sctx(context);
++
++    pthread_mutex_lock(&sctx->alloc_llwqe_mutex);
++
++    llwqe = list_pop(&sctx->dedicated_llwqe_list, struct sxe2_verbs_llwqe,
++                     list_entry);
++    if (llwqe) {
++        llwqe->count++;
++        *need_alloc_page = false;
++        goto end;
++    }
++    if (sctx->alloc_dedicated_llwqes < sctx->ded_ll_wqes) {
++        *need_alloc_page = true;
++        *alloc_page_type = SXE2_VERBS_DB_PAGE_TYPE_DEDICATED;
++        goto end;
++    }
++
++    llwqe = get_idle_shared_llwqe(sctx);
++
++    if (sctx->alloc_shared_llwqes < sctx->shared_ll_wqes) {
++        *need_alloc_page = true;
++        *alloc_page_type = SXE2_VERBS_DB_PAGE_TYPE_SHARED;
++    }
++
++end:
++    pthread_mutex_unlock(&sctx->alloc_llwqe_mutex);
++    if (llwqe) {
++        SXE2_VERBS_LOG_DEBUG_BDF(
++            "DB:Get LL_WQE:db_page_id %#x, ll_wqe_count %#x, dedicated %#x, "
++            "shared %#x, wqe_addr %p, db_addr %p",
++            llwqe->db_page_id, llwqe->count, llwqe->qp_dedicated,
++            llwqe->qp_shared, llwqe->wqe_addr, llwqe->db_addr);
++    } else {
++        SXE2_VERBS_LOG_INFO_BDF("DB:No LL_WQE left, will alloc new db page.");
++    }
++    return llwqe;
++}
++
++/*
++ * Give an LLWQE slot back: a dedicated slot is re-queued on the dedicated
++ * list, a shared slot only drops one user reference.
++ *
++ * NOTE(review): the get paths increment count for dedicated slots too, but
++ * it is not decremented here for them - confirm whether that is intended.
++ */
++void db_uput_qp_llwqe(struct sxe2_rdma_ucontext *sctx,
++                      struct sxe2_verbs_llwqe *llwqe)
++{
++    /* A slot that is neither dedicated nor shared was never handed out. */
++    if (!llwqe || (!llwqe->qp_dedicated && !llwqe->qp_shared)) {
++        SXE2_VERBS_LOG_ERROR_BDF("llwqe:invalid argument");
++        return;
++    }
++
++    pthread_mutex_lock(&sctx->alloc_llwqe_mutex);
++    if (llwqe->qp_dedicated) {
++        list_add_tail(&sctx->dedicated_llwqe_list, &llwqe->list_entry);
++    } else {
++        llwqe->count--;
++    }
++    pthread_mutex_unlock(&sctx->alloc_llwqe_mutex);
++}
+diff -Naur rdma-core-48.0.bak/providers/sxe2/device_port.c rdma-core-48.0/providers/sxe2/device_port.c
+--- rdma-core-48.0.bak/providers/sxe2/device_port.c
1970-01-01 08:00:00.000000000 +0800
++++ rdma-core-48.0/providers/sxe2/device_port.c 2026-05-27 17:08:05.579104733 +0800
+@@ -0,0 +1,305 @@
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include "sxe2_common.h"
++#include "device_port.h"
++#include "log.h"
++#include "verbs.h"
++#include
++
++/* Match-table entry for one PCI vendor/device pair, with no driver data. */
++#define INTEL_HCA(v, d) VERBS_PCI_MATCH(v, d, NULL)
++/* Devices claimed by this provider: by RDMA driver id and by PCI id. */
++static const struct verbs_match_ent sxe2_hca_table[] = {
++    VERBS_DRIVER_ID(RDMA_DRIVER_SXE2),
++    INTEL_HCA(SXE2_PCI_VENDOR_ID, SXE2_RDMA_DEVICE_ID_PF),
++    INTEL_HCA(SXE2_PCI_VENDOR_ID, SXE2_RDMA_DEVICE_ID_VF),
++    INTEL_HCA(SXE2_PCI_VENDOR_ID, SXE2_RDMA_DEVICE_ID_PF_PCIE_2_10G),
++    INTEL_HCA(SXE2_PCI_VENDOR_ID, SXE2_RDMA_DEVICE_ID_VF_PCIE_2_10G),
++    INTEL_HCA(SXE2_PCI_VENDOR_ID_FOR_TL, SXE2_RDMA_DEVICE_ID_PF),
++    INTEL_HCA(SXE2_PCI_VENDOR_ID_FOR_TL, SXE2_RDMA_DEVICE_ID_VF),
++    INTEL_HCA(SXE2_PCI_VENDOR_ID_FOR_TL, SXE2_RDMA_DEVICE_ID_PF_PCIE_2_10G),
++    INTEL_HCA(SXE2_PCI_VENDOR_ID_FOR_TL, SXE2_RDMA_DEVICE_ID_VF_PCIE_2_10G),
++    INTEL_HCA(SXE2_PCI_VENDOR_ID_FOR_POP, SXE2_RDMA_DEVICE_ID_PF_POP),
++    INTEL_HCA(SXE2_PCI_VENDOR_ID_FOR_POP, SXE2_RDMA_DEVICE_ID_VF_POP),
++    {}
++};
++
++/*
++ * Tear down a context created by sxe2_ualloc_context(): free the
++ * context-owned PD and the doorbell mappings, then the verbs core state and
++ * finally the context memory.
++ */
++void sxe2_ufree_context(struct ibv_context *context)
++{
++    struct sxe2_rdma_ucontext *sctx;
++
++    sctx = container_of(context, struct sxe2_rdma_ucontext,
++                        ibv_ctx.context);
++    sxe2_ufree_pd(&sctx->sxe2_upd->ibv_pd);
++    sctx->sxe2_upd = NULL;
++    sxe2_ufree_doorbell(context);
++
++    verbs_uninit_context(&sctx->ibv_ctx);
++    free(sctx);
++}
++
++/*
++ * Query extended device attributes with the generic verbs command and format
++ * the packed firmware version as "main.sub.fix.build" into attr->orig_attr.
++ * Returns SXE2_OK on success, otherwise the error of the verbs command.
++ */
++int sxe2_uquery_device_ex(struct ibv_context *ibctx,
++                          const struct ibv_query_device_ex_input *input,
++                          struct ibv_device_attr_ex *attr, size_t attr_size)
++{
++    int ret = SXE2_OK;
++    struct ib_uverbs_ex_query_device_resp resp = {};
++    size_t resp_size = sizeof(resp);
++    struct sxe2_rdma_ucontext *sctx;
++    uint16_t major_ver_id;
++    uint16_t sub_ver_id;
++    uint16_t fix_ver_id;
++    uint16_t build_id;
++
++    sctx = to_sctx(ibctx);
++
++    ret = ibv_cmd_query_device_any(ibctx, input, attr, attr_size,
++                                   &resp, &resp_size);
++    if (ret != SXE2_OK) {
++        SXE2_VERBS_LOG_ERROR_BDF("device:uquery device err ret=%d\n", ret);
++        goto end;
++    }
++
++    major_ver_id = FIELD_GET(SXE2_RDMA_FW_MAIN_VERSION_BITS, resp.base.fw_ver);
++    sub_ver_id = FIELD_GET(SXE2_RDMA_FW_SUB_VERSION_BITS, resp.base.fw_ver);
++    fix_ver_id = FIELD_GET(SXE2_RDMA_FW_FIX_VERSION_BITS, resp.base.fw_ver);
++    build_id = FIELD_GET(SXE2_RDMA_FW_BUILD_NUMBER_BITS, resp.base.fw_ver);
++
++    snprintf(attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver),
++             "%u.%u.%u.%u", major_ver_id, sub_ver_id, fix_ver_id, build_id);
++end:
++    return ret;
++}
++
++/* Query port attributes via the generic verbs command; returns its result. */
++int sxe2_uquery_port(struct ibv_context *ibctx, uint8_t port,
++                     struct ibv_port_attr *attr)
++{
++    int ret = SXE2_OK;
++    struct ibv_query_port cmd;
++    struct sxe2_rdma_ucontext *sctx;
++    sctx = to_sctx(ibctx);
++    ret = ibv_cmd_query_port(ibctx, port, attr, &cmd, sizeof(cmd));
++    if (ret != SXE2_OK) {
++        SXE2_VERBS_LOG_ERROR_BDF("device:uquery port err ret=%d\n", ret);
++        goto end;
++    }
++
++end:
++    return ret;
++}
++
++/* Verbs entry points installed on every sxe2 context by verbs_set_ops(). */
++static const struct verbs_context_ops sxe2_uctx_ops = {
++    .alloc_pd = sxe2_ualloc_pd,
++    .create_ah = sxe2_ucreate_ah,
++    .create_cq = sxe2_ucreate_cq,
++    .create_cq_ex = sxe2_ucreate_cq_ex,
++    .create_qp = sxe2_ucreate_qp,
++    .create_qp_ex = sxe2_ucreate_qp_ex,
++    .dealloc_pd = sxe2_ufree_pd,
++    .dereg_mr = sxe2_udereg_mr,
++    .destroy_ah = sxe2_udestroy_ah,
++    .destroy_cq = sxe2_udestroy_cq,
++    .destroy_qp = sxe2_udestroy_qp,
++    .modify_qp = sxe2_umodify_qp,
++    .modify_cq = sxe2_umodify_cq,
++    .poll_cq = sxe2_upoll_cq,
++    .post_recv = sxe2_upost_recv,
++    .post_send = sxe2_upost_send,
++    .post_srq_recv = sxe2_upost_srq_recv,
++    .query_device_ex = sxe2_uquery_device_ex,
++    .query_port = sxe2_uquery_port,
++    .query_qp = sxe2_uquery_qp,
++    .reg_mr = sxe2_ureg_mr,
++    .rereg_mr = sxe2_urereg_mr,
++    .free_context = sxe2_ufree_context,
++    .cq_event = sxe2_ucq_event,
++    .reg_dmabuf_mr = sxe2_ureg_dmabuf_mr,
++    .req_notify_cq = sxe2_uarm_cq,
++    .create_srq = sxe2_ucreate_srq,
++    .modify_srq = sxe2_umodify_srq,
++    .query_srq = sxe2_uquery_srq,
++    .destroy_srq = sxe2_udestroy_srq,
++    .create_srq_ex = sxe2_ucreate_srq_ex,
++    .get_srq_num = sxe2_uget_srq_num,
++    .attach_mcast = sxe2_uattach_mcast,
++    .detach_mcast = sxe2_udetach_mcast,
++#if 0
++    .resize_cq = sxe2_uresize_cq,
++#endif
++};
++
++/*
++ * Allocate and set up a provider context for @ib_dev on @cmd_fd.
++ *
++ * Applies the io-log and log-level environment overrides, exchanges the
++ * get-context command with the kernel, caches the reported hardware limits,
++ * initialises the doorbell and allocates the context-owned PD.
++ *
++ * Returns the new verbs context, or NULL on failure. Every failure after the
++ * allocation also calls verbs_uninit_context(), so whatever
++ * verbs_init_and_alloc_context() acquired is released, not just the memory.
++ */
++struct verbs_context *sxe2_ualloc_context(struct ibv_device *ib_dev,
++                                          int cmd_fd, void *private_data)
++{
++    int ret = SXE2_OK;
++    struct ibv_pd *ibv_pd;
++    struct sxe2_rdma_ucontext *sctx;
++    struct sxe2_get_context cmd = {};
++    struct sxe2_get_context_resp resp = {};
++    __u8 user_ver = SXE2_RDMA_USER_ABI_VER;
++    char *env;
++
++    sctx = verbs_init_and_alloc_context(ib_dev, cmd_fd, sctx, ibv_ctx,
++                                        RDMA_DRIVER_SXE2);
++    if (!sctx)
++    {
++        ret = -ENOMEM;
++        goto end;
++    }
++    sctx->enable_io_log = 1;
++    env = getenv(SXE2_LOG_IOLOG_ON);
++    if ((env) && (0 == strncmp(env, "OFF", 3))){
++        SXE2_VERBS_LOG_INFO_BDF("device:disable user io log.\n");
++        sctx->enable_io_log = 0;
++    }
++#if defined SXE2_CFG_DEBUG
++    sctx->log_level = LOG_LEVEL_DEBUG;
++#else
++    sctx->log_level = LOG_LEVEL_INVALID;
++#endif
++    env = getenv(SXE2_VERBS_LOG_LEVEL);
++    if ((env) && (0 == strncmp(env, "OFF", 3))){
++        SXE2_VERBS_LOG_INFO_BDF("device:disable user log.\n");
++        sctx->log_level = LOG_LEVEL_INVALID;
++    } else if ((env) && (0 == strncmp(env, "FATAL", 5))) {
++        sctx->log_level = LOG_LEVEL_FATAL;
++    } else if ((env) && (0 == strncmp(env, "ERROR", 5))) {
++        sctx->log_level = LOG_LEVEL_ERROR;
++    } else if ((env) && (0 == strncmp(env, "WARN", 4))) {
++        sctx->log_level = LOG_LEVEL_WARN;
++    } else if ((env) && (0 == strncmp(env, "INFO", 4))) {
++        sctx->log_level = LOG_LEVEL_INFO;
++    } else if ((env) && (0 == strncmp(env, "DEBUG", 5))) {
++        sctx->log_level = LOG_LEVEL_DEBUG;
++    } else if ((env) && (0 == strncmp(env, "TRACE", 5))) {
++        sctx->log_level = LOG_LEVEL_TRACE;
++    }
++    cmd.userspace_ver = user_ver;
++    ret = ibv_cmd_get_context(&sctx->ibv_ctx, (struct ibv_get_context *)&cmd, sizeof(cmd),
++                              &resp.ibv_resp, sizeof(resp));
++    if (ret != SXE2_OK) {
++        SXE2_VERBS_LOG_ERROR_BDF("device:cmd get context err ret=%d\n", ret);
++        goto free_ctx;
++    }
++    verbs_set_ops(&sctx->ibv_ctx, &sxe2_uctx_ops);
++    sctx->uk_attrs.feature_flags = resp.feature_flags;
++    sctx->uk_attrs.hw_rev = resp.hw_rev;
++    sctx->uk_attrs.max_hw_wq_frags = resp.max_hw_wq_frags;
++    sctx->uk_attrs.max_hw_read_sges = resp.max_hw_read_sges;
++    sctx->uk_attrs.max_hw_inline = resp.max_hw_inline;
++    sctx->uk_attrs.max_hw_rq_quanta = resp.max_hw_rq_quanta;
++    sctx->uk_attrs.max_hw_wq_quanta = resp.max_hw_wq_quanta;
++    sctx->uk_attrs.max_hw_sq_chunk = resp.max_hw_sq_chunk;
++    sctx->uk_attrs.max_hw_cq_size = resp.max_hw_cq_size;
++    sctx->uk_attrs.min_hw_cq_size = resp.min_hw_cq_size;
++    sctx->uk_attrs.max_hw_srq_quanta = resp.max_hw_srq_quanta;
++    sctx->uk_attrs.max_hw_srq_wr = resp.max_hw_srq_wr;
++    sctx->abi_ver = user_ver;
++    if (resp.comp_mask & SXE2_ALLOC_UCTX_MIN_HW_WQ_SIZE) {
++        sctx->uk_attrs.min_hw_wq_size = resp.min_hw_wq_size;
++    } else {
++        sctx->uk_attrs.min_hw_wq_size = SXE2_QP_SW_MIN_WQSIZE;
++    }
++    memcpy(sctx->bdf, (const char *)resp.bdf, sizeof(sctx->bdf) - 1);
++
++    ret = sxe2_uinit_doorbell(sctx, &resp);
++    if (ret) {
++        SXE2_VERBS_LOG_ERROR_BDF("device:init door bell err ret=%d\n", ret);
++        goto free_ctx;
++    }
++
++    ibv_pd = sxe2_ualloc_pd(&sctx->ibv_ctx.context);
++    if (!ibv_pd) {
++        SXE2_VERBS_LOG_ERROR_BDF("device:alloc pd err\n");
++        goto free_db;
++    }
++
++    ibv_pd->context = &sctx->ibv_ctx.context;
++    sctx->sxe2_upd = container_of(ibv_pd, struct sxe2_upd, ibv_pd);
++
++    sxe2_spinlock_init(&sctx->hugetlb_lock, true);
++    list_head_init(&sctx->hugetlb_list);
++
++    return &sctx->ibv_ctx;
++
++free_db:
++    sxe2_ufree_doorbell(&sctx->ibv_ctx.context);
++free_ctx:
++    /* Undo verbs_init_and_alloc_context() before dropping the memory. */
++    verbs_uninit_context(&sctx->ibv_ctx);
++    free(sctx);
++    sctx = NULL;
++end:
++    return NULL;
++}
++
++/* Shut down logging and free the state allocated by sxe2_ualloc_device(). */
++void sxe2_uninit_device(struct verbs_device *verbs_device)
++{
++    struct sxe2_rdma_udevice *dev;
++
++    log_uninit0();
++
++    dev = container_of(&verbs_device->device, struct sxe2_rdma_udevice,
++                       ibv_dev.device);
++    free(dev);
++}
++
++/* Start logging and allocate zeroed per-device state; @sysfs_dev is unused. */
++struct verbs_device *sxe2_ualloc_device(struct verbs_sysfs_dev *sysfs_dev)
++{
++    struct sxe2_rdma_udevice *dev;
++
++    log_init0(false);
++
++    dev = calloc(1, sizeof(*dev));
++    if (!dev){
++        return NULL;
++    }
++    return &dev->ibv_dev;
++}
++
++/* Device-level operations handed to PROVIDER_DRIVER() below. */
++static const struct verbs_device_ops sxe2_udev_ops = {
++    .alloc_context = sxe2_ualloc_context,
++    .alloc_device = sxe2_ualloc_device,
++    .match_max_abi_version = SXE2_RDMA_MAX_ABI_VERSION,
++    .match_min_abi_version = SXE2_RDMA_MIN_ABI_VERSION,
++    .match_table = sxe2_hca_table,
++    .name = "sxe2_rdma",
++    .uninit_device = sxe2_uninit_device,
++};
++
++PROVIDER_DRIVER(sxe2, sxe2_udev_ops);
+diff -Naur rdma-core-48.0.bak/providers/sxe2/device_port.h rdma-core-48.0/providers/sxe2/device_port.h
+--- rdma-core-48.0.bak/providers/sxe2/device_port.h 1970-01-01 08:00:00.000000000 +0800
++++ rdma-core-48.0/providers/sxe2/device_port.h 2026-05-27 17:08:05.580104739 +0800
+@@ -0,0 +1,52 @@
++#ifndef DEVICE_H
++#define DEVICE_H
++/* PCI vendor ids matched by the provider. */
++#define SXE2_PCI_VENDOR_ID 0x1FF2
++#define SXE2_PCI_VENDOR_ID_FOR_TL 0x206F
++#define SXE2_PCI_VENDOR_ID_FOR_POP 0x1D94
++
++/* PCI device ids matched by the provider. */
++#define SXE2_RDMA_DEVICE_ID_PF 0x10B1
++#define SXE2_RDMA_DEVICE_ID_VF 0x10B2
++#define SXE2_RDMA_DEVICE_ID_PF_PCIE_2_10G 0x10B3
++#define SXE2_RDMA_DEVICE_ID_VF_PCIE_2_10G 0x10B4
++#define SXE2_RDMA_DEVICE_ID_PF_POP 0x1260
++#define SXE2_RDMA_DEVICE_ID_VF_POP 0x126F
++
++/* ABI versions, status code, minimum WQ size and fw_ver bit fields. */
++#define SXE2_RDMA_USER_ABI_VER 1
++#define SXE2_RDMA_MIN_ABI_VERSION 1
++#define SXE2_RDMA_MAX_ABI_VERSION 1
++#define SXE2_OK 0
++#define SXE2_QP_SW_MIN_WQSIZE 8
++#define SXE2_RDMA_FW_BUILD_NUMBER_BITS GENMASK(7, 0)
++#define SXE2_RDMA_FW_FIX_VERSION_BITS GENMASK(15, 8)
++#define SXE2_RDMA_FW_SUB_VERSION_BITS GENMASK(23, 16)
++#define SXE2_RDMA_FW_MAIN_VERSION_BITS GENMASK(31, 24)
++
++/* Bits tested in the comp_mask of the get-context response. */
++enum {
++    SXE2_ALLOC_UCTX_MIN_HW_WQ_SIZE = 1 << 1,
++};
++
++struct sxe2_rdma_udevice {
++    struct verbs_device ibv_dev;
++};
++
++void sxe2_ufree_context(struct ibv_context *context);
++
++int sxe2_uquery_device_ex(struct ibv_context *ibctx,
++                          const struct ibv_query_device_ex_input *input,
++                          struct ibv_device_attr_ex *attr, size_t attr_size);
++
++int sxe2_uquery_port(struct ibv_context *ibctx, uint8_t port,
++                     struct ibv_port_attr *attr);
++
++struct verbs_context *sxe2_ualloc_context(struct ibv_device *ib_dev,
++                                          int cmd_fd, void *private_data);
++
++void sxe2_uninit_device(struct verbs_device *verbs_device);
++
++struct verbs_device *sxe2_ualloc_device(struct verbs_sysfs_dev *sysfs_dev);
++
++#endif
+diff -Naur rdma-core-48.0.bak/providers/sxe2/io.c rdma-core-48.0/providers/sxe2/io.c
+--- rdma-core-48.0.bak/providers/sxe2/io.c 1970-01-01 08:00:00.000000000 +0800
++++ rdma-core-48.0/providers/sxe2/io.c 2026-05-27 17:08:05.580104739 +0800
+@@ -0,0 +1,3832 @@
++
++#include
++#include
++#include
++#include
++#include
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++#include "sxe2_common.h"
++#include "io.h"
++#include "sxe2_abi.h"
++#include "ring.h"
++#include "log.h"
++#include
++
++/* Work-request build handlers; presumably indexed by enum sxe2_disp_id. */
++static const rdma_disp_func g_rdma_op[SXE2_RDMA_MAX_ID] = {
++    sxe2_hw_send,
++    sxe2_hw_inline_send,
++    sxe2_hw_rdma_write,
++    sxe2_hw_inline_rdma_write,
++    sxe2_hw_rdma_read,
++    sxe2_hw_mw_bind,
++    sxe2_hw_local_invalidate,
++};
++
++/* WQE quanta needed for a fragment count, indexed by the fragment count. */
++static const int g_frag2quanta[SXE2_MAX_FRAGCNT] = { 1, 1, 2, 2, 3, 3, 4, 4,
++                                                     5, 5, 6, 6, 7, 7, 8, 8 };
++/* Debug helpers that log each decoded field of one WQE header union. */
++#define DUMP_RDMA_SEND_HDR(psnd) \
++    do { \
++        SXE2_VERBS_LOG_DEBUG_BDF("SND_WQE_HDR: (%#llx)\n" \
++                                 "remote_inv_key: %#x \n" \
++                                 "op : %#x \n" \
++                                 "addfragcnt : %d \n" \
++                                 "report_rtt : %d \n" \
++                                 "imme_data_flag: %d \n" \
++                                 "push_wqe : %d \n" \
++                                 "read_fence : %d \n" \
++                                 "local_fence : %d \n" \
++                                 "signaled : %d \n" \
++                                 "wqe_valid : %d\n", \
++                                 (unsigned long long)htole64((psnd)->val), \
++                                 (psnd)->field.remote_inv_rkey, \
++                                 (psnd)->field.op, \
++                                 (psnd)->field.addfragcnt, \
++                                 (psnd)->field.report_rtt, \
++                                 (psnd)->field.imme_data_flag, \
++                                 (psnd)->field.push_wqe, \
++                                 (psnd)->field.read_fence, \
++                                 (psnd)->field.local_fence, \
++                                 (psnd)->field.signaled_completion, \
++                                 (psnd)->field.wqe_valid); \
++    } while (0)
++#define DUMP_RDMA_SEND_INLINE_HDR(pinsnd) \
++    do { \
++        SXE2_VERBS_LOG_DEBUG_BDF("INSND_WQE_HDR: (%#llx)\n" \
++                                 "remote_inv_key: %#x \n" \
++                                 "op : %#x \n" \
++                                 "report_rtt : %d \n" \
++                                 "imme_data_flag: %d \n" \
++                                 "inline_datalen: %d \n" \
++                                 "push_wqe : %d \n" \
++                                 "inline_dataflg: %d \n" \
++                                 "read_fence : %d \n" \
++                                 "local_fence : %d \n" \
++                                 "signaled : %d \n" \
++                                 "wqe_valid : %d\n", \
++                                 (unsigned long long)htole64((pinsnd)->val), \
++                                 (pinsnd)->field.remote_inv_rkey, \
++                                 (pinsnd)->field.op, \
++                                 (pinsnd)->field.report_rtt, \
++                                 (pinsnd)->field.imme_data_flag, \
++                                 (pinsnd)->field.inline_data_len, \
++                                 (pinsnd)->field.push_wqe, \
++                                 (pinsnd)->field.inline_data_flag, \
++                                 (pinsnd)->field.read_fence, \
++                                 (pinsnd)->field.local_fence, \
++                                 (pinsnd)->field.signaled_completion, \
++                                 (pinsnd)->field.wqe_valid); \
++    } while (0)
++#define DUMP_RDMA_WRITE_HDR(pwrite) \
++    do { \
++        SXE2_VERBS_LOG_DEBUG_BDF("WRITE_WQE_HDR: (%#llx)\n" \
++                                 "remote_key : %#x \n" \
++                                 "op : %#x \n" \
++                                 "addfragcnt : %d \n" \
++                                 "report_rtt : %d \n" \
++                                 "imme_data_flag: %d \n" \
++                                 "push_wqe : %d \n" \
++                                 "read_fence : %d \n" \
++                                 "local_fence : %d \n" \
++                                 "signaled : %d \n" \
++                                 "wqe_valid : %d\n", \
++                                 (unsigned long long)htole64((pwrite)->val), \
++                                 (pwrite)->field.remote_key, \
++                                 (pwrite)->field.op, \
++                                 (pwrite)->field.addfragcnt, \
++                                 (pwrite)->field.report_rtt, \
++                                 (pwrite)->field.imme_data_flag, \
++                                 (pwrite)->field.push_wqe, \
++                                 (pwrite)->field.read_fence, \
++                                 (pwrite)->field.local_fence, \
++                                 (pwrite)->field.signaled_completion, \
++                                 (pwrite)->field.wqe_valid); \
++    } while (0)
++#define DUMP_RDMA_WRITE_INLINE_HDR(pinwrite) \
++    do { \
++        SXE2_VERBS_LOG_DEBUG_BDF("INWRITE_WQE_HDR: (%#llx)\n" \
++                                 "remote_key : %#x \n" \
++                                 "op : %#x \n" \
++                                 "report_rtt : %d \n" \
++                                 "imme_data_flag: %d \n" \
++                                 "inline_datalen: %d \n" \
++                                 "push_wqe : %d \n" \
++                                 "inline_dataflg: %d \n" \
++                                 "read_fence : %d \n" \
++                                 "local_fence : %d \n" \
++                                 "signaled : %d \n" \
++                                 "wqe_valid : %d\n", \
++                                 (unsigned long long)htole64((pinwrite)->val), \
++                                 (pinwrite)->field.remote_key, \
++                                 (pinwrite)->field.op, \
++                                 (pinwrite)->field.report_rtt, \
++                                 (pinwrite)->field.imme_data_flag, \
++                                 (pinwrite)->field.inline_data_len, \
++                                 (pinwrite)->field.push_wqe, \
++                                 (pinwrite)->field.inline_data_flag, \
++                                 (pinwrite)->field.read_fence, \
++                                 (pinwrite)->field.local_fence, \
++                                 (pinwrite)->field.signaled_completion, \
++                                 (pinwrite)->field.wqe_valid); \
++    } while (0)
++#define DUMP_RDMA_READ_HDR(pread) \
++    do { \
++        SXE2_VERBS_LOG_DEBUG_BDF("READ_WQE_HDR: (%#llx)\n" \
++                                 "remote_key : %#x \n" \
++                                 "op : %#x \n" \
++                                 "addfragcnt : %d \n" \
++                                 "report_rtt : %d \n" \
++                                 "push_wqe : %d \n" \
++                                 "read_fence : %d \n" \
++                                 "local_fence : %d \n" \
++                                 "signaled : %d \n" \
++                                 "wqe_valid : %d\n", \
++                                 (unsigned long long)htole64((pread)->val), \
++                                 (pread)->field.remote_key, \
++                                 (pread)->field.op, \
++                                 (pread)->field.addfragcnt, \
++                                 (pread)->field.report_rtt, \
++                                 (pread)->field.push_wqe, \
++                                 (pread)->field.read_fence, \
++                                 (pread)->field.local_fence, \
++                                 (pread)->field.signaled_completion, \
++                                 (pread)->field.wqe_valid); \
++    } while (0)
++#define DUMP_RDMA_LOCAL_INV_HDR(pinv) \
++    do { \
++        SXE2_VERBS_LOG_DEBUG_BDF("LOCALINVKEY_WQE_HDR: (%#llx)\n" \
++                                 "op : %#x \n" \
++                                 "push_wqe : %d \n" \
++                                 "read_fence : %d \n" \
++                                 "local_fence : %d \n" \
++                                 "signaled : %d \n" \
++                                 "wqe_valid : %d\n", \
++                                 (unsigned long long)htole64((pinv)->val), \
++                                 (pinv)->field.op, \
++                                 (pinv)->field.push_wqe, \
++                                 (pinv)->field.read_fence, \
++                                 (pinv)->field.local_fence, \
++                                 (pinv)->field.signaled_completion, \
++                                 (pinv)->field.wqe_valid); \
++    } while (0)
++static void sxe2_dump_wqe_hdr(struct sxe2_uqp *uqp, __u64 *hdr, ++ enum sxe2_disp_id func) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ union sxe2_send_hdr *psnd; ++ union sxe2_send_inline_hdr *pinsnd; ++ union sxe2_write_hdr *pwrite; ++ union sxe2_write_inline_hdr *pinwrite; ++ union sxe2_read_hdr *pread; ++ union sxe2_inval_hdr *pinv; ++ ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ if (sctx->enable_io_log == false) { ++ return; ++ } ++ ++ switch (func) { ++ case SXE2_RDMA_SEND: ++ psnd = (union sxe2_send_hdr *)hdr; ++ DUMP_RDMA_SEND_HDR(psnd); ++ break; ++ case SXE2_RDMA_SEND_INLINE: ++ pinsnd = (union sxe2_send_inline_hdr *)hdr; ++ DUMP_RDMA_SEND_INLINE_HDR(pinsnd); ++ break; ++ case SXE2_RDMA_WRITE: ++ pwrite = (union sxe2_write_hdr *)hdr; ++ DUMP_RDMA_WRITE_HDR(pwrite); ++ break; ++ case SXE2_RDMA_WRITE_INLINE: ++ pinwrite = (union sxe2_write_inline_hdr *)hdr; ++ DUMP_RDMA_WRITE_INLINE_HDR(pinwrite); ++ break; ++ case SXE2_RDMA_READ: ++ pread = (union sxe2_read_hdr *)hdr; ++ DUMP_RDMA_READ_HDR(pread); ++ break; ++ case SXE2_RDMA_LOCAL_INV: ++ pinv = (union sxe2_inval_hdr *)hdr; ++ DUMP_RDMA_LOCAL_INV_HDR(pinv); ++ break; ++ default: ++ SXE2_VERBS_LOG_ERROR_BDF("UNSUPPORT OP CODE\n"); ++ } ++} ++ ++static void sxe2_dump_wqe(struct sxe2_qp_common *qp, __le64 *wqe, ++ __u16 quanta, __u32 wqe_idx, const char *desc) ++{ ++ struct sxe2_uqp *uqp =container_of(qp, struct sxe2_uqp, qp); ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ __le32 *p = (__le32 *)wqe; ++ int i, offset = 0; ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ if (sctx->enable_io_log) { ++ if (desc) { ++ SXE2_VERBS_LOG_DEBUG_BDF("SQWQE DUMP TYPE=[%s], qpn [0x%x], wqe_idx"\ ++ "[0x%x] quanta [%u]:\n", desc, qp->qpn, wqe_idx, quanta); ++ } ++ else { ++ SXE2_VERBS_LOG_DEBUG_BDF("RQWQE DUMP, qpn [0x%x], wqe_idx [0x%x]:\n", ++ qp->qpn, wqe_idx); ++ } ++ for (i = 0; i < quanta * 
SXE2_QP_WQE_MIN_SIZE; i += 32) { ++ SXE2_VERBS_LOG_DEBUG_BDF("[qpn 0x%x][offset %u] %08X %08X %08X %08X " ++ "%08X %08X %08X %08X\n", ++ qp->qpn, offset, ++ le32toh(p[0]), le32toh(p[1]), ++ le32toh(p[2]), le32toh(p[3]), ++ le32toh(p[4]), le32toh(p[5]), ++ le32toh(p[6]), le32toh(p[7])); ++ p += 8; ++ offset += 32; ++ } ++ } ++} ++ ++static inline int sxe2_fragcnt_to_quanta_cnt(__u32 frag_cnt, __u16 *quanta) ++{ ++ if (frag_cnt >= SXE2_MAX_FRAGCNT) { ++ return EINVAL; ++ } ++ ++ *quanta = (__u16)g_frag2quanta[frag_cnt]; ++ return 0; ++} ++ ++static inline __u16 sxe2_inline_to_quanta_cnt(__u32 data_size) ++{ ++ if (data_size <= 8) { ++ return SXE2_QP_WQE_MIN_QUANTA; ++ } else if (data_size <= 39) { ++ return 2; ++ } else if (data_size <= 70) { ++ return 3; ++ } else if (data_size <= 101) { ++ return 4; ++ } else if (data_size <= 132) { ++ return 5; ++ } else if (data_size <= 163) { ++ return 6; ++ } else if (data_size <= 194) { ++ return 7; ++ } else { ++ return 8; ++ } ++} ++ ++static void sxe2_qp_ring_normal_db(struct sxe2_qp_common *qp) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ if (SXE2_RING_CURRENT_HEAD(qp->sq_ring) != qp->initial_ring.head) { ++ udma_to_device_barrier(); ++ db_wr32(qp->qpn, qp->qp_db_no_llwqe); ++ ++ if (qp->push_dropped) ++ qp->push_dropped = false; ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("DB NOTIFY: QPN (%#x -> %p) PI %#x\n", ++ qp->qpn, qp->qp_db_no_llwqe, qp->sq_ring.head); ++ } ++ } ++ qp->initial_ring.head = qp->sq_ring.head; ++} ++ ++static void sxe2_qp_push_wqe(struct sxe2_qp_common *qp, __le64 *wqe, ++ __u16 quanta, __u32 wqe_idx) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ __le64 *push; ++ struct sxe2_verbs_llwqe *llwqe; ++#if defined(SXE2_CFG_DEBUG) && defined(SXE2_SUPPORT_INJECT) ++ char *env; 
++#endif ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ ++ if (!qp->llwqe_mode) { ++ sxe2_qp_ring_normal_db(qp); ++ } else { ++ llwqe = qp->verbs_llwqe; ++ if ((llwqe != NULL) && (llwqe->need_lock)) { ++ pthread_spin_lock(&llwqe->lock); ++ } ++ push = (__le64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x7) * 0x20); ++#if defined(SXE2_CFG_DEBUG) && defined(SXE2_SUPPORT_INJECT) ++ env = getenv(SXE2_INJECT_LLWQE_ERR); ++ if ((env) && (0 == strncmp(env, "yes", 3))){ ++ memcpy(push, wqe, 4); ++ } ++#else ++ memcpy(push, wqe, quanta * SXE2_QP_WQE_MIN_SIZE); ++#endif ++ set_32bit_val(qp->push_db, 0, ++ FIELD_PREP(SXE2_WQEALLOC_WQE_DESC_INDEX, wqe_idx >> 3) | ++ qp->qpn); ++ qp->initial_ring.head = qp->sq_ring.head; ++ qp->llwqe_mode = true; ++ qp->push_dropped = false; ++ if ((llwqe != NULL) && (llwqe->need_lock)) { ++ pthread_spin_unlock(&llwqe->lock); ++ } ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("DB NOTIFY(LLWQE): QPN (%#x ->db %#lx llwqe %#lx) idx %#x\n", ++ qp->qpn, (uintptr_t)qp->push_db, (uintptr_t)qp->push_wqe, wqe_idx >> 3); ++ } ++ } ++} ++ ++static void sxe2_qp_flush_wqe(struct ibv_qp *qp, bool flush_sq, bool flush_rq) ++{ ++ int ret; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_umodify_qp cmd = {}; ++ struct sxe2_umodify_qp_resp resp = {}; ++ struct ibv_qp_attr qp_attr = {}; ++ ++ uqp = container_of(qp, struct sxe2_uqp, verbs_qp.qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ qp_attr.qp_state = IBV_QPS_ERR; ++ cmd.sq_flush = flush_sq; ++ cmd.rq_flush = flush_rq; ++ ++ ret = ibv_cmd_modify_qp_ex(qp, &qp_attr, IBV_QP_STATE, ++ &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp)); ++ if (ret || sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("ibv_modify_qp_ex ret(%d)\n", ret); ++ } ++} ++ ++static inline void sxe2_set_qkeyqpn(__le64 *wqe, 
__u32 offset, __u32 qkey, ++ __u32 qpn) ++{ ++ union sxe2_dqpn_data msg; ++ ++ msg.val = 0; ++ msg.field.dest_qkey = qkey; ++ msg.field.dest_qpn = qpn; ++ ++ wqe[offset >> 3] = htole64(msg.val); ++} ++ ++static inline void sxe2_set_remote_offset(__le64 *wqe, __u32 offset, ++ __u64 remote_offset) ++{ ++ wqe[offset >> 3] = htole64(remote_offset); ++} ++ ++static void sxe2_set_send_hdr(__le64 *wqe, __u32 value, ++ struct sxe2_wr_info *wr_info, ++ struct sxe2_qp_common *qp) ++{ ++ union sxe2_send_hdr hdr; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ ++ hdr.val = 0; ++ hdr.field.remote_inv_rkey = ++ wr_info->rkey_to_inv | wr_info->op_info.send.ah_id; ++ hdr.field.op = wr_info->op_type; ++ hdr.field.addfragcnt = value; ++ hdr.field.report_rtt = wr_info->report_rtt; ++ hdr.field.imme_data_flag = wr_info->imm_data_valid; ++ hdr.field.push_wqe = wr_info->push_wqe; ++ hdr.field.read_fence = wr_info->read_fence; ++ hdr.field.local_fence = wr_info->local_fence; ++ hdr.field.signaled_completion = wr_info->signaled; ++ hdr.field.wqe_valid = qp->swqe_polarity; ++ ++ udma_to_device_barrier(); ++ ++ set_64bit_val(wqe, 24, hdr.val); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("SND_WQE_HDR: (%#llx)\n" \ ++ "remote_inv_key: %#x \n"\ ++ "op : %#x \n"\ ++ "addfragcnt : %d \n"\ ++ "report_rtt : %d \n"\ ++ "imme_data_flag: %d \n"\ ++ "push_wqe : %d \n"\ ++ "read_fence : %d \n"\ ++ "local_fence : %d \n"\ ++ "signaled : %d \n"\ ++ "wqe_valid : %d\n", ++ (unsigned long long)htole64(hdr.val), ++ hdr.field.remote_inv_rkey, ++ hdr.field.op, ++ hdr.field.addfragcnt, ++ hdr.field.report_rtt, ++ hdr.field.imme_data_flag, ++ hdr.field.push_wqe, ++ hdr.field.read_fence, ++ hdr.field.local_fence, ++ hdr.field.signaled_completion, ++ hdr.field.wqe_valid); ++ } ++} ++ ++static void sxe2_set_inlinesnd_hdr(__le64 
*wqe, __u32 value, ++ struct sxe2_wr_info *wr_info, ++ struct sxe2_qp_common *qp) ++{ ++ union sxe2_send_inline_hdr hdr; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ ++ hdr.val = 0; ++ hdr.field.remote_inv_rkey = ++ wr_info->rkey_to_inv | wr_info->op_info.send.ah_id; ++ hdr.field.op = wr_info->op_type; ++ hdr.field.report_rtt = wr_info->report_rtt; ++ hdr.field.imme_data_flag = wr_info->imm_data_valid; ++ hdr.field.inline_data_len = value; ++ hdr.field.push_wqe = wr_info->push_wqe; ++ hdr.field.inline_data_flag = 1; ++ hdr.field.read_fence = wr_info->read_fence; ++ hdr.field.local_fence = wr_info->local_fence; ++ hdr.field.signaled_completion = wr_info->signaled; ++ hdr.field.wqe_valid = qp->swqe_polarity; ++ ++ udma_to_device_barrier(); ++ ++ set_64bit_val(wqe, 24, hdr.val); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("INSND_WQE_HDR: (%#llx)\n" \ ++ "remote_inv_key: %#x \n"\ ++ "op : %#x \n"\ ++ "report_rtt : %d \n"\ ++ "imme_data_flag: %d \n"\ ++ "inline_datalen: %d \n"\ ++ "push_wqe : %d \n"\ ++ "inline_dataflg: %d \n"\ ++ "read_fence : %d \n"\ ++ "local_fence : %d \n"\ ++ "signaled : %d \n"\ ++ "wqe_valid : %d\n", ++ (unsigned long long)htole64(hdr.val), ++ hdr.field.remote_inv_rkey, ++ hdr.field.op, ++ hdr.field.report_rtt, ++ hdr.field.imme_data_flag, ++ hdr.field.inline_data_len, ++ hdr.field.push_wqe, ++ hdr.field.inline_data_flag, ++ hdr.field.read_fence, ++ hdr.field.local_fence, ++ hdr.field.signaled_completion, ++ hdr.field.wqe_valid); ++ } ++} ++ ++static void sxe2_set_write_hdr(__le64 *wqe, __u32 value, ++ struct sxe2_wr_info *wr_info, ++ struct sxe2_qp_common *qp) ++{ ++ union sxe2_write_hdr hdr; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = 
container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ ++ hdr.val = 0; ++ hdr.field.remote_key = wr_info->op_info.rdma_write.rem_addr.lkey; ++ hdr.field.op = wr_info->op_type; ++ hdr.field.addfragcnt = value; ++ hdr.field.report_rtt = wr_info->report_rtt; ++ hdr.field.imme_data_flag = wr_info->imm_data_valid; ++ hdr.field.push_wqe = wr_info->push_wqe; ++ hdr.field.read_fence = wr_info->read_fence; ++ hdr.field.local_fence = wr_info->local_fence; ++ hdr.field.signaled_completion = wr_info->signaled; ++ hdr.field.wqe_valid = qp->swqe_polarity; ++ ++ udma_to_device_barrier(); ++ ++ set_64bit_val(wqe, 24, hdr.val); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("WRITE_WQE_HDR: (%#llx)\n" \ ++ "remote_key : %#x \n"\ ++ "op : %#x \n"\ ++ "addfragcnt : %d \n"\ ++ "report_rtt : %d \n"\ ++ "imme_data_flag: %d \n"\ ++ "push_wqe : %d \n"\ ++ "read_fence : %d \n"\ ++ "local_fence : %d \n"\ ++ "signaled : %d \n"\ ++ "wqe_valid : %d\n", ++ (unsigned long long)htole64(hdr.val), ++ hdr.field.remote_key, ++ hdr.field.op, ++ hdr.field.addfragcnt, ++ hdr.field.report_rtt, ++ hdr.field.imme_data_flag, ++ hdr.field.push_wqe, ++ hdr.field.read_fence, ++ hdr.field.local_fence, ++ hdr.field.signaled_completion, ++ hdr.field.wqe_valid); ++ } ++} ++ ++static void sxe2_set_inlinewrite_hdr(__le64 *wqe, __u32 value, ++ struct sxe2_wr_info *wr_info, ++ struct sxe2_qp_common *qp) ++{ ++ union sxe2_write_inline_hdr hdr; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ ++ hdr.val = 0; ++ hdr.field.remote_key = wr_info->op_info.rdma_write.rem_addr.lkey; ++ hdr.field.op = wr_info->op_type; ++ hdr.field.report_rtt = wr_info->report_rtt; ++ hdr.field.imme_data_flag = wr_info->imm_data_valid; ++ hdr.field.inline_data_len = value; ++ hdr.field.push_wqe = wr_info->push_wqe; ++ 
hdr.field.inline_data_flag = 1; ++ hdr.field.read_fence = wr_info->read_fence; ++ hdr.field.local_fence = wr_info->local_fence; ++ hdr.field.signaled_completion = wr_info->signaled; ++ hdr.field.wqe_valid = qp->swqe_polarity; ++ ++ udma_to_device_barrier(); ++ ++ set_64bit_val(wqe, 24, hdr.val); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("INWRITE_WQE_HDR: (%#llx)\n" \ ++ "remote_key : %#x \n"\ ++ "op : %#x \n"\ ++ "report_rtt : %d \n"\ ++ "imme_data_flag: %d \n"\ ++ "inline_datalen: %d \n"\ ++ "push_wqe : %d \n"\ ++ "inline_dataflg: %d \n"\ ++ "read_fence : %d \n"\ ++ "local_fence : %d \n"\ ++ "signaled : %d \n"\ ++ "wqe_valid : %d\n", ++ (unsigned long long)htole64(hdr.val), ++ hdr.field.remote_key, ++ hdr.field.op, ++ hdr.field.report_rtt, ++ hdr.field.imme_data_flag, ++ hdr.field.inline_data_len, ++ hdr.field.push_wqe, ++ hdr.field.inline_data_flag, ++ hdr.field.read_fence, ++ hdr.field.local_fence, ++ hdr.field.signaled_completion, ++ hdr.field.wqe_valid); ++ } ++} ++ ++static void sxe2_set_read_hdr(__le64 *wqe, __u32 addfragcnt, ++ struct sxe2_wr_info *wr_info, ++ struct sxe2_qp_common *qp, bool ord_fence) ++{ ++ union sxe2_read_hdr hdr; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ ++ hdr.val = 0; ++ hdr.field.remote_key = wr_info->op_info.rdma_read.rem_addr.lkey; ++ hdr.field.op = wr_info->op_type; ++ hdr.field.addfragcnt = addfragcnt; ++ hdr.field.report_rtt = wr_info->report_rtt; ++ hdr.field.push_wqe = wr_info->push_wqe; ++ hdr.field.read_fence = wr_info->read_fence || ord_fence ? 
1 : 0; ++ hdr.field.local_fence = wr_info->local_fence; ++ hdr.field.signaled_completion = wr_info->signaled; ++ hdr.field.wqe_valid = qp->swqe_polarity; ++ ++ udma_to_device_barrier(); ++ ++ set_64bit_val(wqe, 24, hdr.val); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("READ_WQE_HDR: (%#llx)\n" \ ++ "remote_key : %#x \n"\ ++ "op : %#x \n"\ ++ "addfragcnt : %d \n"\ ++ "report_rtt : %d \n"\ ++ "push_wqe : %d \n"\ ++ "read_fence : %d \n"\ ++ "local_fence : %d \n"\ ++ "signaled : %d \n"\ ++ "wqe_valid : %d\n", ++ (unsigned long long)htole64(hdr.val), ++ hdr.field.remote_key, ++ hdr.field.op, ++ hdr.field.addfragcnt, ++ hdr.field.report_rtt, ++ hdr.field.push_wqe, ++ hdr.field.read_fence, ++ hdr.field.local_fence, ++ hdr.field.signaled_completion, ++ hdr.field.wqe_valid); ++ } ++} ++ ++static void sxe2_set_bindmw_hdr(__le64 *wqe, struct sxe2_wr_info *wr_info, ++ struct sxe2_qp_common *qp) ++{ ++ union sxe2_bindmw_hdr hdr; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ ++ hdr.val = 0; ++ hdr.field.op = wr_info->op_type; ++ hdr.field.access = wr_info->op_info.bind_window.ena_reads << 2 | ++ wr_info->op_info.bind_window.ena_writes << 3; ++ hdr.field.va_base_flag = ++ (wr_info->op_info.bind_window.addressing_type == ++ SXE2_ADDR_TYPE_VA_BASED ? 1 : 0); ++ hdr.field.mw_type = ++ wr_info->op_info.bind_window.mem_window_type_1 ? 
0 : 1; ++ hdr.field.push_wqe = wr_info->push_wqe; ++ hdr.field.read_fence = wr_info->read_fence; ++ hdr.field.local_fence = wr_info->local_fence; ++ hdr.field.signaled_completion = wr_info->signaled; ++ hdr.field.wqe_valid = qp->swqe_polarity; ++ ++ udma_to_device_barrier(); ++ ++ set_64bit_val(wqe, 24, hdr.val); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("BINDMW_WQE_HDR: (%#llx)\n" \ ++ "op : %#x \n"\ ++ "access : %d \n"\ ++ "va_base_flag : %d \n"\ ++ "mw_type : %d \n"\ ++ "push_wqe : %d \n"\ ++ "read_fence : %d \n"\ ++ "local_fence : %d \n"\ ++ "signaled : %d \n"\ ++ "wqe_valid : %d\n", ++ (unsigned long long)htole64(hdr.val), ++ hdr.field.op, ++ hdr.field.access, ++ hdr.field.va_base_flag, ++ hdr.field.mw_type, ++ hdr.field.push_wqe, ++ hdr.field.read_fence, ++ hdr.field.local_fence, ++ hdr.field.signaled_completion, ++ hdr.field.wqe_valid); ++ } ++} ++ ++static void sxe2_set_invalidate_hdr(__le64 *wqe, struct sxe2_wr_info *wr_info, ++ struct sxe2_qp_common *qp) ++{ ++ union sxe2_inval_hdr hdr; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ ++ hdr.val = 0; ++ hdr.field.op = wr_info->op_type; ++ hdr.field.push_wqe = wr_info->push_wqe; ++ hdr.field.read_fence = wr_info->read_fence; ++ hdr.field.local_fence = wr_info->local_fence; ++ hdr.field.signaled_completion = wr_info->signaled; ++ hdr.field.wqe_valid = qp->swqe_polarity; ++ ++ udma_to_device_barrier(); ++ ++ set_64bit_val(wqe, 24, hdr.val); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("LOCALINVKEY_WQE_HDR: (%#llx)\n" \ ++ "op : %#x \n"\ ++ "push_wqe : %d \n"\ ++ "read_fence : %d \n"\ ++ "local_fence : %d \n"\ ++ "signaled : %d \n"\ ++ "wqe_valid : %d\n", ++ (unsigned long long)htole64(hdr.val), ++ hdr.field.op, ++ hdr.field.push_wqe, ++ hdr.field.read_fence, ++ hdr.field.local_fence, ++ 
hdr.field.signaled_completion, ++ hdr.field.wqe_valid); ++ } ++} ++ ++static void sxe2_set_nop_hdr(__le64 *wqe, __u32 offset, ++ struct sxe2_qp_common *qp) ++{ ++ union sxe2_nop_hdr hdr; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ ++ hdr.val = 0; ++ hdr.field.op = SXE2_OP_TYPE_NOP; ++ hdr.field.signaled_completion = false; ++ hdr.field.wqe_valid = qp->swqe_polarity; ++ ++ udma_to_device_barrier(); ++ ++ set_64bit_val(wqe, offset, hdr.val); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("NOP_WQE_HDR: (%#llx)\n" \ ++ "op : %#x \n"\ ++ "signaled : %d \n"\ ++ "wqe_valid : %d\n", ++ (unsigned long long)htole64(hdr.val), ++ hdr.field.op, ++ hdr.field.signaled_completion, ++ hdr.field.wqe_valid); ++ } ++} ++ ++static int sxe2_hw_nop(struct sxe2_qp_common *qp) ++{ ++ __le64 *wqe; ++ __u32 wqe_idx; ++ ++ if (!qp->sq_ring.head) ++ return EINVAL; ++ ++ wqe_idx = SXE2_RING_CURRENT_HEAD(qp->sq_ring); ++ wqe = qp->sq_base[wqe_idx].elem; ++ ++ qp->sq_wrtrk_array[wqe_idx].quanta = SXE2_QP_WQE_MIN_QUANTA; ++ ++ set_64bit_val(wqe, 0, 0); ++ set_64bit_val(wqe, 8, 0); ++ set_64bit_val(wqe, 16, 0); ++ ++ sxe2_set_nop_hdr(wqe, 24, qp); ++ ++ return 0; ++} ++ ++static void sxe2_set_rcvq_hdr(__le64 *wqe, __u32 addl_frag_cnt, ++ struct sxe2_qp_common *qp) ++{ ++ union sxe2_rq_hdr hdr; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ ++ hdr.val = 0; ++ hdr.field.addfragcnt = addl_frag_cnt; ++ hdr.field.wqe_valid = qp->rwqe_polarity; ++ ++ udma_to_device_barrier(); ++ ++ set_64bit_val(wqe, 24, hdr.val); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("RCV_WQE_HDR: (%#llx)\n" \ ++ "addfragcnt : %d \n"\ ++ "wqe_valid : 
%d\n", ++ (unsigned long long)htole64(hdr.val), ++ hdr.field.addfragcnt, ++ hdr.field.wqe_valid); ++ } ++} ++ ++static inline void sxe2_set_immedata(__le64 *wqe, __u32 offset, __u64 imm_data) ++{ ++ wqe[offset >> 3] = htole64(imm_data); ++} ++ ++static inline void sxe2_set_sgelist_data(void *wqe, __u32 offset, ++ struct ibv_sge *sge, __u8 valid) ++{ ++ struct sxe2_frag_data *pmsg = (struct sxe2_frag_data *)wqe; ++ uint32_t len; ++ ++ pmsg = pmsg + offset / sizeof(*pmsg); ++ if (sge) { ++ len = (sge->length & ((uint32_t)1 << 31)) ? 0 : sge->length; ++ pmsg->tag_offset = htole64(sge->addr); ++ pmsg->offset8.field.frag_valid = valid; ++ pmsg->offset8.field.frag_len = len; ++ pmsg->offset8.field.stag = sge->lkey; ++ pmsg->offset8.val = htole64(pmsg->offset8.val); ++ } else { ++ pmsg->tag_offset = 0; ++ pmsg->offset8.field.frag_valid = valid; ++ pmsg->offset8.val = htole64(pmsg->offset8.val); ++ } ++} ++ ++static void sxe2_set_inline_data_seg_list(__u8 *wqe, struct ibv_sge *sge_list, ++ __u32 num_sges, __u8 polarity) ++{ ++ __u8 inline_valid = (__u8)(polarity << SXE2_INLINE_VALID_S); ++ __u32 quanta_bytes_remaining = 8; ++ __u32 i; ++ bool first_quanta = true; ++ ++ wqe += 8; ++ ++ for (i = 0; i < num_sges; i++) { ++ __u8 *cur_sge = (__u8 *)(uintptr_t)sge_list[i].addr; ++ __u32 sge_len = sge_list[i].length; ++ ++ while (sge_len) { ++ __u32 bytes_copied; ++ ++ bytes_copied = min(sge_len, quanta_bytes_remaining); ++ memcpy(wqe, cur_sge, bytes_copied); ++ wqe += bytes_copied; ++ cur_sge += bytes_copied; ++ quanta_bytes_remaining -= bytes_copied; ++ sge_len -= bytes_copied; ++ ++ if (!quanta_bytes_remaining) { ++ quanta_bytes_remaining = 31; ++ ++ if (first_quanta) { ++ first_quanta = false; ++ wqe += 16; ++ } else { ++ *wqe = inline_valid; ++ wqe++; ++ } ++ } ++ } ++ } ++ if (!first_quanta && quanta_bytes_remaining < 31) { ++ *(wqe + quanta_bytes_remaining) = inline_valid; ++ } ++} ++ ++static void sxe2_set_wqe_mw_bind(__le64 *wqe, ++ struct sxe2_bind_window *op_info, ++ 
struct sxe2_qp_common *qp) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_bindmw_info *pmsg; ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ ++ pmsg = (struct sxe2_bindmw_info *)wqe; ++ pmsg->mw_va_base = htole64((uintptr_t)op_info->va); ++ pmsg->offset8.field.mw_key = op_info->mw_stag; ++ pmsg->offset8.field.mr_key = op_info->mr_stag; ++ pmsg->offset8.val = htole64(pmsg->offset8.val); ++ pmsg->offset16.field.mw_len = op_info->bind_len; ++ pmsg->offset16.val = htole64(pmsg->offset16.val); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("BINDMW_WQE_INFO:\n" \ ++ "mw_va_base : %p \n"\ ++ "mw_key|mr_key : 0x%x|0x%x \n"\ ++ "mw_len : %#llx \n", ++ op_info->va, ++ op_info->mw_stag, ++ op_info->mr_stag, ++ op_info->bind_len); ++ } ++} ++ ++static __le64 *sxe2_qp_get_next_send_wqe(struct sxe2_qp_common *qp, ++ __u32 *wqe_idx, __u16 *quanta, __u32 total_size, ++ __u64 wr_id, bool push_wqe) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ __le64 *wqe; ++ __u32 nop_wqe_idx; ++ __u16 wqe_quanta = *quanta; ++ bool push_wqe_pad = false; ++ __u16 avail_quanta; ++ __u16 i; ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ ++ if (push_wqe && (*quanta & 0x1)) { ++ *quanta = *quanta + 1; ++ push_wqe_pad = true; ++ } ++ ++ avail_quanta = qp->common_attrs->max_hw_sq_chunk - ++ (SXE2_RING_CURRENT_HEAD(qp->sq_ring) % ++ qp->common_attrs->max_hw_sq_chunk); ++ if (*quanta <= avail_quanta) { ++ if (*quanta > SXE2_SQ_RING_FREE_QUANTA(qp->sq_ring)) { ++ return NULL; ++ } ++ } else { ++ if (*quanta + avail_quanta > ++ SXE2_SQ_RING_FREE_QUANTA(qp->sq_ring)) { ++ return NULL; ++ } ++ ++ nop_wqe_idx = SXE2_RING_CURRENT_HEAD(qp->sq_ring); ++ for (i = 0; i < avail_quanta; i++) { ++ (void)sxe2_hw_nop(qp); ++ 
SXE2_RING_MOVE_HEAD_NOCHECK(qp->sq_ring); ++ } ++ if (qp->push_db && push_wqe) { ++ sxe2_qp_push_wqe(qp, qp->sq_base[nop_wqe_idx].elem, ++ avail_quanta, nop_wqe_idx); ++ } ++ } ++ ++ *wqe_idx = SXE2_RING_CURRENT_HEAD(qp->sq_ring); ++ if (!*wqe_idx) { ++ qp->swqe_polarity = !qp->swqe_polarity; ++ } ++ ++ SXE2_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, *quanta); ++ ++ wqe = qp->sq_base[*wqe_idx].elem; ++ ++ qp->sq_wrtrk_array[*wqe_idx].wrid = wr_id; ++ qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size; ++ qp->sq_wrtrk_array[*wqe_idx].quanta = wqe_quanta; ++ ++ if (push_wqe_pad) { ++ __le64 *nop_wqe; ++ ++ nop_wqe_idx = *wqe_idx + wqe_quanta; ++ nop_wqe = qp->sq_base[nop_wqe_idx].elem; ++ qp->sq_wrtrk_array[nop_wqe_idx].quanta = SXE2_QP_WQE_MIN_QUANTA; ++ set_64bit_val(nop_wqe, 0, 0); ++ set_64bit_val(nop_wqe, 8, 0); ++ set_64bit_val(nop_wqe, 16, 0); ++ sxe2_set_nop_hdr(nop_wqe, 24, qp); ++ } ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("POST SEND(qpn = %u): wqe_idx %u wr_id %llu"\ ++ " ring_size %u sq_chunk %d pad %d push_mode %d\n", ++ qp->qpn, *wqe_idx, qp->sq_wrtrk_array[*wqe_idx].wrid, ++ SXE2_RING_SIZE(qp->sq_ring), qp->common_attrs->max_hw_sq_chunk, ++ push_wqe_pad, push_wqe); ++ } ++ ++ return wqe; ++} ++ ++static __le64 *sxe2_qp_get_next_recv_wqe(struct sxe2_qp_common *qp, ++ __u32 *wqe_idx) ++{ ++ __le64 *wqe; ++ int ret_code; ++ ++ if (SXE2_RING_FULL_ERR(qp->rq_ring)) { ++ return NULL; ++ } ++ ++ SXE2_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code); ++ if (ret_code) { ++ return NULL; ++ } ++ ++ if (!*wqe_idx) { ++ qp->rwqe_polarity = !qp->rwqe_polarity; ++ } ++ wqe = qp->rq_base[*wqe_idx * qp->rq_wqe_size_multiplier].elem; ++ ++ return wqe; ++} ++ ++int sxe2_hw_send(struct sxe2_qp_common *qp, struct sxe2_wr_info *wr_info, ++ bool post_sq) ++{ ++ __le64 *wqe; ++ struct sxe2_post_send *op_info; ++ __u32 i, wqe_idx, total_size = 0, byte_off; ++ int ret_code; ++ __u32 frag_cnt, addl_frag_cnt; ++ __u64 frag_info = 0; ++ struct sxe2_uqp 
*uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ ++ __u16 quanta; ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ ++ op_info = &wr_info->op_info.send; ++ if (qp->max_sq_sge_cnt < op_info->num_sges) { ++ return EINVAL; ++ } ++ ++ for (i = 0; i < op_info->num_sges; i++) { ++ total_size += op_info->sg_list[i].length; ++ } ++ ++ if (wr_info->imm_data_valid) { ++ frag_cnt = op_info->num_sges + 1; ++ } else { ++ frag_cnt = op_info->num_sges; ++ } ++ ++ ret_code = sxe2_fragcnt_to_quanta_cnt(frag_cnt, &quanta); ++ if (ret_code) { ++ return ret_code; ++ } ++ ++ wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, ++ wr_info->wr_id, wr_info->push_wqe); ++ if (!wqe) { ++ return ENOMEM; ++ } ++ ++ addl_frag_cnt = frag_cnt > 1 ? (frag_cnt - 1) : 0; ++ ++ if (wr_info->imm_data_valid) { ++ sxe2_set_immedata(wqe, 0, wr_info->imm_data); ++ i = 0; ++ } else { ++ sxe2_set_sgelist_data(wqe, 0, ++ frag_cnt ? 
op_info->sg_list : NULL, ++ qp->swqe_polarity); ++ i = 1; ++ } ++ ++ if (total_size == 0) { ++ get_64bit_val(wqe, 8, &frag_info); ++ frag_info = frag_info & (~SXE2_WQE_FRAG_VALID); ++ SXE2_SET_FIELD(frag_info, SXE2_WQE_FRAG_VALID, !qp->swqe_polarity); ++ set_64bit_val(wqe, 8, frag_info); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr set frag_info [%#llx] field [%d]\n", ++ frag_info, !qp->swqe_polarity); ++ } ++ } ++ ++ sxe2_set_qkeyqpn(wqe, 16, op_info->qkey, op_info->dest_qp); ++ ++ for (byte_off = 32; i < op_info->num_sges;) { ++ sxe2_set_sgelist_data(wqe, byte_off, &op_info->sg_list[i], ++ qp->swqe_polarity); ++ byte_off += 16; ++ i++; ++ } ++ ++ if (!(frag_cnt & 0x01) && frag_cnt) { ++ sxe2_set_sgelist_data(wqe, byte_off, NULL, qp->swqe_polarity); ++ } ++ ++ sxe2_set_send_hdr(wqe, addl_frag_cnt, wr_info, qp); ++ ++ if (wr_info->push_wqe) { ++ sxe2_qp_push_wqe(qp, wqe, quanta, wqe_idx); ++ } else if (post_sq) { ++ sxe2_qp_ring_normal_db(qp); ++ } ++ sxe2_dump_wqe(qp, wqe, quanta, wqe_idx, "send"); ++ ++ return 0; ++} ++ ++int sxe2_hw_inline_send(struct sxe2_qp_common *qp, ++ struct sxe2_wr_info *wr_info, bool post_sq) ++{ ++ __le64 *wqe; ++ struct sxe2_post_send *op_info; ++ __u32 wqe_idx; ++ __u32 i, total_size = 0; ++ __u16 quanta; ++ ++ op_info = &wr_info->op_info.send; ++ ++ if (unlikely(qp->max_sq_sge_cnt < op_info->num_sges)) { ++ return EINVAL; ++ } ++ ++ for (i = 0; i < op_info->num_sges; i++) { ++ total_size += op_info->sg_list[i].length; ++ } ++ ++ if (unlikely(total_size > qp->max_inline_data)) { ++ return EINVAL; ++ } ++ ++ quanta = sxe2_inline_to_quanta_cnt(total_size); ++ wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, ++ wr_info->wr_id, wr_info->push_wqe); ++ if (!wqe) { ++ return ENOMEM; ++ } ++ ++ if (wr_info->imm_data_valid) { ++ sxe2_set_immedata(wqe, 0, wr_info->imm_data); ++ } ++ ++ sxe2_set_qkeyqpn(wqe, 16, op_info->qkey, op_info->dest_qp); ++ ++ sxe2_set_inline_data_seg_list((__u8 *)wqe, op_info->sg_list, 
++ op_info->num_sges, qp->swqe_polarity); ++ ++ sxe2_set_inlinesnd_hdr(wqe, total_size, wr_info, qp); ++ ++ if (wr_info->push_wqe) { ++ sxe2_qp_push_wqe(qp, wqe, quanta, wqe_idx); ++ } else if (post_sq) { ++ sxe2_qp_ring_normal_db(qp); ++ } ++ ++#ifdef SXE2_CFG_DEBUG ++ if ((wr_info->op_type == SXE2_OP_TYPE_SEND_INV) || ++ (wr_info->op_type == SXE2_OP_TYPE_SEND_SOL_INV)) { ++ qp->statistics.send_inv_cnt++; ++ } else { ++ qp->statistics.send_cnt++; ++ } ++ ++ if (wr_info->signaled) { ++ qp->statistics.total_signal_cnt++; ++ } ++ ++ qp->statistics.total_sqe_cnt++; ++ qp->statistics.last_send_sqwrid = wr_info->wr_id; ++#endif ++ ++ sxe2_dump_wqe(qp, wqe, quanta, wqe_idx, "inline_send"); ++ ++ return 0; ++} ++ ++int sxe2_hw_rdma_write(struct sxe2_qp_common *qp, ++ struct sxe2_wr_info *wr_info, bool post_sq) ++{ ++ __le64 *wqe; ++ struct sxe2_rdma_write *op_info; ++ __u32 i, wqe_idx; ++ __u32 total_size = 0, byte_off; ++ int ret_code; ++ __u32 frag_cnt, addl_frag_cnt; ++ __u16 quanta; ++ __u64 frag_info = 0; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ ++ op_info = &wr_info->op_info.rdma_write; ++ if (op_info->num_lo_sges > qp->max_sq_sge_cnt) { ++ return EINVAL; ++ } ++ ++ for (i = 0; i < op_info->num_lo_sges; i++) { ++ total_size += op_info->lo_sg_list[i].length; ++ } ++ ++ if (wr_info->imm_data_valid) { ++ frag_cnt = op_info->num_lo_sges + 1; ++ } else { ++ frag_cnt = op_info->num_lo_sges; ++ } ++ ++ addl_frag_cnt = frag_cnt > 1 ? 
(frag_cnt - 1) : 0; ++ ret_code = sxe2_fragcnt_to_quanta_cnt(frag_cnt, &quanta); ++ if (ret_code) { ++ return ret_code; ++ } ++ ++ wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, ++ wr_info->wr_id, wr_info->push_wqe); ++ if (!wqe) { ++ return ENOMEM; ++ } ++ ++ if (wr_info->imm_data_valid) { ++ sxe2_set_immedata(wqe, 0, wr_info->imm_data); ++ i = 0; ++ } else { ++ sxe2_set_sgelist_data(wqe, 0, op_info->lo_sg_list, ++ qp->swqe_polarity); ++ i = 1; ++ } ++ ++ if (total_size == 0) { ++ get_64bit_val(wqe, 8, &frag_info); ++ frag_info = frag_info & (~SXE2_WQE_FRAG_VALID); ++ SXE2_SET_FIELD(frag_info, SXE2_WQE_FRAG_VALID, !qp->swqe_polarity); ++ set_64bit_val(wqe, 8, frag_info); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr set frag_info [%#llx] field [%d]\n", ++ frag_info, !qp->swqe_polarity); ++ } ++ } ++ ++ sxe2_set_remote_offset(wqe, 16, op_info->rem_addr.addr); ++ ++ for (byte_off = 32; i < op_info->num_lo_sges;) { ++ sxe2_set_sgelist_data(wqe, byte_off, &op_info->lo_sg_list[i], ++ qp->swqe_polarity); ++ byte_off += 16; ++ i++; ++ } ++ ++ if (!(frag_cnt & 0x01) && frag_cnt) { ++ sxe2_set_sgelist_data(wqe, byte_off, NULL, qp->swqe_polarity); ++ } ++ ++ sxe2_set_write_hdr(wqe, addl_frag_cnt, wr_info, qp); ++ ++ if (wr_info->push_wqe) { ++ sxe2_qp_push_wqe(qp, wqe, quanta, wqe_idx); ++ } else if (post_sq) { ++ sxe2_qp_ring_normal_db(qp); ++ } ++ sxe2_dump_wqe(qp, wqe, quanta, wqe_idx, "write"); ++ ++ return 0; ++} ++ ++int sxe2_hw_inline_rdma_write(struct sxe2_qp_common *qp, ++ struct sxe2_wr_info *wr_info, bool post_sq) ++{ ++ __le64 *wqe; ++ struct sxe2_rdma_write *op_info; ++ __u32 wqe_idx; ++ __u32 i, total_size = 0; ++ __u16 quanta; ++ ++ op_info = &wr_info->op_info.rdma_write; ++ ++ if (unlikely(qp->max_sq_sge_cnt < op_info->num_lo_sges)) { ++ return EINVAL; ++ } ++ ++ for (i = 0; i < op_info->num_lo_sges; i++) { ++ total_size += op_info->lo_sg_list[i].length; ++ } ++ ++ if (unlikely(total_size > qp->max_inline_data)) { ++ 
return EINVAL; ++ } ++ ++ quanta = sxe2_inline_to_quanta_cnt(total_size); ++ wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, ++ wr_info->wr_id, wr_info->push_wqe); ++ if (!wqe) { ++ return ENOMEM; ++ } ++ ++ if (wr_info->imm_data_valid) { ++ sxe2_set_immedata(wqe, 0, wr_info->imm_data); ++ } ++ ++ sxe2_set_remote_offset(wqe, 16, op_info->rem_addr.addr); ++ ++ sxe2_set_inline_data_seg_list((__u8 *)wqe, op_info->lo_sg_list, ++ op_info->num_lo_sges, qp->swqe_polarity); ++ ++ sxe2_set_inlinewrite_hdr(wqe, total_size, wr_info, qp); ++ ++ if (wr_info->push_wqe) { ++ sxe2_qp_push_wqe(qp, wqe, quanta, wqe_idx); ++ } else if (post_sq) { ++ sxe2_qp_ring_normal_db(qp); ++ } ++ ++#ifdef SXE2_CFG_DEBUG ++ qp->statistics.write_cnt++; ++ ++ if (wr_info->signaled) { ++ qp->statistics.total_signal_cnt++; ++ } ++ ++ qp->statistics.total_sqe_cnt++; ++ qp->statistics.last_send_sqwrid = wr_info->wr_id; ++#endif ++ ++ sxe2_dump_wqe(qp, wqe, quanta, wqe_idx, "inline_write"); ++ ++ return 0; ++} ++ ++int sxe2_hw_rdma_read(struct sxe2_qp_common *qp, ++ struct sxe2_wr_info *wr_info, bool post_sq) ++{ ++ struct sxe2_rdma_read *op_info; ++ int ret_code; ++ __u32 i, byte_off, total_size = 0; ++ __u32 addl_frag_cnt; ++ __le64 *wqe; ++ __u32 wqe_idx; ++ __u16 quanta; ++ __u64 frag_info = 0; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ bool ord_fence = false; ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ ++ op_info = &wr_info->op_info.rdma_read; ++ if (qp->max_sq_sge_cnt < op_info->num_lo_sges) { ++ return EINVAL; ++ } ++ ++ for (i = 0; i < op_info->num_lo_sges; i++) { ++ total_size += op_info->lo_sg_list[i].length; ++ } ++ ++ ret_code = sxe2_fragcnt_to_quanta_cnt(op_info->num_lo_sges, &quanta); ++ if (ret_code) { ++ return ret_code; ++ } ++ if (qp->rd_fence_rate && (qp->ord_cnt++ == qp->rd_fence_rate)) { ++ ord_fence = true; ++ qp->ord_cnt = 
0; ++ } ++ ++ wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, ++ wr_info->wr_id, wr_info->push_wqe); ++ if (!wqe) { ++ return ENOMEM; ++ } ++ ++ addl_frag_cnt = ++ op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0; ++ ++ sxe2_set_sgelist_data(wqe, 0, op_info->lo_sg_list, qp->swqe_polarity); ++ ++ if (total_size == 0) { ++ get_64bit_val(wqe, 8, &frag_info); ++ frag_info = frag_info & (~SXE2_WQE_FRAG_VALID); ++ SXE2_SET_FIELD(frag_info, SXE2_WQE_FRAG_VALID, !qp->swqe_polarity); ++ set_64bit_val(wqe, 8, frag_info); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr set frag_info [%#llx] field [%d]\n", ++ frag_info, !qp->swqe_polarity); ++ } ++ } ++ ++ sxe2_set_remote_offset(wqe, 16, op_info->rem_addr.addr); ++ ++ for (i = 1, byte_off = 32; i < op_info->num_lo_sges; ++i) { ++ sxe2_set_sgelist_data(wqe, byte_off, &op_info->lo_sg_list[i], ++ qp->swqe_polarity); ++ byte_off += 16; ++ } ++ ++ if (!(op_info->num_lo_sges & 0x01) && op_info->num_lo_sges) { ++ sxe2_set_sgelist_data(wqe, byte_off, NULL, qp->swqe_polarity); ++ } ++ ++ sxe2_set_read_hdr(wqe, addl_frag_cnt, wr_info, qp, ord_fence); ++ ++ if (wr_info->push_wqe) { ++ sxe2_qp_push_wqe(qp, wqe, quanta, wqe_idx); ++ } else if (post_sq) { ++ sxe2_qp_ring_normal_db(qp); ++ } ++ ++#ifdef SXE2_CFG_DEBUG ++ qp->statistics.read_cnt++; ++ ++ if (wr_info->signaled) { ++ qp->statistics.total_signal_cnt++; ++ } ++ ++ qp->statistics.total_sqe_cnt++; ++ qp->statistics.last_send_sqwrid = wr_info->wr_id; ++#endif ++ ++ sxe2_dump_wqe(qp, wqe, quanta, wqe_idx, "rdma_read"); ++ ++ return 0; ++} ++ ++int sxe2_hw_mw_bind(struct sxe2_qp_common *qp, struct sxe2_wr_info *wr_info, ++ bool post_sq) ++{ ++ __le64 *wqe; ++ struct sxe2_bind_window *op_info; ++ __u32 wqe_idx; ++ __u16 quanta = SXE2_QP_WQE_MIN_QUANTA; ++ ++ op_info = &wr_info->op_info.bind_window; ++ ++ wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, ++ 0, wr_info->wr_id, wr_info->push_wqe); ++ if (!wqe) { ++ return ENOMEM; ++ } ++ ++ 
sxe2_set_wqe_mw_bind(wqe, op_info, qp); ++ ++ sxe2_set_bindmw_hdr(wqe, wr_info, qp); ++ ++ if (wr_info->push_wqe) { ++ sxe2_qp_push_wqe(qp, wqe, quanta, wqe_idx); ++ } else if (post_sq) { ++ sxe2_qp_ring_normal_db(qp); ++ } ++ ++#ifdef SXE2_CFG_DEBUG ++ qp->statistics.bind_mw_cnt++; ++ ++ if (wr_info->signaled) { ++ qp->statistics.total_signal_cnt++; ++ } ++ ++ qp->statistics.total_sqe_cnt++; ++ qp->statistics.last_send_sqwrid = wr_info->wr_id; ++#endif ++ ++ sxe2_dump_wqe(qp, wqe, quanta, wqe_idx, "mw_bind"); ++ ++ return 0; ++} ++ ++int sxe2_hw_local_invalidate(struct sxe2_qp_common *qp, ++ struct sxe2_wr_info *wr_info, bool post_sq) ++{ ++ __le64 *wqe; ++ struct sxe2_local_invalidate *op_info; ++ __u32 wqe_idx; ++ __u16 quanta = SXE2_QP_WQE_MIN_QUANTA; ++ struct ibv_sge sge = {}; ++ ++ op_info = &wr_info->op_info.local_inval; ++ ++ wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, ++ 0, wr_info->wr_id, wr_info->push_wqe); ++ if (!wqe) { ++ return ENOMEM; ++ } ++ ++ sge.lkey = op_info->target_stag; ++ sxe2_set_sgelist_data(wqe, 0, &sge, 0); ++ ++ set_64bit_val(wqe, 16, 0); ++ ++ sxe2_set_invalidate_hdr(wqe, wr_info, qp); ++ ++ if (wr_info->push_wqe) { ++ sxe2_qp_push_wqe(qp, wqe, quanta, wqe_idx); ++ } else if (post_sq) { ++ sxe2_qp_ring_normal_db(qp); ++ } ++ ++#ifdef SXE2_CFG_DEBUG ++ qp->statistics.local_inv_cnt++; ++ ++ if (wr_info->signaled) { ++ qp->statistics.total_signal_cnt++; ++ } ++ ++ qp->statistics.total_sqe_cnt++; ++ qp->statistics.last_send_sqwrid = wr_info->wr_id; ++#endif ++ ++ sxe2_dump_wqe(qp, wqe, quanta, wqe_idx, "local_invalidate"); ++ ++ return 0; ++} ++ ++static int sxe2_hw_post_receive(struct sxe2_qp_common *qp, ++ struct sxe2_rq_info *wr_info) ++{ ++ __u32 wqe_idx, i, byte_off, sge_count_valid = 0; ++ __u32 addl_frag_cnt; ++ __le64 *wqe; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct ibv_sge *p_sge = NULL; ++ ++ uqp = container_of(qp, struct sxe2_uqp, qp); ++ sctx = 
container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ if (qp->max_rq_sge_cnt < wr_info->num_sges) { ++ return EINVAL; ++ } ++ if (!qp->rq_size) { ++ return EINVAL; ++ } ++ wqe = sxe2_qp_get_next_recv_wqe(qp, &wqe_idx); ++ if (!wqe) { ++ return ENOMEM; ++ } ++ for (i = 0; i < wr_info->num_sges; i++) { ++ if (wr_info->sg_list[i].length != 0) { ++ sge_count_valid++; ++ if (p_sge == NULL) { ++ p_sge = &wr_info->sg_list[i]; ++ } ++ } ++ } ++ qp->rq_wrid_array[wqe_idx] = wr_info->wr_id; ++ ++ addl_frag_cnt = sge_count_valid > 1 ? (sge_count_valid - 1) : 0; ++ if (sge_count_valid) { ++ sxe2_set_sgelist_data(wqe, 0, p_sge, qp->rwqe_polarity); ++ } else { ++ sxe2_set_sgelist_data(wqe, 0, NULL, qp->rwqe_polarity); ++ } ++ if (sge_count_valid) { ++ for (i = 1, byte_off = SXE2_RQ_WQE_HEAD_OFFSET; i < wr_info->num_sges; i++) { ++ if (wr_info->sg_list[i].length == 0) { ++ continue; ++ } ++ sxe2_set_sgelist_data(wqe, byte_off, &wr_info->sg_list[i], ++ qp->rwqe_polarity); ++ byte_off += SXE2_RQ_WQE_FRAG_OFFSET; ++ } ++ if (!(sge_count_valid & SXE2_WQE_QUANTA_ODD_NUMBER) && sge_count_valid) { ++ sxe2_set_sgelist_data(wqe, byte_off, NULL, qp->rwqe_polarity); ++ } ++ } ++ set_64bit_val(wqe, 16, 0); ++ ++ sxe2_set_rcvq_hdr(wqe, addl_frag_cnt, qp); ++ ++ udma_to_device_barrier(); ++ ++ qp->doorbell_note[SXE2_QP_RQ_PI] = ++ htole32(SXE2_RING_CURRENT_HEAD(qp->rq_ring) * qp->rq_wqe_size_multiplier); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("POST RCV(qpn = %u): wqe_idx %u wr_id %llu"\ ++ " rq_pi %u ring_size %u\n", qp->qpn, wqe_idx, ++ qp->rq_wrid_array[wqe_idx], ++ SXE2_RING_CURRENT_HEAD(qp->rq_ring) * qp->rq_wqe_size_multiplier, ++ SXE2_RING_SIZE(qp->rq_ring)); ++ for (i = 0; i < wr_info->num_sges; i++) { ++ SXE2_VERBS_LOG_DEBUG_BDF("sgelist[%d] addr %" PRIu64 " len [%u] lkey [%u]\n", ++ i, wr_info->sg_list[i].addr, wr_info->sg_list[i].length, ++ wr_info->sg_list[i].lkey); ++ } ++ } ++ ++#ifdef SXE2_CFG_DEBUG ++ 
qp->statistics.total_rqe_cnt++; ++ qp->statistics.last_send_rqwrid = wr_info->wr_id; ++#endif ++ ++ return 0; ++} ++static void sxe2_wrinfo_init_inv(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, ++ struct sxe2_wr_info *wr_info) ++{ ++ struct sxe2_uah *ah; ++ ++ if (ib_wr->opcode == IBV_WR_SEND || ++ ib_wr->opcode == IBV_WR_SEND_WITH_IMM) { ++ wr_info->op_type = SXE2_OP_TYPE_SEND; ++ if (ib_wr->send_flags & IBV_SEND_SOLICITED) { ++ wr_info->op_type = SXE2_OP_TYPE_SEND_SOL; ++ } ++ } else { ++ wr_info->op_type = SXE2_OP_TYPE_SEND_INV; ++ if (ib_wr->send_flags & IBV_SEND_SOLICITED) { ++ wr_info->op_type = SXE2_OP_TYPE_SEND_SOL_INV; ++ } ++ wr_info->rkey_to_inv = ib_wr->invalidate_rkey; ++ } ++ wr_info->op_info.send.num_sges = (__u32)ib_wr->num_sge; ++ wr_info->op_info.send.sg_list = (struct ibv_sge *)ib_wr->sg_list; ++ if (ib_qp->qp_type == IBV_QPT_UD) { ++ ah = container_of(ib_wr->wr.ud.ah, struct sxe2_uah, ibv_ah); ++ wr_info->op_info.send.ah_id = ah->ah_id; ++ wr_info->op_info.send.qkey = ib_wr->wr.ud.remote_qkey; ++ wr_info->op_info.send.dest_qp = ib_wr->wr.ud.remote_qpn; ++ } ++ wr_info->funid = SXE2_RDMA_SEND; ++ if (ib_wr->send_flags & IBV_SEND_INLINE) { ++ wr_info->funid = SXE2_RDMA_SEND_INLINE; ++ } ++} ++static int sxe2_wrinfo_init(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, ++ struct sxe2_wr_info *wr_info) ++{ ++ struct sxe2_common_attrs *uk_attrs; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ int err = 0; ++ ++ uqp = container_of(ib_qp, struct sxe2_uqp, verbs_qp.qp); ++ sctx = container_of(ib_qp->context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ uk_attrs = &sctx->uk_attrs; ++ ++ memset(wr_info, 0, sizeof(*wr_info)); ++ ++ wr_info->wr_id = (__u64)(ib_wr->wr_id); ++ wr_info->push_wqe = uqp->qp.push_db ? 
true : false; ++ ++ if ((ib_wr->send_flags & IBV_SEND_SIGNALED) || uqp->sq_sig_all) { ++ wr_info->signaled = true; ++ } ++ ++ if (ib_wr->send_flags & IBV_SEND_FENCE) { ++ wr_info->read_fence = true; ++ } ++ ++ if (uqp->send_cq->report_rtt) { ++ wr_info->report_rtt = true; ++ } ++ ++ switch (ib_wr->opcode) { ++ case IBV_WR_SEND_WITH_IMM: ++ if (!(uqp->qp.qp_caps & SXE2_SEND_WITH_IMM)) { ++ return EINVAL; ++ } ++ wr_info->imm_data_valid = true; ++ wr_info->imm_data = ntohl(ib_wr->imm_data); ++ SWITCH_FALLTHROUGH; ++ case IBV_WR_SEND: ++ case IBV_WR_SEND_WITH_INV: ++ sxe2_wrinfo_init_inv(ib_qp, ib_wr, wr_info); ++ break; ++ case IBV_WR_RDMA_WRITE_WITH_IMM: ++ if (!(uqp->qp.qp_caps & SXE2_WRITE_WITH_IMM)) { ++ return EINVAL; ++ } ++ wr_info->imm_data_valid = true; ++ wr_info->imm_data = ntohl(ib_wr->imm_data); ++ SWITCH_FALLTHROUGH; ++ case IBV_WR_RDMA_WRITE: ++ wr_info->op_type = SXE2_OP_TYPE_RDMA_WRITE; ++ if (ib_wr->send_flags & IBV_SEND_SOLICITED) { ++ wr_info->op_type = SXE2_OP_TYPE_RDMA_WRITE_SOL; ++ } ++ ++ wr_info->op_info.rdma_write.num_lo_sges = (__u32)ib_wr->num_sge; ++ wr_info->op_info.rdma_write.lo_sg_list = ib_wr->sg_list; ++ wr_info->op_info.rdma_write.rem_addr.addr = ++ ib_wr->wr.rdma.remote_addr; ++ wr_info->op_info.rdma_write.rem_addr.lkey = ib_wr->wr.rdma.rkey; ++ wr_info->funid = SXE2_RDMA_WRITE; ++ if (ib_wr->send_flags & IBV_SEND_INLINE) { ++ wr_info->funid = SXE2_RDMA_WRITE_INLINE; ++ } ++ break; ++ case IBV_WR_RDMA_READ: ++ if (ib_wr->num_sge > (int)uk_attrs->max_hw_read_sges) { ++ return EINVAL; ++ } ++ wr_info->op_type = SXE2_OP_TYPE_RDMA_READ; ++ wr_info->op_info.rdma_read.rem_addr.addr = ++ ib_wr->wr.rdma.remote_addr; ++ wr_info->op_info.rdma_read.rem_addr.lkey = ib_wr->wr.rdma.rkey; ++ ++ wr_info->op_info.rdma_read.lo_sg_list = ib_wr->sg_list; ++ wr_info->op_info.rdma_read.num_lo_sges = (__u32)ib_wr->num_sge; ++ wr_info->funid = SXE2_RDMA_READ; ++ break; ++ case IBV_WR_LOCAL_INV: ++ wr_info->op_type = SXE2_OP_TYPE_LOCAL_INV; ++ 
wr_info->op_info.local_inval.target_stag = ++ ib_wr->invalidate_rkey; ++ wr_info->funid = SXE2_RDMA_LOCAL_INV; ++ wr_info->post_wqe = true; ++ break; ++ default: ++ err = EINVAL; ++ break; ++ } ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr_info qpn [0x%x] opcode [%d] "\ ++ "wr_id %llu push_mode %d post_wqe %d signald %d\n", ++ ib_qp->qp_num, ib_wr->opcode, wr_info->wr_id, ++ wr_info->push_wqe, wr_info->post_wqe, wr_info->signaled); ++ } ++ return err; ++} ++ ++int sxe2_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, ++ struct ibv_send_wr **bad_wr) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ bool reflush = false; ++ struct sxe2_wr_info wr_info; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ int err; ++ ++ uqp = container_of(ib_qp, struct sxe2_uqp, verbs_qp.qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("upost send enter, qpn [%u]\n", ++ uqp->qp.qpn); ++ } ++ ++ err = pthread_spin_lock(&uqp->lock); ++ if (err) { ++ return err; ++ } ++ ++ if (!SXE2_RING_MORE_WORK(uqp->qp.sq_ring) && ++ ib_qp->state == IBV_QPS_ERR) { ++ reflush = true; ++ } ++ ++ while (ib_wr) { ++ err = sxe2_wrinfo_init(ib_qp, ib_wr, &wr_info); ++ if (err) { ++ *bad_wr = ib_wr; ++ break; ++ } ++ ++#if 0 ++ wr_info.post_wqe = true; ++#endif ++ err = g_rdma_op[wr_info.funid](&uqp->qp, &wr_info, ++ wr_info.post_wqe); ++ if (err) { ++ *bad_wr = ib_wr; ++ break; ++ } ++ ib_wr = ib_wr->next; ++ } ++ ++ if (!uqp->qp.push_db) ++ sxe2_qp_ring_normal_db(&uqp->qp); ++ if (reflush) ++ sxe2_qp_flush_wqe(ib_qp, true, false); ++ ++ pthread_spin_unlock(&uqp->lock); ++ ++ return err; ++} ++ ++int sxe2_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, ++ struct ibv_recv_wr **bad_wr) ++{ ++ struct sxe2_rq_info rq_info = {}; ++ struct sxe2_uqp *uqp = NULL; ++ bool reflush = false; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ int err; ++ ++ uqp = container_of(ib_qp, struct 
sxe2_uqp, verbs_qp.qp); ++ sctx = container_of(uqp->verbs_qp.qp.context, struct sxe2_rdma_ucontext, ++ ibv_ctx.context); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("upost receive enter, qpn [%u]\n", ++ uqp->qp.qpn); ++ } ++ ++ err = pthread_spin_lock(&uqp->lock); ++ if (err) { ++ return err; ++ } ++ ++ if (!SXE2_RING_MORE_WORK(uqp->qp.rq_ring) && ++ ib_qp->state == IBV_QPS_ERR) { ++ reflush = true; ++ } ++ ++ while (ib_wr) { ++ if (ib_wr->num_sge > (int)uqp->qp.max_rq_sge_cnt) { ++ *bad_wr = ib_wr; ++ err = EINVAL; ++ goto error; ++ } ++ rq_info.num_sges = (__u32)ib_wr->num_sge; ++ rq_info.wr_id = ib_wr->wr_id; ++ rq_info.sg_list = ib_wr->sg_list; ++ err = sxe2_hw_post_receive(&uqp->qp, &rq_info); ++ if (err) { ++ *bad_wr = ib_wr; ++ goto error; ++ } ++ ++ if (reflush) { ++ sxe2_qp_flush_wqe(ib_qp, false, true); ++ } ++ ++ ib_wr = ib_wr->next; ++ } ++error: ++ pthread_spin_unlock(&uqp->lock); ++ ++ return err; ++} ++ ++static int sxe2_get_next_cqe(struct sxe2_cq_uk *cq, struct sxe2_cqe_info *cqe_info) ++{ ++ int i; ++ __le64 *cqe; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_ucq *vendor_cq = NULL; ++ vendor_cq = container_of(cq, struct sxe2_ucq, cq); ++ sctx = container_of(vendor_cq->verbs_cq.cq.context, struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ cqe = SXE2_GET_CURRENT_CQ_ELEM(cq); ++ ++ cqe_info->info.buf[SXE2_CQE_SIZE - 1] = le64toh(cqe[SXE2_CQE_SIZE - 1]); ++ if (cqe_info->info.field.cqe_valid != cq->polarity) { ++ return -ENOENT; ++ } ++ ++ udma_from_device_barrier(); ++ ++ for (i = 0; i < SXE2_CQE_SIZE - 1; i++) { ++ cqe_info->info.buf[i] = le64toh(cqe[i]); ++ } ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("CQ1 (%d) CI(0x%x) cqeinfo:\n"\ ++ "%#llx(%#llx) %#llx(%#llx) %#llx(%#llx) %#llx(%#llx)\n"\ ++ "%#llx(%#llx) %#llx(%#llx) %#llx(%#llx) %#llx(%#llx)\n", ++ cq->cq_id, ++ cq->cq_ring.head, ++ cqe[0], cqe_info->info.buf[0], ++ cqe[1], cqe_info->info.buf[1], ++ cqe[2], cqe_info->info.buf[2], ++ cqe[3], 
cqe_info->info.buf[3], ++ cqe[4], cqe_info->info.buf[4], ++ cqe[5], cqe_info->info.buf[5], ++ cqe[6], cqe_info->info.buf[6], ++ cqe[7], cqe_info->info.buf[7]); ++ } ++ ++ return SXE2_CQ_OK; ++} ++ ++static int sxe2_hw_flush_one_sq_wqe(struct sxe2_cq_uk *cq, ++ struct sxe2_qp_common *qp, struct sxe2_cqe_info *cqe_info) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_ucq *vendor_cq = NULL; ++ __le64 *sw_wqe; ++ __u64 wqe_hdr; ++ __u32 tail; ++ ++ vendor_cq = container_of(cq, struct sxe2_ucq, cq); ++ sctx = container_of(vendor_cq->verbs_cq.cq.context, struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ if (!SXE2_RING_MORE_WORK(qp->sq_ring) || !SXE2_RING_MORE_WORK_PAD(qp->sq_ring)) { ++ return -ENOENT; ++ } ++ do { ++ tail = qp->sq_ring.tail; ++ sw_wqe = qp->sq_base[tail].elem; ++ get_64bit_val(sw_wqe, 24, &wqe_hdr); ++ cqe_info->info.field.op = (__u8)FIELD_GET(SXE2_WQE_OPCODE, wqe_hdr); ++ SXE2_RING_SET_TAIL(qp->sq_ring, ++ tail + qp->sq_wrtrk_array[tail].quanta); ++ if (cqe_info->info.field.op != SXE2_OP_TYPE_NOP) { ++ cqe_info->wr_id = qp->sq_wrtrk_array[tail].wrid; ++ cqe_info->bytes = qp->sq_wrtrk_array[tail].wr_len; ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("flush qp %u sq_pi %u sq_ci %u wqe_idx %u wr_id %llu.\n", ++ qp->qpn, SXE2_RING_CURRENT_HEAD(qp->sq_ring), ++ SXE2_RING_CURRENT_TAIL(qp->sq_ring), tail, ++ cqe_info->wr_id); ++ } ++#ifdef SXE2_CFG_DEBUG ++ qp->statistics.flushed_sq_cnt++; ++ qp->statistics.last_rcvd_sqwrid = cqe_info->wr_id; ++#endif ++ break; ++ } ++ } while (1); ++ ++ return SXE2_CQ_OK; ++} ++ ++static void sxe2_move_srq_ring_tail(struct sxe2_srq_verbs *srq) ++{ ++ __u32 tail; ++ while (SXE2_RING_MORE_WORK(srq->srq_ring)) { ++ tail = SXE2_RING_CURRENT_TAIL(srq->srq_ring); ++ if (srq->srqe_array[tail] == SXE2_SRQE_BUSY) { ++ break; ++ } ++ SXE2_RING_MOVE_TAIL(srq->srq_ring); ++ } ++} ++static void sxe2_hw_deal_srq_cqe(struct sxe2_rdma_ucontext *sctx, struct sxe2_cq_uk *cq, ++ struct sxe2_cqe_info *cqe_info, struct 
sxe2_qp_common *qp, __u32 qpn) ++{ ++ struct sxe2_srq_verbs *srq; ++ __u32 wqe_idx; ++ __u32 array_idx; ++ ++ srq = qp->srq; ++ wqe_idx = (__u32)cqe_info->info.field.wq_desc_idx; ++ array_idx = wqe_idx / srq->wqe_size_multiplier; ++ if (srq->srqe_array[array_idx] == SXE2_SRQE_BUSY) { ++ srq->srqe_array[array_idx] = SXE2_SRQE_FREE; ++ } else { ++ SXE2_VERBS_LOG_ERROR_BDF("cq %d received invalid cqe"\ ++ "(qpn %u srqn %u wqe_idx %u mul_size %d flag %d).\n", ++ cq->cq_id, qpn, srq->srq_id, wqe_idx, ++ srq->wqe_size_multiplier, srq->srqe_array[array_idx]); ++ } ++ cqe_info->wr_id = srq->srq_wrid_array[array_idx]; ++ sxe2_move_srq_ring_tail(srq); ++} ++static int sxe2_hw_deal_rq_cqe(struct sxe2_rdma_ucontext *sctx, struct sxe2_cq_uk *cq, ++ struct sxe2_cqe_info *cqe_info, struct sxe2_qp_common *qp, __u32 qpn) ++{ ++ __u32 wqe_idx; ++ __u32 array_idx; ++ ++ wqe_idx = (__u32)cqe_info->info.field.wq_desc_idx; ++ array_idx = wqe_idx / qp->rq_wqe_size_multiplier; ++ if (cqe_info->info.field.major_err == SXE2_WR_FLUSH_ERR) { ++ if (!SXE2_RING_MORE_WORK(qp->rq_ring)) { ++ return -ENOENT; ++ } ++ SXE2_VERBS_LOG_DEBUG_BDF("flush qp %u state rq_pi %u rq_ci %u .\n", ++ qpn, SXE2_RING_CURRENT_HEAD(qp->rq_ring), ++ SXE2_RING_CURRENT_TAIL(qp->rq_ring)); ++ array_idx = qp->rq_ring.tail; ++ } ++ cqe_info->wr_id = qp->rq_wrid_array[array_idx]; ++ SXE2_RING_SET_TAIL(qp->rq_ring, array_idx + 1); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("update qp %u rq_ci %u wqeidx %u arridx %u wrid %llu.\n", ++ qpn, SXE2_RING_CURRENT_TAIL(qp->rq_ring), wqe_idx, ++ array_idx, cqe_info->wr_id); ++ } ++ return 0; ++} ++static int sxe2_hw_deal_sq_cqe(struct sxe2_rdma_ucontext *sctx, struct sxe2_cq_uk *cq, ++ struct sxe2_cqe_info *cqe_info, struct sxe2_qp_common *qp, __u32 qpn) ++{ ++ __u32 wqe_idx; ++ ++ wqe_idx = (__u32)cqe_info->info.field.wq_desc_idx; ++ ++ if (cqe_info->info.field.push_dropped) { ++ qp->llwqe_mode = false; ++ qp->push_dropped = true; ++ } ++ if 
(cqe_info->info.field.major_err != SXE2_WR_FLUSH_ERR) { ++ cqe_info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; ++ cqe_info->bytes = qp->sq_wrtrk_array[wqe_idx].wr_len; ++ SXE2_RING_SET_TAIL(qp->sq_ring, wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("update qp %u sq_ci %u wqe_idx %u wr_id %llu.\n", ++ qpn, SXE2_RING_CURRENT_TAIL(qp->sq_ring), wqe_idx, ++ cqe_info->wr_id); ++ } ++ } else { ++ return sxe2_hw_flush_one_sq_wqe(cq, qp, cqe_info); ++ } ++ return 0; ++} ++ ++static int sxe2_hw_cq_poll(struct sxe2_cq_uk *cq, ++ struct sxe2_cqe_info *cqe_info) ++{ ++ struct sxe2_qp_common *qp; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_ucq *vendor_cq = NULL; ++ struct sxe2_ring *cur_ring = NULL; ++ __u32 wqe_idx; ++ int ret_code = SXE2_CQ_OK; ++ __le64 *cqe; ++ __u64 qword = 0; ++ __u32 qpn = 0; ++ bool move_cq_head = true; ++ ++ vendor_cq = container_of(cq, struct sxe2_ucq, cq); ++ sctx = container_of(vendor_cq->verbs_cq.cq.context, struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ret_code = sxe2_get_next_cqe(cq, cqe_info); ++ if (ret_code != SXE2_CQ_OK) { ++ return ret_code; ++ } ++ ++ qp = (struct sxe2_qp_common *)(unsigned long)cqe_info->info.field.qpc; ++ if (!qp || qp->destroy_pending) { ++ ret_code = -EFAULT; ++ goto exit; ++ } ++ ++ qpn = qp->qpn; ++ wqe_idx = (__u32)cqe_info->info.field.wq_desc_idx; ++ cqe_info->bytes = cqe_info->info.field.payload_len; ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("cqe process cqn [%u] ci [%u] qp [%u] wqeidx %u.\n", ++ cq->cq_id, SXE2_RING_CURRENT_HEAD(cq->cq_ring), qpn, wqe_idx); ++ } ++ if (cqe_info->info.field.is_srq) { ++ sxe2_hw_deal_srq_cqe(sctx, cq, cqe_info, qp, qpn); ++ } else if (cqe_info->info.field.qp_type == SXE2_CQE_QTYPE_RQ) { ++ cur_ring = &qp->rq_ring; ++ ret_code = sxe2_hw_deal_rq_cqe(sctx, cq, cqe_info, qp, qpn); ++ if (ret_code) { ++ goto exit; ++ } ++ } else { ++ cur_ring = &qp->sq_ring; ++ ret_code = 
sxe2_hw_deal_sq_cqe(sctx, cq, cqe_info, qp, qpn); ++ if (ret_code) { ++ goto exit; ++ } ++ } ++ ++exit: ++ if (!ret_code && cqe_info->info.field.major_err == SXE2_WR_FLUSH_ERR) { ++ if (cur_ring && SXE2_RING_MORE_WORK(*cur_ring)) { ++ move_cq_head = false; ++ } ++ } ++ ++ if (move_cq_head) { ++ SXE2_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); ++ if (!SXE2_RING_CURRENT_HEAD(cq->cq_ring)) { ++ cq->polarity ^= 1; ++ } ++ ++ SXE2_RING_MOVE_TAIL(cq->cq_ring); ++ cq->doorbell_note[SXE2_CQ_SET_CI] = ++ htole32(SXE2_RING_CURRENT_HEAD(cq->cq_ring)); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("update qp %u cq %d ci %u.\n", ++ qpn, cq->cq_id, SXE2_RING_CURRENT_HEAD(cq->cq_ring)); ++ } ++ } else { ++ cqe = SXE2_GET_CURRENT_CQ_ELEM(cq); ++ get_64bit_val(cqe, 24, &qword); ++ qword &= ~SXE2_CQE_WQEIDX; ++ qword |= FIELD_PREP(SXE2_CQE_WQEIDX, cur_ring->tail); ++ set_64bit_val(cqe, 24, qword); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("cqn [%u] idx [%u] flush wqeidx %d to %d qpn %u.\n", ++ cq->cq_id, SXE2_RING_CURRENT_HEAD(cq->cq_ring), ++ wqe_idx, cur_ring->tail, qpn); ++ } ++ } ++ ++ return ret_code; ++} ++ ++static enum ibv_wc_status sxe2_flush_err_to_ib_wc_status(enum sxe2_major_opcode opcode) ++{ ++ switch (opcode) { ++ case SXE2_LOCAL_PROTECTION_ERR: ++ return IBV_WC_LOC_PROT_ERR; ++ case SXE2_BAD_RESPONSE_ERR: ++ return IBV_WC_BAD_RESP_ERR; ++ case SXE2_REMOTE_ACCESS_ERR: ++ return IBV_WC_REM_ACCESS_ERR; ++ case SXE2_LOCAL_QP_OP_ERR: ++ return IBV_WC_LOC_QP_OP_ERR; ++ case SXE2_REMOTE_OPERATION_ERR: ++ return IBV_WC_REM_OP_ERR; ++ case SXE2_LOCAL_LEN_ERR: ++ return IBV_WC_LOC_LEN_ERR; ++ case SXE2_LOCAL_ACCESS_ERR: ++ return IBV_WC_LOC_ACCESS_ERR; ++ case SXE2_WR_FLUSH_ERR: ++ return IBV_WC_WR_FLUSH_ERR; ++ case SXE2_TRANS_RETRY_CNT_EXCEED_ERR: ++ return IBV_WC_RETRY_EXC_ERR; ++ case SXE2_MW_BIND_ERR: ++ return IBV_WC_MW_BIND_ERR; ++ case SXE2_REMOTE_INVALID_REQUEST_ERR: ++ return IBV_WC_REM_INV_REQ_ERR; ++ case SXE2_RNR_RETRY_CNT_EXCEED_ERR: ++ 
return IBV_WC_RNR_RETRY_EXC_ERR; ++ default: ++ return IBV_WC_GENERAL_ERR; ++ } ++} ++ ++static enum ibv_wc_opcode sxe2_get_ib_wc_op_sq(struct sxe2_cqe_info *cur_cqe) ++{ ++ enum ibv_wc_opcode opcode; ++ ++ switch (cur_cqe->info.field.op) { ++ case SXE2_OP_TYPE_RDMA_WRITE: ++ case SXE2_OP_TYPE_RDMA_WRITE_SOL: ++ opcode = IBV_WC_RDMA_WRITE; ++ break; ++ case SXE2_OP_TYPE_RDMA_READ: ++ opcode = IBV_WC_RDMA_READ; ++ break; ++ case SXE2_OP_TYPE_SEND_SOL: ++ case SXE2_OP_TYPE_SEND_SOL_INV: ++ case SXE2_OP_TYPE_SEND_INV: ++ case SXE2_OP_TYPE_SEND: ++ opcode = IBV_WC_SEND; ++ break; ++ case SXE2_OP_TYPE_BIND_MW: ++ opcode = IBV_WC_BIND_MW; ++ break; ++ case SXE2_OP_TYPE_LOCAL_INV: ++ opcode = IBV_WC_LOCAL_INV; ++ break; ++ default: ++ opcode = IBV_WC_DRIVER1; ++ break; ++ } ++ return opcode; ++} ++ ++static enum ibv_wc_opcode sxe2_get_ib_wc_op_rq(struct sxe2_cqe_info *cur_cqe) ++{ ++ enum ibv_wc_opcode opcode; ++ ++ if (cur_cqe->info.field.imm_data_flag) { ++ opcode = IBV_WC_RECV_RDMA_WITH_IMM; ++ } else { ++ opcode = IBV_WC_RECV; ++ } ++ ++ return opcode; ++} ++ ++static void sxe2_cq_cqe_ext(struct sxe2_cqe_info *cur_cqe) ++{ ++ struct sxe2_ucq *ucq = container_of(cur_cqe, struct sxe2_ucq, cur_cqe); ++ struct ibv_cq_ex *ibvcq_ex = &ucq->verbs_cq.cq_ex; ++ ibvcq_ex->wr_id = cur_cqe->wr_id; ++ if (cur_cqe->info.field.error) { ++ ibvcq_ex->status = sxe2_flush_err_to_ib_wc_status(cur_cqe->info.field.major_err); ++ } else { ++ ibvcq_ex->status = IBV_WC_SUCCESS; ++ } ++} ++ ++static void sxe2_cq_pollinfo_to_wc(struct ibv_wc *entry, struct sxe2_cqe_info *cur_cqe) ++{ ++ struct sxe2_qp_common *qp; ++ struct ibv_qp *ib_qp; ++ ++ entry->wc_flags = 0; ++ entry->wr_id = cur_cqe->wr_id; ++ entry->qp_num = cur_cqe->info.field.qp_id; ++ qp = (struct sxe2_qp_common *)(unsigned long)cur_cqe->info.field.qpc; ++ ib_qp = qp->back_qp; ++ ++ if (cur_cqe->info.field.error) { ++ entry->status = sxe2_flush_err_to_ib_wc_status(cur_cqe->info.field.major_err); ++ entry->vendor_err = 
(uint32_t)(cur_cqe->info.field.major_err << 16 | ++ cur_cqe->info.field.minor_err); ++ } else { ++ entry->status = IBV_WC_SUCCESS; ++ } ++ ++ if (cur_cqe->info.field.imm_data_flag) { ++ entry->imm_data = htonl(cur_cqe->info.field.imme_data); ++ entry->wc_flags |= IBV_WC_WITH_IMM; ++ } ++ ++ if (cur_cqe->info.field.qp_type == SXE2_CQE_QTYPE_SQ) { ++ entry->opcode = sxe2_get_ib_wc_op_sq(cur_cqe); ++ } else { ++ entry->opcode = sxe2_get_ib_wc_op_rq(cur_cqe); ++ if (ib_qp->qp_type != IBV_QPT_UD && ++ cur_cqe->info.field.stag_or_lrkey) { ++ entry->invalidated_rkey = cur_cqe->info.field.l_r_key; ++ entry->wc_flags |= IBV_WC_WITH_INV; ++ } ++ } ++ ++ if (ib_qp->qp_type == IBV_QPT_UD) { ++ entry->src_qp = cur_cqe->info.field.ud_src_qpn; ++ entry->wc_flags |= IBV_WC_GRH; ++ } else { ++ entry->src_qp = cur_cqe->info.field.qp_id; ++ } ++ entry->byte_len = cur_cqe->bytes; ++} ++ ++static int sxe2_poll_one(struct sxe2_cq_uk *ukcq, struct sxe2_cqe_info *cur_cqe, ++ struct ibv_wc *entry) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_ucq *vendor_cq = NULL; ++ vendor_cq = container_of(ukcq, struct sxe2_ucq, cq); ++ sctx = container_of(vendor_cq->verbs_cq.cq.context, struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ int ret = sxe2_hw_cq_poll(ukcq, cur_cqe); ++ if (ret) { ++ if (ret == -EFAULT) { ++ SXE2_VERBS_LOG_WARN_BDF("CQ %d skip cqe(CI 0x%x) for qp destroy.\n", ++ ukcq->cq_id, ukcq->cq_ring.head); ++ } ++ return ret; ++ } ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("CQ (%d) CI(0x%x) wrid(%llu) cqeinfo:\n"\ ++ "payload_len : %ul\n"\ ++ "packet_seq : %d\n"\ ++ "qpc : %#llx\n"\ ++ "l_r_key : %ul\n"\ ++ "qp_id : %d\n"\ ++ "minor_err : %d\n"\ ++ "major_err : %d\n"\ ++ "wq_desc_idx : %d\n"\ ++ "extended_cqe : %d\n"\ ++ "push_dropped : %d\n"\ ++ "ipv4 : %d\n"\ ++ "stag_or_lrkey : %d\n"\ ++ "solicited_evt : %d\n"\ ++ "error : %d\n"\ ++ "op : %d\n"\ ++ "qp_type : %d\n"\ ++ "imme_data : %ul\n"\ ++ "srqn : %d\n"\ ++ "is_srq : %d\n"\ ++ "cqe_timestamp : 
%#llx\n"\ ++ "ud_smac : %#llx\n"\ ++ "ud_vlan_tag : %d\n"\ ++ "ud_src_qpn : %d\n"\ ++ "vsi_index : %d\n"\ ++ "vlan_tag_flag : %d\n"\ ++ "ud_smac_valid : %d\n"\ ++ "imm_data_flag : %d\n"\ ++ "cqe_valid : %d\n", ++ ukcq->cq_id, ++ ukcq->cq_ring.head, ++ cur_cqe->wr_id, ++ cur_cqe->info.field.payload_len, ++ cur_cqe->info.field.packet_seq, ++ cur_cqe->info.field.qpc, ++ cur_cqe->info.field.l_r_key, ++ cur_cqe->info.field.qp_id, ++ cur_cqe->info.field.minor_err, ++ cur_cqe->info.field.major_err, ++ cur_cqe->info.field.wq_desc_idx, ++ cur_cqe->info.field.extended_cqe, ++ cur_cqe->info.field.push_dropped, ++ cur_cqe->info.field.ipv4, ++ cur_cqe->info.field.stag_or_lrkey, ++ cur_cqe->info.field.solicited_evt, ++ cur_cqe->info.field.error, ++ cur_cqe->info.field.op, ++ cur_cqe->info.field.qp_type, ++ cur_cqe->info.field.imme_data, ++ cur_cqe->info.field.srqn, ++ cur_cqe->info.field.is_srq, ++ cur_cqe->info.field.cqe_timestamp, ++ (__u64)cur_cqe->info.field.ud_smac, ++ cur_cqe->info.field.ud_vlan_tag, ++ cur_cqe->info.field.ud_src_qpn, ++ cur_cqe->info.field.vsi_index, ++ cur_cqe->info.field.vlan_tag_flag, ++ cur_cqe->info.field.ud_smac_valid, ++ cur_cqe->info.field.imm_data_flag, ++ cur_cqe->info.field.cqe_valid); ++ } ++ ++ if (!entry) { ++ sxe2_cq_cqe_ext(cur_cqe); ++ } else { ++ sxe2_cq_pollinfo_to_wc(entry, cur_cqe); ++ } ++ ++ return 0; ++} ++ ++int sxe2_upoll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc) ++{ ++ struct sxe2_ucq *ucq = NULL; ++ struct sxe2_cqe_info *cur_cqe; ++ int ret; ++ int npolled = 0; ++ ++ ucq = container_of(cq, struct sxe2_ucq, verbs_cq.cq); ++ cur_cqe = &ucq->cur_cqe; ++ ++ ret = pthread_spin_lock(&ucq->lock); ++ if (ret) { ++ return -ret; ++ } ++ while (npolled < num_entries) { ++ ret = sxe2_poll_one(&ucq->cq, cur_cqe, wc + npolled); ++ if (ret == SXE2_CQ_OK) { ++ ++npolled; ++ continue; ++ } ++ if (ret == -ENOENT) { ++ break; ++ } ++ } ++ ++ pthread_spin_unlock(&ucq->lock); ++ return npolled; ++} ++ ++static void 
sxe2_cq_arm_notify(struct sxe2_cq_uk *arm_cq, ++ enum sxe2_arm_type arm_type) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_ucq *vendor_cq = NULL; ++ uint64_t doorbell; ++ uint32_t sn; ++ uint32_t ci; ++ uint32_t cmd; ++ ++ vendor_cq = container_of(arm_cq, struct sxe2_ucq, cq); ++ sctx = container_of(vendor_cq->verbs_cq.cq.context, struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ sn = arm_cq->arm_sn & 3; ++ ci = SXE2_RING_CURRENT_HEAD(arm_cq->cq_ring); ++ ++ if (arm_type == SXE2_CQ_ARM_SOLICITED) { ++ cmd = (uint32_t)SXE2_CQ_DB_REQ_SOLICITED; ++ } else { ++ cmd = SXE2_CQ_DB_REQ_NOSOLICITED; ++ } ++ ++ doorbell = 0; ++ doorbell = sn << 29 | cmd | ci; ++ doorbell <<= 32; ++ doorbell |= arm_cq->cq_id; ++ ++ arm_cq->doorbell_note[SXE2_CQ_ARM_DB] = htole32(sn << 29 | cmd | ci); ++ ++ udma_to_device_barrier(); ++ ++ set_64bit_val(arm_cq->cqe_alloc_db, 0, doorbell); ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("CQ (%u -> %p) ARM NOTIFY DBNOTE(0x%x)"\ ++ " DB(0x%lx) CI(%u) SN(%u) CMD(%u)\n", arm_cq->cq_id, ++ arm_cq->cqe_alloc_db, htole32(sn << 29 | cmd | ci), ++ htole64(doorbell), ci, sn, cmd); ++ } ++} ++ ++int sxe2_uarm_cq(struct ibv_cq *cq, int solicited) ++{ ++ struct sxe2_ucq *vendor_cq = NULL; ++ enum sxe2_arm_type arm_type; ++ int ret; ++ ++ arm_type = solicited ? 
SXE2_CQ_ARM_SOLICITED : SXE2_CQ_ARM_NEXT; ++ vendor_cq = container_of(cq, struct sxe2_ucq, verbs_cq.cq); ++ ret = pthread_spin_lock(&vendor_cq->lock); ++ if (ret) { ++ return ret; ++ } ++ ++ sxe2_cq_arm_notify(&vendor_cq->cq, arm_type); ++ ++ pthread_spin_unlock(&vendor_cq->lock); ++ ++ return 0; ++} ++ ++void sxe2_ucq_event(struct ibv_cq *cq) ++{ ++ struct sxe2_ucq *vendor_cq = NULL; ++ struct sxe2_cq_uk *arm_cq; ++ ++ vendor_cq = container_of(cq, struct sxe2_ucq, verbs_cq.cq); ++ arm_cq = &vendor_cq->cq; ++ if (pthread_spin_lock(&vendor_cq->lock)) { ++ return; ++ } ++ ++ arm_cq->arm_sn += 1; ++ ++ pthread_spin_unlock(&vendor_cq->lock); ++} ++ ++static void sxe2_clean_base_cq(struct sxe2_qp_common *qp, struct sxe2_cq_uk *cq, int cq_type) ++{ ++ __le64 *cqe; ++ __u64 qword, qpc; ++ __u32 cq_head; ++ __u8 polarity, cq_polarity; ++ ++ cq_head = cq->cq_ring.head; ++ cq_polarity = cq->polarity; ++ do { ++ cqe = ((struct sxe2_cqe *)(cq->cq_base))[cq_head].buf; ++ get_64bit_val(cqe, 56, &qword); ++ polarity = (__u8)FIELD_GET(SXE2_CQE_VALID, qword); ++ if (polarity != cq_polarity) { ++ break; ++ } ++ ++ get_64bit_val(cqe, 8, &qpc); ++ if ((void *)(uintptr_t)qpc == (void *)qp) { ++ set_64bit_val(cqe, 8, 0); ++#ifdef SXE2_CFG_DEBUG ++ if (cq_type == SQ_CQ) { ++ qp->statistics.cleaned_sq_cnt++; ++ } else { ++ qp->statistics.cleaned_rq_cnt++; ++ } ++#endif ++ } ++ ++ cq_head = (cq_head + 1) % cq->cq_ring.size; ++ if (!cq_head) { ++ cq_polarity ^= 1; ++ } ++ } while (true); ++} ++ ++void sxe2_clean_cqes(struct sxe2_qp_common *qp, ++ struct sxe2_ucq *vendor_cq, int cq_type) ++{ ++ struct sxe2_cq_uk *cq = &vendor_cq->cq; ++ int ret; ++ ++ ret = pthread_spin_lock(&vendor_cq->lock); ++ if (ret) { ++ return; ++ } ++ ++ sxe2_clean_base_cq(qp, cq, cq_type); ++ ++ pthread_spin_unlock(&vendor_cq->lock); ++} ++ ++int sxe2_start_poll(struct ibv_cq_ex *ibvcq_ex, struct ibv_poll_cq_attr *attr) ++{ ++ struct sxe2_ucq *vendor_cq = NULL; ++ struct sxe2_cqe_info *cur_cqe; ++ struct 
sxe2_rdma_ucontext *sctx = NULL; ++ int ret; ++ ++ vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); ++ sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ret = pthread_spin_lock(&vendor_cq->lock); ++ if (ret) { ++ return ret; ++ } ++ ++ cur_cqe = &vendor_cq->cur_cqe; ++ ret = sxe2_poll_one(&vendor_cq->cq, cur_cqe, NULL); ++ if (ret == SXE2_CQ_OK) { ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("sxe2 start poll, cqn [%u] wr_id %"PRIu64" succeed.\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id); ++ } ++ return SXE2_CQ_OK; ++ } ++ ++ if (ret == SXE2_CQ_NOENT) { ++ ret = ENOENT; ++ } ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("sxe2 start poll, cqn [%u] wr_id %"PRIu64" failed[%d].\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, ret); ++ } ++ ++ pthread_spin_unlock(&vendor_cq->lock); ++ ++ return ret; ++} ++ ++int sxe2_next_poll(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct sxe2_ucq *vendor_cq = NULL; ++ struct sxe2_cqe_info *cur_cqe; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ int ret; ++ ++ vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); ++ sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("sxe2 next poll, cqn [%u] wr_id %"PRIu64".\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id); ++ } ++ cur_cqe = &vendor_cq->cur_cqe; ++ ret = sxe2_poll_one(&vendor_cq->cq, cur_cqe, NULL); ++ if (ret == SXE2_CQ_NOENT) { ++ return ENOENT; ++ } ++ ++ return ret; ++} ++ ++void sxe2_end_poll(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_ucq *vendor_cq = NULL; ++ ++ vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); ++ sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ if (sctx->enable_io_log) { ++ 
SXE2_VERBS_LOG_DEBUG_BDF("sxe2 end poll, cqn [%u] wr_id %"PRIu64".\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id); ++ } ++ ++ pthread_spin_unlock(&vendor_cq->lock); ++} ++ ++uint64_t sxe2_wc_read_completion_ts(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_ucq *vendor_cq = NULL; ++ uint64_t timestamp; ++ ++ vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); ++ sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ timestamp = vendor_cq->cur_cqe.info.field.cqe_timestamp / HCA_CORE_CLOCK_500_MHZ; ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wc read comp ts, cqn [%u] wr_id %"PRIu64" ret %"PRIu64"\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, timestamp); ++ } ++ return timestamp; ++} ++ ++uint64_t sxe2_wc_read_completion_wallclock_ns(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_ucq *vendor_cq = NULL; ++ uint64_t timestamp_ns; ++ ++ vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); ++ sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ timestamp_ns = vendor_cq->cur_cqe.info.field.cqe_timestamp * 1000; ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wc read timestamp ns, cqn [%u] wr_id %"PRIu64" ret %"PRIu64"\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, timestamp_ns); ++ } ++ ++ return timestamp_ns; ++} ++ ++enum ibv_wc_opcode sxe2_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct sxe2_ucq *vendor_cq = NULL; ++ struct sxe2_cqe_info *cur_cqe; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ enum ibv_wc_opcode opcode; ++ ++ vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); ++ sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ cur_cqe = &vendor_cq->cur_cqe; ++ ++ if 
(cur_cqe->info.field.qp_type == SXE2_CQE_QTYPE_SQ) { ++ opcode = sxe2_get_ib_wc_op_sq(cur_cqe); ++ } else { ++ opcode = sxe2_get_ib_wc_op_rq(cur_cqe); ++ } ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wc read opcode, cqn [%u] wr_id %"PRIu64" ret %d\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, opcode); ++ } ++ ++ return opcode; ++} ++ ++uint32_t sxe2_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct sxe2_cqe_info *cur_cqe; ++ struct sxe2_ucq *vendor_cq = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ __u32 error = 0; ++ ++ vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); ++ sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ cur_cqe = &vendor_cq->cur_cqe; ++ if (cur_cqe->info.field.error) { ++ error = (__u32)(cur_cqe->info.field.major_err << 16 | cur_cqe->info.field.minor_err); ++ } ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wc read vendor error, cqn [%u] wr_id %"PRIu64" ret %u\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, error); ++ } ++ ++ return error; ++} ++ ++unsigned int sxe2_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_cqe_info *cur_cqe; ++ struct sxe2_ucq *vendor_cq = NULL; ++ struct sxe2_qp_common *qp; ++ struct ibv_qp *ib_qp; ++ unsigned int wc_flags = 0; ++ ++ vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); ++ sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ cur_cqe = &vendor_cq->cur_cqe; ++ qp = (struct sxe2_qp_common *)cur_cqe->info.field.qpc; ++ ib_qp = qp->back_qp; ++ ++ if (cur_cqe->info.field.imm_data_flag) { ++ wc_flags |= IBV_WC_WITH_IMM; ++ } ++ ++ if (ib_qp->qp_type == IBV_QPT_UD) { ++ wc_flags |= IBV_WC_GRH; ++ } else { ++ if ((cur_cqe->info.field.qp_type == SXE2_CQE_QTYPE_RQ) ++ && (cur_cqe->info.field.stag_or_lrkey)) { ++ wc_flags |= 
IBV_WC_WITH_INV; ++ } ++ } ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wc read wc flags, cqn [%u] wr_id %"PRIu64" ret %u\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, wc_flags); ++ } ++ ++ return wc_flags; ++} ++ ++uint32_t sxe2_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_ucq *vendor_cq = NULL; ++ __u32 bytes; ++ ++ vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, ++ verbs_cq.cq_ex); ++ sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ bytes = vendor_cq->cur_cqe.bytes; ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wc read length, cqn [%u] wr_id %"PRIu64" ret %u\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, bytes); ++ } ++ ++ return bytes; ++} ++ ++__be32 sxe2_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_cqe_info *cur_cqe; ++ struct sxe2_ucq *vendor_cq = NULL; ++ __u32 imme_data = 0; ++ ++ vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); ++ sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ cur_cqe = &vendor_cq->cur_cqe; ++ ++ if (cur_cqe->info.field.imm_data_flag) { ++ imme_data = cur_cqe->info.field.imme_data; ++ } ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wc read imme data, cqn [%u] wr_id %"PRIu64" ret %u\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, imme_data); ++ } ++ return htonl(imme_data); ++} ++ ++uint32_t sxe2_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_ucq *vendor_cq = NULL; ++ __u32 qpn; ++ ++ vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, ++ verbs_cq.cq_ex); ++ sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ qpn = vendor_cq->cur_cqe.info.field.qp_id; ++ ++ if 
(sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wc read qpn, cqn [%u] wr_id %"PRIu64" ret %u\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, qpn); ++ } ++ return qpn; ++} ++ ++uint32_t sxe2_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_cqe_info *cur_cqe; ++ struct sxe2_ucq *vendor_cq = NULL; ++ struct sxe2_qp_common *qp; ++ struct ibv_qp *ib_qp; ++ __u32 src_qpn; ++ ++ vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); ++ sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ cur_cqe = &vendor_cq->cur_cqe; ++ qp = (struct sxe2_qp_common *)cur_cqe->info.field.qpc; ++ ib_qp = qp->back_qp; ++ if (ib_qp->qp_type == IBV_QPT_UD) { ++ src_qpn = cur_cqe->info.field.ud_src_qpn; ++ } else { ++ src_qpn = cur_cqe->info.field.qp_id; ++ } ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wc read src qpn, cqn [%u] wr_id %"PRIu64" ret %u\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, src_qpn); ++ } ++ return src_qpn; ++} ++ ++uint32_t sxe2_wc_read_slid(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_ucq *vendor_cq = NULL; ++ ++ vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); ++ sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wc read src lid, cqn [%u] wr_id %"PRIu64" ret %u\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, 0); ++ } ++ ++ return 0; ++} ++ ++uint8_t sxe2_wc_read_sl(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_ucq *vendor_cq = NULL; ++ ++ vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); ++ sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ if (sctx->enable_io_log) { ++ 
SXE2_VERBS_LOG_DEBUG_BDF("wc read sl, cqn [%u] wr_id %"PRIu64" ret %u\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, 0); ++ } ++ ++ return 0; ++} ++ ++uint8_t sxe2_wc_read_dlid_path_bits(struct ibv_cq_ex *ibvcq_ex) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_ucq *vendor_cq = NULL; ++ ++ vendor_cq = container_of(ibvcq_ex, struct sxe2_ucq, verbs_cq.cq_ex); ++ sctx = container_of(vendor_cq->verbs_cq.cq_ex.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wc read dlid path bits, cqn [%u] wr_id %"PRIu64" ret %u\n", ++ vendor_cq->cq.cq_id, vendor_cq->verbs_cq.cq_ex.wr_id, 0); ++ } ++ ++ return 0; ++} ++ ++static __le64 *sxe2_srq_get_next_recv_wqe(struct sxe2_srq_verbs *srq, __u32 *wqe_idx) ++{ ++ int ret_code; ++ __le64 *wqe; ++ ++ if (SXE2_RING_FULL_ERR(srq->srq_ring)) ++ return NULL; ++ ++ *wqe_idx = SXE2_RING_CURRENT_HEAD(srq->srq_ring); ++ if (srq->srqe_array[*wqe_idx] == SXE2_SRQE_BUSY) ++ return NULL; ++ ++ SXE2_RING_MOVE_HEAD(srq->srq_ring, ret_code); ++ if (ret_code) ++ return NULL; ++ ++ srq->srqe_array[*wqe_idx] = SXE2_SRQE_BUSY; ++ ++ if (!*wqe_idx) ++ srq->srq_polarity = !srq->srq_polarity; ++ ++ wqe = srq->srq_base[*wqe_idx * srq->wqe_size_multiplier].elem; ++ ++ return wqe; ++} ++ ++static int sxe2_hw_srq_post_receive(struct sxe2_srq_verbs *srq, ++ struct sxe2_rq_info *info) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_usrq *usrq = NULL; ++ __u32 wqe_idx, i, byte_off; ++ __u32 addl_sge_cnt; ++ __le64 *wqe; ++ __u64 hdr; ++ ++ usrq = container_of(srq, struct sxe2_usrq, srq_verbs); ++ sctx = to_sctx(usrq->vsrq.srq.context); ++ ++ if (srq->max_srq_frag_cnt < info->num_sges) ++ return -EINVAL; ++ ++ wqe = sxe2_srq_get_next_recv_wqe(srq, &wqe_idx); ++ if (!wqe) ++ return -ENOMEM; ++ ++ srq->srq_wrid_array[wqe_idx] = info->wr_id; ++ ++ addl_sge_cnt = info->num_sges > 1 ? 
info->num_sges - 1 : 0; ++ sxe2_set_sgelist_data(wqe, 0, info->sg_list, srq->srq_polarity); ++ ++ for (i = 1, byte_off = 32; i < info->num_sges; i++) { ++ sxe2_set_sgelist_data(wqe, byte_off, &info->sg_list[i], srq->srq_polarity); ++ byte_off += 16; ++ } ++ ++ if (!(info->num_sges & 0x01) && info->num_sges) { ++ sxe2_set_sgelist_data(wqe, byte_off, NULL, srq->srq_polarity); ++ } ++ ++ hdr = FIELD_PREP(SXE2_WQE_ADDSGECNT, addl_sge_cnt) | ++ FIELD_PREP(SXE2_WQE_VALID, srq->srq_polarity); ++ ++ udma_to_device_barrier(); ++ ++ set_64bit_val(wqe, 24, hdr); ++ ++ set_64bit_val(srq->db_note, 0, ++ (__u64)SXE2_RING_CURRENT_HEAD(srq->srq_ring) * srq->wqe_size_multiplier); ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("POST SRQ RCV(srqn = %u): wqe_idx %u "\ ++ "wr_id %llu ring_size %u mul_size %d\n", ++ srq->srq_id, wqe_idx, srq->srq_wrid_array[wqe_idx], ++ SXE2_RING_SIZE(srq->srq_ring), srq->wqe_size_multiplier); ++ for (i = 0; i < info->num_sges; i++) { ++ SXE2_VERBS_LOG_DEBUG_BDF("sgelist[%d] addr %" PRIu64 " len [%u] lkey [%u]\n", ++ i, info->sg_list[i].addr, info->sg_list[i].length, ++ info->sg_list[i].lkey); ++ } ++ } ++ ++ return 0; ++} ++ ++int sxe2_upost_srq_recv(struct ibv_srq *ibv_srq, ++ struct ibv_recv_wr *ibv_wr, struct ibv_recv_wr **bad_wr) ++{ ++ struct sxe2_usrq *usrq = to_usrq(ibv_srq); ++ struct sxe2_srq_verbs *verbs_srq = &usrq->srq_verbs; ++ struct sxe2_rq_info rq_info = {}; ++ int err = 0; ++ ++ pthread_spin_lock(&usrq->lock); ++ while (ibv_wr) { ++ if (ibv_wr->num_sge > (int)verbs_srq->max_srq_frag_cnt) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ rq_info.num_sges = (__u32)ibv_wr->num_sge; ++ rq_info.wr_id = ibv_wr->wr_id; ++ rq_info.sg_list = ibv_wr->sg_list; ++ err = sxe2_hw_srq_post_receive(verbs_srq, &rq_info); ++ if (err) ++ goto out; ++ ++ ibv_wr = ibv_wr->next; ++ } ++ ++out: ++ pthread_spin_unlock(&usrq->lock); ++ ++ if (err) ++ *bad_wr = ibv_wr; ++ ++ return err; ++} ++ ++static __le64 *sxe2_init_wqe(struct sxe2_uqp *uqp, __u64 
wr_id, unsigned int opcode, bool ord_fence) ++{ ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_qp_common *qp; ++ unsigned int send_flags = uqp->verbs_qp.qp_ex.wr_flags; ++ __le64 *wqe; ++ __u64 hdr = 0; ++ ++ qp = &uqp->qp; ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("init wqe (qpn = %u): wr_id %"PRIu64" "\ ++ "opcode %u wr_flags %x ring_size %u \n", ++ uqp->verbs_qp.qp_ex.qp_base.qp_num, ++ uqp->verbs_qp.qp_ex.wr_id, opcode, ++ send_flags, SXE2_RING_SIZE(qp->sq_ring)); ++ } ++ ++ if (SXE2_RING_FULL_ERR(qp->sq_ring)) { ++ uqp->wqe_hdr = NULL; ++ uqp->err = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF("sq wqe buf full, qpn [%u]\n", ++ uqp->verbs_qp.qp_ex.qp_base.qp_num); ++ return NULL; ++ } ++ ++ wqe = (__u64 *)uqp->wqebuf; ++ memset(wqe, 0, SXE2_QP_WQE_MIN_SIZE); ++ ++ SXE2_SET_FIELD(hdr, SXE2_WQE_OPCODE, opcode); ++ if ((send_flags & IBV_SEND_SIGNALED) || uqp->sq_sig_all) { ++ SXE2_SET_FIELD(hdr, SXE2_WQE_SIGCOMPL, true); ++ } ++ ++ if (send_flags & IBV_SEND_FENCE || ord_fence) { ++ SXE2_SET_FIELD(hdr, SXE2_WQE_READFENCE, true); ++ } ++ ++ if (uqp->send_cq->report_rtt) { ++ SXE2_SET_FIELD(hdr, SXE2_WQE_REPORTRTT, true); ++ } ++ ++ if(uqp->qp.push_db) { ++ SXE2_SET_FIELD(hdr, SXE2_WQE_PUSHWQE, true); ++ } ++ ++ set_64bit_val(wqe, 24, hdr); ++ ++ uqp->wqe_hdr = wqe; ++ ++ return wqe; ++} ++ ++void sxe2_wr_start(struct ibv_qp_ex *qp_ex) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ enum ibv_qp_state state = qp_ex->qp_base.state; ++ ++ uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr start enter, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ } ++ ++ if (state < IBV_QPS_RTS) { ++ uqp->err = EINVAL; ++ SXE2_VERBS_LOG_ERROR_BDF("qp 
state err [%d] qpn [%u]\n", ++ state, qp_ex->qp_base.qp_num); ++ return; ++ } ++ ++ (void)pthread_spin_lock(&uqp->lock); ++ ++ uqp->rb_sq_head = SXE2_RING_CURRENT_HEAD(uqp->qp.sq_ring); ++ uqp->err = 0; ++} ++ ++int sxe2_wr_complete(struct ibv_qp_ex *qp_ex) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_qp_common *qp; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ bool push_wqe; ++ int err; ++ ++ uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ err = uqp->err; ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr complete enter, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ } ++ if (unlikely(err)) { ++ SXE2_RING_CURRENT_HEAD(uqp->qp.sq_ring) = uqp->rb_sq_head; ++ SXE2_VERBS_LOG_ERROR_BDF("qp err [%d] qpn [%u]\n", ++ err, qp_ex->qp_base.qp_num); ++ goto out; ++ } ++ ++ qp = &uqp->qp; ++ ++ push_wqe = qp->push_db ? true : false; ++ ++ if (push_wqe) ++ sxe2_qp_push_wqe(qp, uqp->cur_wqe, uqp->quanta, uqp->wqe_idx); ++ else ++ sxe2_qp_ring_normal_db(qp); ++ ++ sxe2_dump_wqe(qp, uqp->cur_wqe, uqp->quanta, uqp->wqe_idx, "new_send"); ++ ++out: ++ pthread_spin_unlock(&uqp->lock); ++ ++ return err; ++} ++ ++void sxe2_wr_abort(struct ibv_qp_ex *qp_ex) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ ++ uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr abort enter, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ } ++ SXE2_RING_CURRENT_HEAD(uqp->qp.sq_ring) = uqp->rb_sq_head; ++ ++ pthread_spin_unlock(&uqp->lock); ++} ++ ++void sxe2_wr_send_rc_ud(struct ibv_qp_ex *qp_ex) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ unsigned int send_flags = qp_ex->wr_flags; ++ unsigned int opcode = SXE2_OP_TYPE_SEND; ++ ++ uqp 
= container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr send enter, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ } ++ ++ if (send_flags & IBV_SEND_SOLICITED) ++ opcode = SXE2_OP_TYPE_SEND_SOL; ++ ++ (void)sxe2_init_wqe(uqp, qp_ex->wr_id, opcode, false); ++ ++ uqp->funid = SXE2_RDMA_SEND; ++} ++ ++void sxe2_wr_send_imm_rc_ud(struct ibv_qp_ex *qp_ex, __be32 imm_data) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ unsigned int send_flags = qp_ex->wr_flags; ++ unsigned int opcode = SXE2_OP_TYPE_SEND; ++ __le64 *wqe; ++ __u64 hdr; ++ __u64 val = 0; ++ ++ uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr send immediate enter, qpn [%u] imm [%u]\n", ++ qp_ex->qp_base.qp_num, be32toh(imm_data)); ++ } ++ if (send_flags & IBV_SEND_SOLICITED) ++ opcode = SXE2_OP_TYPE_SEND_SOL; ++ ++ wqe = sxe2_init_wqe(uqp, qp_ex->wr_id, opcode, false); ++ if (!wqe) ++ return; ++ ++ get_64bit_val(wqe, 24, &hdr); ++ ++ SXE2_SET_FIELD(hdr, SXE2_WQE_IMMEDATAFLG, true); ++ SXE2_SET_FIELD(val, SXE2_WQE_IMMEDATA, be32toh(imm_data)); ++ ++ set_64bit_val(wqe, 0, val); ++ set_64bit_val(wqe, 24, hdr); ++ ++ uqp->funid = SXE2_RDMA_SEND; ++} ++ ++void sxe2_wr_send_inv_rc(struct ibv_qp_ex *qp_ex, uint32_t invalidate_rkey) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ unsigned int send_flags = qp_ex->wr_flags; ++ unsigned int opcode = SXE2_OP_TYPE_SEND_INV; ++ __le64 *wqe; ++ __u64 hdr; ++ ++ uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ ++ if (sctx->enable_io_log) { ++ 
SXE2_VERBS_LOG_DEBUG_BDF("wr send inv enter, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ } ++ ++ if (send_flags & IBV_SEND_SOLICITED) { ++ opcode = SXE2_OP_TYPE_SEND_SOL_INV; ++ } ++ ++ wqe = sxe2_init_wqe(uqp, qp_ex->wr_id, opcode, false); ++ if (!wqe) ++ return; ++ ++ get_64bit_val(wqe, 24, &hdr); ++ ++ SXE2_SET_FIELD(hdr, SXE2_WQE_REMOTEINVTAG, invalidate_rkey); ++ ++ set_64bit_val(wqe, 24, hdr); ++ ++ uqp->funid = SXE2_RDMA_SEND; ++} ++ ++void sxe2_wr_rdma_read_rc(struct ibv_qp_ex *qp_ex, uint32_t rkey, ++ uint64_t remote_addr) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ __le64 *wqe; ++ __u64 hdr; ++ bool ord_fence = false; ++ ++ uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr read enter, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ } ++ if (uqp->qp.rd_fence_rate && (uqp->qp.ord_cnt++ == uqp->qp.rd_fence_rate)) { ++ ord_fence = true; ++ uqp->qp.ord_cnt = 0; ++ } ++ ++ wqe = sxe2_init_wqe(uqp, qp_ex->wr_id, SXE2_OP_TYPE_RDMA_READ, ord_fence); ++ if (!wqe) ++ return; ++ ++ get_64bit_val(wqe, 24, &hdr); ++ ++ sxe2_set_remote_offset(wqe, 16, remote_addr); ++ ++ SXE2_SET_FIELD(hdr, SXE2_WQE_REMOTETAG, rkey); ++ ++ set_64bit_val(wqe, 24, hdr); ++ ++ uqp->funid = SXE2_RDMA_READ; ++} ++ ++void sxe2_wr_rdma_write_rc(struct ibv_qp_ex *qp_ex, uint32_t rkey, ++ uint64_t remote_addr) ++{ ++ unsigned int send_flags = qp_ex->wr_flags; ++ unsigned int opcode = SXE2_OP_TYPE_RDMA_WRITE; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ __le64 *wqe; ++ __u64 hdr; ++ ++ if (send_flags & IBV_SEND_SOLICITED) ++ opcode = SXE2_OP_TYPE_RDMA_WRITE_SOL; ++ ++ uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ if 
(sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr write enter, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ } ++ ++ wqe = sxe2_init_wqe(uqp, qp_ex->wr_id, opcode, false); ++ if (!wqe) ++ return; ++ ++ get_64bit_val(wqe, 24, &hdr); ++ ++ sxe2_set_remote_offset(wqe, 16, remote_addr); ++ ++ SXE2_SET_FIELD(hdr, SXE2_WQE_REMOTETAG, rkey); ++ ++ set_64bit_val(wqe, 24, hdr); ++ ++ uqp->funid = SXE2_RDMA_WRITE; ++} ++ ++void sxe2_wr_rdma_write_imm_rc(struct ibv_qp_ex *qp_ex, uint32_t rkey, ++ uint64_t remote_addr, __be32 imm_data) ++{ ++ unsigned int send_flags = qp_ex->wr_flags; ++ unsigned int opcode = SXE2_OP_TYPE_RDMA_WRITE; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ __le64 *wqe; ++ __u64 hdr; ++ __u64 val = 0; ++ ++ if (send_flags & IBV_SEND_SOLICITED) ++ opcode = SXE2_OP_TYPE_RDMA_WRITE_SOL; ++ ++ uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr write with immediate enter, "\ ++ "qpn [%u] imm [%u]\n", qp_ex->qp_base.qp_num, ++ be32toh(imm_data)); ++ } ++ ++ wqe = sxe2_init_wqe(uqp, qp_ex->wr_id, opcode, false); ++ if (!wqe) ++ return; ++ ++ get_64bit_val(wqe, 24, &hdr); ++ ++ sxe2_set_remote_offset(wqe, 16, remote_addr); ++ ++ SXE2_SET_FIELD(hdr, SXE2_WQE_REMOTETAG, rkey); ++ SXE2_SET_FIELD(hdr, SXE2_WQE_IMMEDATAFLG, true); ++ SXE2_SET_FIELD(val, SXE2_WQE_IMMEDATA, be32toh(imm_data)); ++ ++ set_64bit_val(wqe, 24, hdr); ++ ++ set_64bit_val(wqe, 0, val); ++ uqp->funid = SXE2_RDMA_WRITE; ++} ++ ++void sxe2_wr_set_ud_addr(struct ibv_qp_ex *qp_ex, struct ibv_ah *ah, ++ uint32_t remote_qpn, uint32_t remote_qkey) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ struct sxe2_uah *uah; ++ __le64 *wqe; ++ __u64 hdr; ++ __u64 val = 0; ++ ++ uah = container_of(ah, struct sxe2_uah, ibv_ah); ++ uqp = container_of(qp_ex, struct 
sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr set ud addr enter, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ } ++ ++ wqe = uqp->wqe_hdr; ++ if (!wqe) { ++ return; ++ } ++ ++ get_64bit_val(wqe, 24, &hdr); ++ ++ SXE2_SET_FIELD(hdr, SXE2_WQE_AH_ID, uah->ah_id); ++ ++ SXE2_SET_FIELD(val, SXE2_WQE_DESTQKEY, remote_qkey); ++ SXE2_SET_FIELD(val, SXE2_WQE_DESTQPN, remote_qpn); ++ ++ set_64bit_val(wqe, 16, val); ++ set_64bit_val(wqe, 24, hdr); ++} ++ ++static void sxe2_fill_inline_data_buflist(__u8 *wqe, size_t num_buf, ++ const struct ibv_data_buf *buf_list, __u8 polarity) ++{ ++ __u8 inline_valid = (__u8)(polarity << SXE2_INLINE_VALID_S); ++ __u32 quanta_bytes_remaining = 8; ++ __u32 i; ++ bool first_quanta = true; ++ ++ wqe += 8; ++ ++ for (i = 0; i < num_buf; i++) { ++ __u8 *cur_sge = (__u8 *)(uintptr_t)buf_list[i].addr; ++ __u32 sge_len = (__u32)buf_list[i].length; ++ ++ while (sge_len) { ++ __u32 bytes_copied; ++ ++ bytes_copied = min(sge_len, quanta_bytes_remaining); ++ memcpy(wqe, cur_sge, bytes_copied); ++ wqe += bytes_copied; ++ cur_sge += bytes_copied; ++ quanta_bytes_remaining -= bytes_copied; ++ sge_len -= bytes_copied; ++ ++ if (!quanta_bytes_remaining) { ++ quanta_bytes_remaining = 31; ++ ++ if (first_quanta) { ++ first_quanta = false; ++ wqe += 16; ++ } else { ++ *wqe = inline_valid; ++ wqe++; ++ } ++ } ++ } ++ } ++ if (!first_quanta && quanta_bytes_remaining < 31) { ++ *(wqe + quanta_bytes_remaining) = inline_valid; ++ } ++} ++ ++static inline void sxe2_fill_sgelist_data(void *wqe, __u32 offset, ++ const struct ibv_sge *sge, __u8 valid) ++{ ++ __u64 frag_info = 0; ++ uint32_t len; ++ ++ if (sge) { ++ len = (sge->length & ((uint32_t)1 << 31)) ? 
0 : sge->length; ++ set_64bit_val(wqe, offset,sge->addr); ++ SXE2_SET_FIELD(frag_info, SXE2_WQE_VALID, valid); ++ SXE2_SET_FIELD(frag_info, SXE2_WQE_FRAG_LEN, len); ++ SXE2_SET_FIELD(frag_info, SXE2_WQE_FRAG_STAG, sge->lkey); ++ set_64bit_val(wqe, offset + 8, frag_info); ++ } else { ++ SXE2_SET_FIELD(frag_info, SXE2_WQE_VALID, valid); ++ set_64bit_val(wqe, offset, 0); ++ set_64bit_val(wqe, offset + 8, frag_info); ++ } ++} ++ ++void sxe2_wr_set_inline_data_rc_ud(struct ibv_qp_ex *qp_ex, ++ void *addr, size_t length) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_qp_common *qp; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ const struct ibv_data_buf buff = {.addr = addr, .length = length}; ++ __le64 *wqe; ++ __le64 *cache_hdr; ++ __u32 wqe_idx; ++ __u16 quanta; ++ bool push_wqe; ++ __u64 hdr; ++ ++ uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr set inline data enter, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ } ++ ++ qp = &uqp->qp; ++ push_wqe = qp->push_db ? 
true : false; ++ cache_hdr = uqp->wqe_hdr; ++ if (!cache_hdr) { ++ SXE2_VERBS_LOG_WARN_BDF("wr cache hdr null, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ return; ++ } ++ ++ if (length > uqp->qp.max_inline_data) { ++ SXE2_VERBS_LOG_ERROR_BDF("wr length %zu exceed %u , qpn [%u]\n", ++ length, uqp->qp.max_inline_data, qp_ex->qp_base.qp_num); ++ uqp->err = EINVAL; ++ return; ++ } ++ ++ quanta = sxe2_inline_to_quanta_cnt((__u32)length); ++ wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, (__u32)length, ++ qp_ex->wr_id, push_wqe); ++ if (!wqe) { ++ uqp->err = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF("wr no memory, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ return; ++ } ++ ++ uqp->wqe_idx = wqe_idx; ++ uqp->cur_wqe = wqe; ++ uqp->quanta = quanta; ++ ++ memcpy(wqe, cache_hdr, SXE2_QP_WQE_MIN_SIZE); ++ ++ sxe2_fill_inline_data_buflist((__u8 *)wqe, 1, &buff, qp->swqe_polarity); ++ ++ get_64bit_val(wqe, 24, &hdr); ++ ++ SXE2_SET_FIELD(hdr, SXE2_WQE_INLINEDATAFLG, true); ++ SXE2_SET_FIELD(hdr, SXE2_WQE_INLINEDATALEN, length); ++ SXE2_SET_FIELD(hdr, SXE2_WQE_VALID, qp->swqe_polarity); ++ set_64bit_val(wqe, 24, hdr); ++ if (uqp->funid == SXE2_RDMA_SEND) { ++ sxe2_dump_wqe_hdr(uqp, &hdr, SXE2_RDMA_SEND_INLINE); ++ } else if (uqp->funid == SXE2_RDMA_WRITE) { ++ sxe2_dump_wqe_hdr(uqp, &hdr, SXE2_RDMA_WRITE_INLINE); ++ } ++} ++ ++void sxe2_wr_set_inline_data_list_rc_ud(struct ibv_qp_ex *qp_ex, ++ size_t num_buf, const struct ibv_data_buf *buf_list) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_qp_common *qp; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ size_t length = 0; ++ __le64 *wqe; ++ __le64 *cache_hdr; ++ __u32 wqe_idx; ++ __u16 quanta; ++ bool push_wqe; ++ __u64 hdr; ++ size_t i; ++ ++ uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr set inline data list enter, qpn [%u]\n", ++ 
qp_ex->qp_base.qp_num); ++ } ++ ++ qp = &uqp->qp; ++ push_wqe = qp->push_db ? true : false; ++ cache_hdr = uqp->wqe_hdr; ++ if (!cache_hdr) { ++ SXE2_VERBS_LOG_WARN_BDF("wr cache hdr null, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ return; ++ } ++ for (i = 0; i < num_buf; i++) ++ length += buf_list[i].length; ++ ++ if (length > uqp->qp.max_inline_data) { ++ uqp->err = EINVAL; ++ SXE2_VERBS_LOG_ERROR_BDF("wr length %zu exceed %u , qpn [%u]\n", ++ length, uqp->qp.max_inline_data, qp_ex->qp_base.qp_num); ++ return; ++ } ++ ++ quanta = sxe2_inline_to_quanta_cnt((__u32)length); ++ wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, (__u32)length, ++ qp_ex->wr_id, push_wqe); ++ if (!wqe) { ++ uqp->err = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF("wr no memory, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ return; ++ } ++ ++ uqp->wqe_idx = wqe_idx; ++ uqp->cur_wqe = wqe; ++ uqp->quanta = quanta; ++ ++ memcpy(wqe, cache_hdr, SXE2_QP_WQE_MIN_SIZE); ++ ++ sxe2_fill_inline_data_buflist((__u8 *)wqe, num_buf, buf_list, qp->swqe_polarity); ++ ++ get_64bit_val(wqe, 24, &hdr); ++ SXE2_SET_FIELD(hdr, SXE2_WQE_INLINEDATAFLG, true); ++ SXE2_SET_FIELD(hdr, SXE2_WQE_INLINEDATALEN, length); ++ SXE2_SET_FIELD(hdr, SXE2_WQE_VALID, qp->swqe_polarity); ++ set_64bit_val(wqe, 24, hdr); ++ ++ if (uqp->funid == SXE2_RDMA_SEND) { ++ sxe2_dump_wqe_hdr(uqp, &hdr, SXE2_RDMA_SEND_INLINE); ++ } else if (uqp->funid == SXE2_RDMA_WRITE) { ++ sxe2_dump_wqe_hdr(uqp, &hdr, SXE2_RDMA_WRITE_INLINE); ++ } ++} ++ ++void sxe2_wr_set_sge_rc_ud(struct ibv_qp_ex *qp_ex, uint32_t lkey, ++ uint64_t addr, uint32_t length) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_qp_common *qp; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ __le64 *cache_hdr; ++ __le64 *wqe; ++ bool push_wqe; ++ bool has_imme_data = false; ++ __u32 wqe_idx; ++ __u32 addl_frag_cnt; ++ __u16 quanta; ++ __u64 hdr; ++ int ret_code; ++ __u32 frag_cnt = 1; ++ __u64 frag_info = 0; ++ const struct ibv_sge sg_list = {.addr = addr, .length = length, .lkey = 
lkey}; ++ ++ uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr set sge data enter, "\ ++ "qpn [%u] addr %" PRIu64 " len [%u]\n", ++ qp_ex->qp_base.qp_num, addr, length); ++ } ++ ++ qp = &uqp->qp; ++ push_wqe = qp->push_db ? true : false; ++ cache_hdr = uqp->wqe_hdr; ++ if (!cache_hdr) { ++ SXE2_VERBS_LOG_WARN_BDF("wr cache hdr null, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ return; ++ } ++ ++ get_64bit_val(cache_hdr, 24, &hdr); ++ has_imme_data = SXE2_GET_FIELD(SXE2_WQE_IMMEDATAFLG, hdr); ++ if (has_imme_data) { ++ frag_cnt = frag_cnt + 1; ++ } ++ ++ ret_code = sxe2_fragcnt_to_quanta_cnt(frag_cnt, &quanta); ++ if (ret_code) { ++ uqp->err = EINVAL; ++ SXE2_VERBS_LOG_ERROR_BDF("wr sge num %u exceed, qpn [%u]\n", ++ frag_cnt, qp_ex->qp_base.qp_num); ++ return; ++ } ++ ++ wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, length, ++ qp_ex->wr_id, push_wqe); ++ if (!wqe) { ++ uqp->err = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF("wr no memory, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ return; ++ } ++ ++ uqp->wqe_idx = wqe_idx; ++ uqp->cur_wqe = wqe; ++ uqp->quanta = quanta; ++ ++ addl_frag_cnt = frag_cnt > 1 ? 
(frag_cnt - 1) : 0; ++ ++ memcpy(wqe, cache_hdr, SXE2_QP_WQE_MIN_SIZE); ++ ++ if (has_imme_data) { ++ sxe2_fill_sgelist_data(wqe, 32, &sg_list, qp->swqe_polarity); ++ sxe2_fill_sgelist_data(wqe, 48, NULL, qp->swqe_polarity); ++ } else { ++ sxe2_fill_sgelist_data(wqe, 0, &sg_list, qp->swqe_polarity); ++ } ++ ++ if (length == 0) { ++ get_64bit_val(wqe, 8, &frag_info); ++ frag_info = frag_info & (~SXE2_WQE_FRAG_VALID); ++ SXE2_SET_FIELD(frag_info, SXE2_WQE_FRAG_VALID, !qp->swqe_polarity); ++ set_64bit_val(wqe, 8, frag_info); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr set frag_info [%#llx] field [%d]\n", ++ frag_info, !qp->swqe_polarity); ++ } ++ } ++ ++ SXE2_SET_FIELD(hdr, SXE2_WQE_ADDSGECNT, addl_frag_cnt); ++ SXE2_SET_FIELD(hdr, SXE2_WQE_VALID, qp->swqe_polarity); ++ set_64bit_val(wqe, 24, hdr); ++ sxe2_dump_wqe_hdr(uqp, &hdr, uqp->funid); ++} ++ ++void sxe2_wr_set_sge_list_rc_ud(struct ibv_qp_ex *qp_ex, ++ size_t num_sge, const struct ibv_sge *sg_list) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_qp_common *qp; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ __le64 *cache_hdr; ++ bool push_wqe; ++ bool has_imme_data = false; ++ __le64 *wqe; ++ __u32 i, wqe_idx, total_size = 0, byte_off; ++ int ret_code; ++ __u32 frag_cnt, addl_frag_cnt; ++ __u64 hdr; ++ __u16 quanta; ++ __u64 frag_info = 0; ++ ++ uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ qp = &uqp->qp; ++ push_wqe = qp->push_db ? 
true : false; ++ cache_hdr = uqp->wqe_hdr; ++ if (!cache_hdr) { ++ SXE2_VERBS_LOG_WARN_BDF("wr cache hdr null, qpn [%u]\n", qp_ex->qp_base.qp_num); ++ return; ++ } ++ if (qp->max_sq_sge_cnt < num_sge) { ++ uqp->err = EINVAL; ++ SXE2_VERBS_LOG_ERROR_BDF("wr sge num %zu exceed %u, qpn [%u]\n", ++ num_sge, qp->max_sq_sge_cnt, qp_ex->qp_base.qp_num); ++ return; ++ } ++ for (i = 0; i < num_sge; i++) { ++ total_size += sg_list[i].length; ++ } ++ get_64bit_val(cache_hdr, 24, &hdr); ++ has_imme_data = SXE2_GET_FIELD(SXE2_WQE_IMMEDATAFLG, hdr); ++ if (has_imme_data) { ++ frag_cnt = (__u32)(num_sge + 1); ++ } else { ++ frag_cnt = (__u32)num_sge; ++ } ++ ret_code = sxe2_fragcnt_to_quanta_cnt(frag_cnt, &quanta); ++ if (ret_code) { ++ uqp->err = EINVAL; ++ SXE2_VERBS_LOG_ERROR_BDF("wr sge num %u exceed, qpn [%u]\n", ++ frag_cnt, qp_ex->qp_base.qp_num); ++ return; ++ } ++ wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, ++ qp_ex->wr_id, push_wqe); ++ if (!wqe) { ++ uqp->err = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF("wr no memory, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ return; ++ } ++ uqp->wqe_idx = wqe_idx; ++ uqp->cur_wqe = wqe; ++ uqp->quanta = quanta; ++ ++ memcpy(wqe, cache_hdr, SXE2_QP_WQE_MIN_SIZE); ++ ++ addl_frag_cnt = frag_cnt > 1 ? 
(frag_cnt - 1) : 0; ++ if (has_imme_data) { ++ i = 0; ++ } else { ++ sxe2_fill_sgelist_data(wqe, 0, sg_list, qp->swqe_polarity); ++ i = 1; ++ } ++ if (total_size == 0) { ++ get_64bit_val(wqe, 8, &frag_info); ++ frag_info = frag_info & (~SXE2_WQE_FRAG_VALID); ++ SXE2_SET_FIELD(frag_info, SXE2_WQE_FRAG_VALID, !qp->swqe_polarity); ++ set_64bit_val(wqe, 8, frag_info); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr set frag_info [%#llx] field [%d]\n", ++ frag_info, !qp->swqe_polarity); ++ } ++ } ++ ++ for (byte_off = 32; i < num_sge;) { ++ sxe2_fill_sgelist_data(wqe, byte_off, &sg_list[i], qp->swqe_polarity); ++ byte_off += 16; ++ i++; ++ } ++ ++ if (!(frag_cnt & 0x01) && frag_cnt) { ++ sxe2_fill_sgelist_data(wqe, byte_off, NULL, qp->swqe_polarity); ++ } ++ SXE2_SET_FIELD(hdr, SXE2_WQE_ADDSGECNT, addl_frag_cnt); ++ SXE2_SET_FIELD(hdr, SXE2_WQE_VALID, qp->swqe_polarity); ++ set_64bit_val(wqe, 24, hdr); ++ sxe2_dump_wqe_hdr(uqp, &hdr, uqp->funid); ++} ++ ++void sxe2_wr_bind_mw_rc(struct ibv_qp_ex *qp_ex, struct ibv_mw *mw, ++ uint32_t rkey, const struct ibv_mw_bind_info *bind_info) ++{ ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ __u64 hdr; ++ ++ uqp = container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr bind mw unsupport, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ } ++ ++ uqp->funid = SXE2_RDMA_BIND_MW; ++ ++ uqp->err = EOPNOTSUPP; ++ ++ sxe2_dump_wqe_hdr(uqp, &hdr, uqp->funid); ++ return; ++} ++ ++void sxe2_wr_local_inv_rc(struct ibv_qp_ex *qp_ex, uint32_t invalidate_rkey) ++{ ++ struct sxe2_qp_common *qp; ++ struct sxe2_uqp *uqp = NULL; ++ struct sxe2_rdma_ucontext *sctx = NULL; ++ __le64 *cache_hdr; ++ bool push_wqe; ++ __u32 wqe_idx; ++ __le64 *wqe; ++ __u64 hdr; ++ __u16 quanta = SXE2_QP_WQE_MIN_QUANTA; ++ __u64 val = 0; ++ ++ uqp = 
container_of(qp_ex, struct sxe2_uqp, verbs_qp.qp_ex); ++ sctx = container_of(uqp->verbs_qp.qp_ex.qp_base.context, ++ struct sxe2_rdma_ucontext, ibv_ctx.context); ++ if (sctx->enable_io_log) { ++ SXE2_VERBS_LOG_DEBUG_BDF("wr local inv enter, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ } ++ ++ qp = &uqp->qp; ++ push_wqe = qp->push_db ? true : false; ++ cache_hdr = sxe2_init_wqe(uqp, qp_ex->wr_id, SXE2_OP_TYPE_LOCAL_INV, false); ++ if (!cache_hdr) { ++ return; ++ } ++ ++ wqe = sxe2_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, ++ 0, qp_ex->wr_id, push_wqe); ++ if (!wqe) { ++ uqp->err = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF("wr no memory, qpn [%u]\n", ++ qp_ex->qp_base.qp_num); ++ return; ++ } ++ ++ get_64bit_val(cache_hdr, 24, &hdr); ++ ++ uqp->wqe_idx = wqe_idx; ++ uqp->cur_wqe = wqe; ++ uqp->quanta = quanta; ++ ++ SXE2_SET_FIELD(val, SXE2_WQE_REMOTEINVTAG, invalidate_rkey); ++ set_64bit_val(cache_hdr, 8, val); ++ ++ memcpy(wqe, cache_hdr, SXE2_QP_WQE_MIN_SIZE); ++ ++ SXE2_SET_FIELD(hdr, SXE2_WQE_VALID, qp->swqe_polarity); ++ set_64bit_val(wqe, 24, hdr); ++ uqp->funid = SXE2_RDMA_LOCAL_INV; ++ sxe2_dump_wqe_hdr(uqp, &hdr, uqp->funid); ++} ++ ++void sxe2_push_nop_wqe(struct sxe2_qp_common *qp) { ++ __le64 *wqe; ++ __u32 wqe_idx; ++ ++ wqe_idx = SXE2_RING_CURRENT_HEAD(qp->sq_ring); ++ if (!wqe_idx) { ++ qp->swqe_polarity = !qp->swqe_polarity; ++ } ++ wqe = qp->sq_base[wqe_idx].elem; ++ ++ qp->sq_wrtrk_array[wqe_idx].quanta = SXE2_QP_WQE_MIN_QUANTA; ++ ++ set_64bit_val(wqe, 0, 0); ++ set_64bit_val(wqe, 8, 0); ++ set_64bit_val(wqe, 16, 0); ++ ++ sxe2_set_nop_hdr(wqe, 24, qp); ++ SXE2_RING_MOVE_HEAD_NOCHECK(qp->sq_ring); ++ SXE2_RING_MOVE_TAIL(qp->sq_ring); ++} ++ +diff -Naur rdma-core-48.0.bak/providers/sxe2/io.h rdma-core-48.0/providers/sxe2/io.h +--- rdma-core-48.0.bak/providers/sxe2/io.h 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/io.h 2026-05-27 17:08:05.579104733 +0800 +@@ -0,0 +1,477 @@ ++ ++ ++#ifndef __IO_H__ ++#define __IO_H__ ++ ++#define 
SXE2_OP_TYPE_RDMA_WRITE 0x00 ++#define SXE2_OP_TYPE_RDMA_READ 0x01 ++#define SXE2_OP_TYPE_RSVD1 0x02 ++#define SXE2_OP_TYPE_SEND 0x03 ++#define SXE2_OP_TYPE_SEND_INV 0x04 ++#define SXE2_OP_TYPE_SEND_SOL 0x05 ++#define SXE2_OP_TYPE_SEND_SOL_INV 0x06 ++#define SXE2_OP_TYPE_RSVD2 0x07 ++#define SXE2_OP_TYPE_BIND_MW 0x08 ++#define SXE2_OP_TYPE_FAST_REG_MR 0x09 ++#define SXE2_OP_TYPE_LOCAL_INV 0x0a ++#define SXE2_OP_TYPE_RDMA_READ_INV 0x0b ++#define SXE2_OP_TYPE_NOP 0x0c ++#define SXE2_OP_TYPE_RDMA_WRITE_SOL 0x0d ++ ++#define SXE2_CQE_QTYPE_RQ 0 ++#define SXE2_CQE_QTYPE_SQ 1 ++ ++#define SXE2_MAX_FRAGCNT 16 ++#define SXE2_QP_WQE_MIN_SIZE 32 ++#define SXE2_QP_WQE_MAX_SIZE 256 ++#define SXE2_QP_MAX_INLINE_PER_QUANTA 31 ++ ++#define SXE2_SQ_RSVD 8 ++#define SXE2_RQ_RSVD 8 ++ ++#define SXE2_INLINE_VALID_S 7 ++#define HCA_CORE_CLOCK_500_MHZ 500 ++ ++#define SXE2_WQE_IMMEDATA GENMASK_ULL(31, 0) ++ ++#define SXE2_WQE_FRAG_LEN GENMASK_ULL(62, 32) ++#define SXE2_WQE_FRAG_STAG GENMASK_ULL(31, 0) ++#define SXE2_WQE_FRAG_VALID BIT_ULL(63) ++ ++#define SXE2_WQE_DESTQKEY GENMASK_ULL(31, 0) ++#define SXE2_WQE_DESTQPN GENMASK_ULL(55, 32) ++ ++#define SXE2_WQE_REMOTEINVTAG GENMASK_ULL(31, 0) ++#define SXE2_WQE_REMOTETAG GENMASK_ULL(31, 0) ++#define SXE2_WQE_AH_ID GENMASK_ULL(16, 0) ++#define SXE2_WQE_OPCODE GENMASK_ULL(37, 32) ++#define SXE2_WQE_ADDSGECNT GENMASK_ULL(41, 38) ++#define SXE2_WQE_REPORTRTT BIT_ULL(46) ++#define SXE2_WQE_IMMEDATAFLG BIT_ULL(47) ++#define SXE2_WQE_INLINEDATALEN GENMASK_ULL(55, 48) ++#define SXE2_WQE_PUSHWQE BIT_ULL(56) ++#define SXE2_WQE_INLINEDATAFLG BIT_ULL(57) ++#define SXE2_WQE_READFENCE BIT_ULL(60) ++#define SXE2_WQE_LOCALFENCE BIT_ULL(61) ++#define SXE2_WQE_SIGCOMPL BIT_ULL(62) ++#define SXE2_WQE_VALID BIT_ULL(63) ++ ++#define SXE2_WQE_MR_STAG GENMASK_ULL(63, 32) ++#define SXE2_WQE_MW_STAG GENMASK_ULL(31, 0) ++#define SXE2_WQE_MW_LEN GENMASK_ULL(45, 0) ++#define SXE2_WQE_ACCESSRIGHT GENMASK_ULL(52, 48) ++#define SXE2_WQE_VA_BASE_FLAG BIT_ULL(53) 
++#define SXE2_WQE_MW_TYPE BIT_ULL(54) ++ ++#define SXE2_WQEALLOC_WQE_DESC_INDEX GENMASK(31, 20) ++#define SXE2_CQE_VALID BIT_ULL(63) ++#define SXE2_CQE_WQEIDX GENMASK_ULL(46, 32) ++ ++#define SXE2_SET_FIELD(origin, mask, val) \ ++ ((origin) |= FIELD_PREP(mask, val)) ++ ++#define SXE2_GET_FIELD(mask, val) \ ++ FIELD_GET(mask, val) ++ ++enum sxe2_addressing_type { ++ SXE2_ADDR_TYPE_ZERO_BASED = 0, ++ SXE2_ADDR_TYPE_VA_BASED = 1, ++}; ++ ++enum sxe2_cq_err{ ++ SXE2_CQ_OK = 0, ++ SXE2_CQ_NOENT = -2, ++}; ++ ++enum { ++ SXE2_CQ_DB_REQ_SOLICITED = 1 << 31, ++ SXE2_CQ_DB_REQ_NOSOLICITED = 0 << 31 ++}; ++ ++enum { ++ SXE2_CQ_SET_CI = 0, ++ SXE2_CQ_ARM_DB = 1, ++}; ++ ++enum { ++ SXE2_QP_RQ_PI = 0, ++ SXE2_QP_SQ_CI = 3, ++}; ++ ++enum sxe2_arm_type { ++ SXE2_CQ_ARM_NEXT = 0, ++ SXE2_CQ_ARM_SOLICITED = 1, ++}; ++ ++enum sxe2_major_opcode { ++ SXE2_SUCCESS = 0, ++ SXE2_LOCAL_LEN_ERR = 0x1, ++ SXE2_LOCAL_QP_OP_ERR = 0x2, ++ SXE2_RSV1_ERR, ++ SXE2_LOCAL_PROTECTION_ERR = 0x4, ++ SXE2_WR_FLUSH_ERR = 0x5, ++ SXE2_MW_BIND_ERR = 0x6, ++ SXE2_FAST_REGISTER_ERR = 0x7, ++ SXE2_INVALID_KEY_ERR = 0x8, ++ SXE2_BAD_RESPONSE_ERR = 0x10, ++ SXE2_LOCAL_ACCESS_ERR = 0x11, ++ SXE2_REMOTE_INVALID_REQUEST_ERR = 0x12, ++ SXE2_REMOTE_ACCESS_ERR = 0x13, ++ SXE2_REMOTE_OPERATION_ERR = 0x14, ++ SXE2_TRANS_RETRY_CNT_EXCEED_ERR = 0x15, ++ SXE2_RNR_RETRY_CNT_EXCEED_ERR = 0x16, ++ SXE2_ABORT_ERR = 0x22, ++ SXE2_DIF_CHECK_ERR = 0x23, ++ SXE2_SQ_FAKE_ERR = 0x24, ++}; ++ ++struct sxe2_post_send { ++ struct ibv_sge *sg_list; ++ __u32 num_sges; ++ __u32 qkey; ++ __u32 dest_qp; ++ __u32 ah_id; ++}; ++ ++struct sxe2_rdma_write { ++ struct ibv_sge *lo_sg_list; ++ struct ibv_sge rem_addr; ++ __u32 num_lo_sges; ++}; ++ ++struct sxe2_rdma_read { ++ struct ibv_sge *lo_sg_list; ++ struct ibv_sge rem_addr; ++ __u32 num_lo_sges; ++}; ++ ++struct sxe2_bind_window { ++ __u32 mr_stag; ++ __u32 mw_stag; ++ __u64 bind_len; ++ void *va; ++ enum sxe2_addressing_type addressing_type; ++ bool ena_reads : 1; ++ bool ena_writes : 
1; ++ bool mem_window_type_1 : 1; ++}; ++ ++struct sxe2_local_invalidate { ++ __u32 target_stag; ++}; ++ ++struct sxe2_wr_info { ++ __u64 wr_id; ++ __u8 op_type; ++ __u8 l4len; ++ bool signaled : 1; ++ bool read_fence : 1; ++ bool local_fence : 1; ++ bool inline_data : 1; ++ bool imm_data_valid : 1; ++ bool push_wqe : 1; ++ bool report_rtt : 1; ++ bool udp_hdr : 1; ++ bool defer_flag : 1; ++ bool post_wqe : 1; ++ __u32 imm_data; ++ __u32 rkey_to_inv; ++ union { ++ struct sxe2_post_send send; ++ struct sxe2_rdma_write rdma_write; ++ struct sxe2_rdma_read rdma_read; ++ struct sxe2_bind_window bind_window; ++ struct sxe2_local_invalidate local_inval; ++ } op_info; ++ enum sxe2_disp_id funid; ++}; ++ ++struct sxe2_rq_info { ++ __u64 wr_id; ++ struct ibv_sge *sg_list; ++ __u32 num_sges; ++}; ++ ++struct sxe2_imme_data { ++ __le64 imme_data; ++}; ++ ++struct sxe2_frag_data { ++ __le64 tag_offset; ++ union { ++ struct { ++ __le64 stag : 32; ++ __le64 frag_len : 31; ++ __le64 frag_valid : 1; ++ } field; ++ __le64 val; ++ } offset8; ++}; ++ ++union sxe2_dqpn_data { ++ struct { ++ __le64 dest_qkey : 32; ++ __le64 dest_qpn : 24; ++ __le64 rsv0 : 8; ++ } field; ++ __le64 val; ++}; ++ ++struct sxe2_bindmw_info { ++ __le64 mw_va_base; ++ union { ++ struct { ++ __le64 mw_key : 32; ++ __le64 mr_key : 32; ++ } field; ++ __le64 val; ++ } offset8; ++ union { ++ struct { ++ __le64 mw_len : 46; ++ __le64 rsv0 : 18; ++ } field; ++ __le64 val; ++ } offset16; ++}; ++ ++union sxe2_send_hdr { ++ struct { ++ __u64 remote_inv_rkey : 32; ++ __u64 op : 6; ++ __u64 addfragcnt : 4; ++ __u64 rsvd1 : 4; ++ __u64 report_rtt : 1; ++ __u64 imme_data_flag : 1; ++ __u64 rsvd2 : 8; ++ __u64 push_wqe : 1; ++ __u64 inline_data_flag : 1; ++ __u64 rsvd3 : 1; ++ __u64 rsvd4 : 1; ++ __u64 read_fence : 1; ++ __u64 local_fence : 1; ++ __u64 signaled_completion : 1; ++ __u64 wqe_valid : 1; ++ } field; ++ __u64 val; ++}; ++ ++union sxe2_send_inline_hdr { ++ struct { ++ __u64 remote_inv_rkey : 32; ++ __u64 op : 6; 
++ __u64 rsvd1 : 8; ++ __u64 report_rtt : 1; ++ __u64 imme_data_flag : 1; ++ __u64 inline_data_len : 8; ++ __u64 push_wqe : 1; ++ __u64 inline_data_flag : 1; ++ __u64 rsvd2 : 2; ++ __u64 read_fence : 1; ++ __u64 local_fence : 1; ++ __u64 signaled_completion : 1; ++ __u64 wqe_valid : 1; ++ } field; ++ __u64 val; ++}; ++ ++union sxe2_write_hdr { ++ struct { ++ __u64 remote_key : 32; ++ __u64 op : 6; ++ __u64 addfragcnt : 4; ++ __u64 rsvd1 : 4; ++ __u64 report_rtt : 1; ++ __u64 imme_data_flag : 1; ++ __u64 rsvd2 : 8; ++ __u64 push_wqe : 1; ++ __u64 inline_data_flag : 1; ++ __u64 rsvd3 : 2; ++ __u64 read_fence : 1; ++ __u64 local_fence : 1; ++ __u64 signaled_completion : 1; ++ __u64 wqe_valid : 1; ++ } field; ++ __u64 val; ++}; ++ ++union sxe2_write_inline_hdr { ++ struct { ++ __u64 remote_key : 32; ++ __u64 op : 6; ++ __u64 rsvd1 : 8; ++ __u64 report_rtt : 1; ++ __u64 imme_data_flag : 1; ++ __u64 inline_data_len : 8; ++ __u64 push_wqe : 1; ++ __u64 inline_data_flag : 1; ++ __u64 rsvd2 : 2; ++ __u64 read_fence : 1; ++ __u64 local_fence : 1; ++ __u64 signaled_completion : 1; ++ __u64 wqe_valid : 1; ++ } field; ++ __u64 val; ++}; ++ ++union sxe2_read_hdr { ++ struct { ++ __u64 remote_key : 32; ++ __u64 op : 6; ++ __u64 addfragcnt : 4; ++ __u64 rsvd1 : 4; ++ __u64 report_rtt : 1; ++ __u64 rsvd2 : 1; ++ __u64 rsvd3 : 8; ++ __u64 push_wqe : 1; ++ __u64 rsvd4 : 3; ++ __u64 read_fence : 1; ++ __u64 local_fence : 1; ++ __u64 signaled_completion : 1; ++ __u64 wqe_valid : 1; ++ } field; ++ __u64 val; ++}; ++ ++union sxe2_bindmw_hdr { ++ struct { ++ __u64 rsvd0 : 32; ++ __u64 op : 6; ++ __u64 rsvd1 : 10; ++ __u64 access : 5; ++ __u64 va_base_flag : 1; ++ __u64 mw_type : 1; ++ __u64 rsvd2 : 1; ++ __u64 push_wqe : 1; ++ __u64 rsvd3 : 3; ++ __u64 read_fence : 1; ++ __u64 local_fence : 1; ++ __u64 signaled_completion : 1; ++ __u64 wqe_valid : 1; ++ } field; ++ __u64 val; ++}; ++ ++union sxe2_inval_hdr { ++ struct { ++ __u64 rsvd0 : 32; ++ __u64 op : 6; ++ __u64 rsvd1 : 18; ++ __u64 
push_wqe : 1; ++ __u64 rsvd3 : 3; ++ __u64 read_fence : 1; ++ __u64 local_fence : 1; ++ __u64 signaled_completion : 1; ++ __u64 wqe_valid : 1; ++ } field; ++ __u64 val; ++}; ++ ++union sxe2_fregmr_hdr { ++ struct { ++ __u64 consumer_key : 8; ++ __u64 mr_index : 24; ++ __u64 op : 6; ++ __u64 log_entity_size : 5; ++ __u64 rsvd1 : 5; ++ __u64 access : 5; ++ __u64 va_base_flag : 1; ++ __u64 pbl_mode : 2; ++ __u64 push_wqe : 1; ++ __u64 rsvd3 : 3; ++ __u64 read_fence : 1; ++ __u64 local_fence : 1; ++ __u64 signaled_completion : 1; ++ __u64 wqe_valid : 1; ++ } field; ++ __u64 val; ++}; ++ ++union sxe2_rq_hdr { ++ struct { ++ __u64 rsv0 : 38; ++ __u64 addfragcnt : 4; ++ __u64 rsv1 : 21; ++ __u64 wqe_valid : 1; ++ } field; ++ __u64 val; ++}; ++ ++union sxe2_nop_hdr { ++ struct { ++ __u64 rsv0 : 32; ++ __u64 op : 6; ++ __u64 addfragcnt : 4; ++ __u64 rsv1 : 14; ++ __u64 push_wqe : 1; ++ __u64 rsv2 : 3; ++ __u64 read_fence : 1; ++ __u64 local_fence : 1; ++ __u64 signaled_completion : 1; ++ __u64 wqe_valid : 1; ++ } field; ++ __u64 val; ++}; ++ ++enum sxe2_qp_caps { ++ SXE2_WRITE_WITH_IMM = 1, ++ SXE2_SEND_WITH_IMM = 2, ++ SXE2_LLWQE_MODE = 8, ++}; ++ ++enum sxe2_qp_wqe_size { ++ SXE2_WQE_SIZE_32 = 32, ++ SXE2_WQE_SIZE_64 = 64, ++ SXE2_WQE_SIZE_96 = 96, ++ SXE2_WQE_SIZE_128 = 128, ++ SXE2_WQE_SIZE_256 = 256, ++}; ++ ++static inline void set_64bit_val(__le64 *wqe_words, __u32 byte_index, __u64 val) ++{ ++ wqe_words[byte_index >> 3] = htole64(val); ++} ++ ++static inline void set_32bit_val(__le32 *wqe_words, __u32 byte_index, __u32 val) ++{ ++ wqe_words[byte_index >> 2] = htole32(val); ++} ++ ++static inline void get_64bit_val(__le64 *wqe_words, __u32 byte_index, ++ __u64 *val) ++{ ++ *val = le64toh(wqe_words[byte_index >> 3]); ++} ++ ++static inline void get_32bit_val(__le32 *wqe_words, __u32 byte_index, ++ __u32 *val) ++{ ++ *val = le32toh(wqe_words[byte_index >> 2]); ++} ++ ++static inline void db_wr32(__u32 val, __u32 *wqe_word) ++{ ++ *wqe_word = val; ++} ++ ++typedef int 
(*rdma_disp_func)(struct sxe2_qp_common *qp, ++ struct sxe2_wr_info *wr_info, bool post_sq); ++int sxe2_hw_send(struct sxe2_qp_common *qp, struct sxe2_wr_info *wr_info, ++ bool post_sq); ++int sxe2_hw_inline_send(struct sxe2_qp_common *qp, ++ struct sxe2_wr_info *wr_info, bool post_sq); ++int sxe2_hw_rdma_write(struct sxe2_qp_common *qp, ++ struct sxe2_wr_info *wr_info, bool post_sq); ++int sxe2_hw_inline_rdma_write(struct sxe2_qp_common *qp, ++ struct sxe2_wr_info *wr_info, bool post_sq); ++int sxe2_hw_rdma_read(struct sxe2_qp_common *qp, ++ struct sxe2_wr_info *wr_info, bool post_sq); ++int sxe2_hw_mw_bind(struct sxe2_qp_common *qp, struct sxe2_wr_info *wr_info, ++ bool post_sq); ++int sxe2_hw_local_invalidate(struct sxe2_qp_common *qp, ++ struct sxe2_wr_info *wr_info, bool post_sq); ++void sxe2_clean_cqes(struct sxe2_qp_common *qp, ++ struct sxe2_ucq *vendor_cq, int cq_type); ++void sxe2_push_nop_wqe(struct sxe2_qp_common *qp); ++ ++#endif +diff -Naur rdma-core-48.0.bak/providers/sxe2/log.c rdma-core-48.0/providers/sxe2/log.c +--- rdma-core-48.0.bak/providers/sxe2/log.c 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/log.c 2026-05-27 17:08:05.579104733 +0800 +@@ -0,0 +1,468 @@ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "log.h" ++ ++#define SXE2_LOG_EXTEND_INFO "SXE2_LOG_EXTEND_INFO" ++#define SXE2_LOG_FILE_SIZE "SXE2_LOG_FILE_SIZE" ++#define SXE2_LOG_SERIAL "SXE2_LOG_SERIAL" ++#define SXE2_LOG_COVER_WRITE "SXE2_LOG_COVER_WRITE" ++ ++#define SXE2_LOG_PATH "/var/log/" ++#define SXE2_LOG_FILE_NAME "sxe2_rdma_file_current.log" ++#define SXE2_LOG_FLOCK_NAME "sxe2_rdma_file_log.flock" ++#define CONCAT(a, b) a##b ++ ++#define SXE2_LOG_DEFAULT_DEBUG_FILE_SIZE \ ++ (800) ++#define SXE2_LOG_DEFAULT_RELEASE_FILE_SIZE \ ++ (300) ++ ++log_mgr_s g_log_mgr = {false, NULL, 200, false, false, -1, -1, false}; ++pthread_mutex_t g_log_mutex; ++ ++log_mgr_s 
*log_mgr_point_get(void) ++{ ++ return &g_log_mgr; ++} ++ ++void log_set_serial_switch(bool on) ++{ ++ log_mgr_s *p_log_mgr = NULL; ++ ++ p_log_mgr = log_mgr_point_get( ); ++ p_log_mgr->serial_on = on; ++ return; ++} ++ ++int log_init0(bool is_ut) ++{ ++ char *env = NULL; ++ int file_size; ++ int rc; ++ ++ env = getenv(SXE2_LOG_EXTEND_INFO); ++ if (env) { ++ if (0 == strncmp(env, "true", 4)) { ++ g_log_mgr.extend = true; ++ } else { ++ g_log_mgr.extend = false; ++ } ++ } ++ ++ env = getenv(SXE2_LOG_SERIAL); ++ if (env) { ++ if (0 == strncmp(env, "true", 4)) { ++ g_log_mgr.serial_on = true; ++ } else { ++ g_log_mgr.serial_on = false; ++ } ++ } ++ ++ env = getenv(SXE2_LOG_COVER_WRITE); ++ if (env) { ++ if (0 == strncmp(env, "true", 4)) { ++ g_log_mgr.file_cover_write_flag = true; ++ } else { ++ g_log_mgr.file_cover_write_flag = false; ++ } ++ } ++ ++#if defined(SXE2_CFG_DEBUG) ++ g_log_mgr.file_size = SXE2_LOG_DEFAULT_DEBUG_FILE_SIZE; ++#else ++ g_log_mgr.file_size = SXE2_LOG_DEFAULT_RELEASE_FILE_SIZE; ++#endif ++ ++ env = getenv(SXE2_LOG_FILE_SIZE); ++ if (env) { ++ file_size = atoi(env); ++ if (file_size > 0) { ++ g_log_mgr.file_size = file_size; ++ } ++ } ++ ++ if (g_log_mgr.fd == -1) { ++ if (is_ut) { ++ g_log_mgr.is_ut = true; ++ g_log_mgr.fd = ++ open(SXE2_LOG_FILE_NAME, ++ O_APPEND | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); ++ } else { ++ g_log_mgr.is_ut = false; ++ g_log_mgr.fd = ++ open(SXE2_LOG_PATH SXE2_LOG_FILE_NAME, ++ O_APPEND | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); ++ } ++ if (g_log_mgr.fd == -1) { ++ perror("open"); ++ } ++ } ++ ++ if (g_log_mgr.lockfd == -1) { ++ if (is_ut) { ++ g_log_mgr.lockfd = ++ open(SXE2_LOG_FLOCK_NAME, O_WRONLY | O_CREAT, 0666); ++ } else { ++ g_log_mgr.lockfd = open(SXE2_LOG_PATH SXE2_LOG_FLOCK_NAME, ++ O_WRONLY | O_CREAT, 0666); ++ } ++ if (g_log_mgr.lockfd == -1) { /* report failure of the lock-file open just attempted, not of the log fd */ ++ perror("open"); ++ } ++ } ++ ++ rc = pthread_mutex_init(&g_log_mutex, NULL); ++ if (rc != 0) { ++ perror("mutex_init"); ++ } ++ ++ return 0; ++} ++ ++int
log_uninit0(void) ++{ ++ if (g_log_mgr.fd != -1) { ++ close(g_log_mgr.fd); ++ g_log_mgr.fd = -1; ++ } ++ ++ if (g_log_mgr.lockfd != -1) { ++ close(g_log_mgr.lockfd); ++ g_log_mgr.lockfd = -1; ++ } ++ ++ pthread_mutex_destroy(&g_log_mutex); ++ ++ return 0; ++} ++ ++static inline bool log_file_lock(int fd) ++{ ++ int rc = flock(fd, LOCK_EX); ++ return (bool)(rc == 0); ++} ++ ++static inline bool log_file_unlock(int fd) ++{ ++ int rc = flock(fd, LOCK_UN); ++ return (bool)(rc == 0); ++} ++ ++void log_file_name_set(char *p_name) ++{ ++ struct timeval tv; ++ time_t time_stamp_now; ++ struct tm *p_time_now; ++ long usec; ++ ++ gettimeofday(&tv, NULL); ++ usec = tv.tv_usec; ++ time_stamp_now = tv.tv_sec; ++ p_time_now = localtime(&time_stamp_now); ++ ++ if (g_log_mgr.is_ut) { ++ sprintf(p_name, ++ "sxe2_rdma_file_%04d_%02d_%02d-%02d_%02d_%02d.%06ld.log", ++ p_time_now->tm_year + 1900, p_time_now->tm_mon + 1, ++ p_time_now->tm_mday, p_time_now->tm_hour, p_time_now->tm_min, ++ p_time_now->tm_sec, usec); ++ } else { ++ sprintf(p_name, ++ SXE2_LOG_PATH ++ "sxe2_rdma_file_%04d_%02d_%02d-%02d_%02d_%02d.%06ld.log", ++ p_time_now->tm_year + 1900, p_time_now->tm_mon + 1, ++ p_time_now->tm_mday, p_time_now->tm_hour, p_time_now->tm_min, ++ p_time_now->tm_sec, usec); ++ } ++ ++ return; ++} ++ ++void log_file_store(const char *p_name) ++{ ++ char new_name[128] = {0}; /* log_file_name_set() writes up to 54 chars + NUL; the former 35-byte buffer overflowed */ ++ log_file_name_set(new_name); ++ (void)rename(p_name, new_name); ++ return; ++} ++ ++void log_file_not_cover_write(void) ++{ ++ int new_fd = -1; ++ int old_fd = g_log_mgr.fd; ++ struct stat st; ++ if (fstat(old_fd, &st) < 0 || (st.st_size >> 20) < g_log_mgr.file_size) { ++ goto end; ++ } ++ ++ if (g_log_mgr.is_ut) { ++ new_fd = open(SXE2_LOG_FILE_NAME, ++ O_APPEND | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); ++ } else { ++ new_fd = open(SXE2_LOG_PATH SXE2_LOG_FILE_NAME, ++ O_APPEND | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); ++ } ++ if (new_fd < 0) { ++ goto end; ++ } ++ ++ if (fstat(new_fd, &st) < 0 || (st.st_size >> 20) <
g_log_mgr.file_size) { ++ g_log_mgr.fd = new_fd; ++ close(old_fd); ++ goto end; ++ } ++ close(new_fd); ++ ++ if (g_log_mgr.is_ut) { ++ log_file_store(SXE2_LOG_FILE_NAME); ++ } else { ++ log_file_store(SXE2_LOG_PATH SXE2_LOG_FILE_NAME); ++ } ++ ++ if (g_log_mgr.is_ut) { ++ new_fd = open(SXE2_LOG_FILE_NAME, ++ O_APPEND | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); ++ } else { ++ new_fd = open(SXE2_LOG_PATH SXE2_LOG_FILE_NAME, ++ O_APPEND | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); ++ } ++ if (new_fd < 0) { ++ goto end; ++ } ++ g_log_mgr.fd = new_fd; ++ ++ close(old_fd); ++end: ++ return; ++} ++ ++void log_file_cover_write(void) ++{ ++ int old_fd = -1, new_fd = -1; ++ old_fd = g_log_mgr.fd; ++ struct stat st; ++ if (fstat(g_log_mgr.fd, &st) < 0 || ++ (st.st_size >> 20) < g_log_mgr.file_size) { ++ goto end; ++ } ++ ++ if (g_log_mgr.is_ut) { ++ new_fd = ++ open(SXE2_LOG_FILE_NAME, ++ O_APPEND | O_TRUNC | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); ++ } else { ++ new_fd = ++ open(SXE2_LOG_PATH SXE2_LOG_FILE_NAME, ++ O_APPEND | O_TRUNC | O_WRONLY | O_CREAT | O_NONBLOCK, 0666); ++ } ++ if (new_fd < 0) { ++ goto end; ++ } ++ g_log_mgr.fd = new_fd; ++ close(old_fd); ++end: ++ return; ++} ++ ++int log_file_rotate(void) ++{ ++ int ret = 0; ++ __u64 fileSize; ++ struct stat st; ++ sigset_t mask, oldmask; ++ if (fstat(g_log_mgr.fd, &st) == -1) { ++ perror("fstat"); ++ ret = -1; ++ goto end; ++ } ++ ++ fileSize = st.st_size >> 20; ++ if (fileSize >= (__u64)g_log_mgr.file_size) { ++ if (sigfillset(&mask) == -1) { ++ perror("sigfillset"); ++ ret = -1; ++ goto end; ++ } ++ ++ if (pthread_sigmask(SIG_BLOCK, &mask, &oldmask) != 0) { ++ perror("pthread_sigmask"); ++ ret = -1; ++ goto end; ++ } ++ ++ log_file_lock(g_log_mgr.lockfd); ++ ++ if (g_log_mgr.file_cover_write_flag) { ++ log_file_cover_write( ); ++ } else { ++ log_file_not_cover_write( ); ++ } ++ ++ if (g_log_mgr.fd < 0) { ++ ret = -1; ++ } ++ ++ log_file_unlock(g_log_mgr.lockfd); ++ ++ if (pthread_sigmask(SIG_SETMASK, &oldmask, NULL) != 
0) { ++ perror("pthread_sigmask"); ++ ret = -1; ++ } ++ } ++end: ++ return ret; ++} ++ ++__u64 sxe2_current_time_us_get(void) ++{ ++ struct timeval tv = {0}; ++ gettimeofday(&tv, NULL); ++ return (SXE2_SEC2US(tv.tv_sec) + tv.tv_usec); ++} ++ ++__u32 log_time_to_str(char *p_buf, __u32 buf_size, struct timeval *p_time) ++{ ++ struct tm time; ++ __u32 use_len; ++ ++ (void)localtime_r(&p_time->tv_sec, &time); ++ ++ use_len = (__u32)strftime(p_buf, buf_size, "[%Y/%m/%d.%H:%M:%S", &time); ++ use_len += snprintf(p_buf + use_len, buf_size - use_len, ".%06lu]", ++ p_time->tv_usec); ++ ++ return use_len; ++} ++ ++__u32 log_format_prefix(FILE *p_stream, char *p_buf, __u32 buf_size, ++ struct timeval *p_time_stamp, const char *bdf, ++ __u32 level, const char *p_func_name, __u32 line) ++{ ++ static const char *log_level_str[LOG_LEVEL_MAX] = { ++ "INVALID", "FATAL", "ERROR", "WARN", "INFO", "DEBUG", "TRACE", ++ }; ++ __u32 use_len = 0; ++ int fd; ++ if (p_stream == NULL) { ++ fd = g_log_mgr.fd; ++ } else { ++ fd = fileno(p_stream); ++ } ++ ++ use_len = log_time_to_str(p_buf, buf_size, p_time_stamp); ++ if (g_log_mgr.extend) { ++ use_len += snprintf(p_buf + use_len, buf_size - use_len, "[%s][%d][%d]", ++ log_level_str[level], (int)getpid( ), fd); ++ } else { ++ use_len += snprintf(p_buf + use_len, buf_size - use_len, "[%s]", ++ log_level_str[level]); ++ } ++ ++ if (NULL != bdf) { ++ use_len += snprintf(p_buf + use_len, buf_size - use_len, "[%s][%s:%u]", ++ bdf, p_func_name, line); ++ } else { ++ use_len += snprintf(p_buf + use_len, buf_size - use_len, "[%s:%u]", ++ p_func_name, line); ++ } ++ ++ return use_len; ++} ++ ++__u32 log_buff_fill(FILE *p_stream, char *p_buff, __u32 buff_size, ++ struct timeval *p_time_stamp, const char *bdf, __u32 level, ++ const char *p_func_name, __u32 line, const char *p_format, ++ va_list va_list) ++{ ++ log_hdr_s *p_log_hdr = NULL; ++ char *p_data_buf; ++ __u32 log_data_cap; ++ __u32 use_len; ++ ++ p_log_hdr = (log_hdr_s *)p_buff; ++ p_data_buf = 
p_buff + sizeof(log_hdr_s); ++ log_data_cap = ++ buff_size - sizeof(log_hdr_s) - 1; ++ ++ p_log_hdr->ts = SXE2_SEC2US(p_time_stamp->tv_sec) + p_time_stamp->tv_usec; ++ p_log_hdr->magic = LOG_MAGIC; ++ ++ use_len = log_format_prefix(p_stream, p_data_buf, log_data_cap, ++ p_time_stamp, bdf, level, p_func_name, line); ++ use_len += vsnprintf(p_data_buf + use_len, log_data_cap - use_len, p_format, ++ va_list); ++ use_len = use_len > log_data_cap - 1 ? log_data_cap - 1 : use_len; ++ if (p_data_buf[use_len - 1] != '\n') { ++ use_len += sprintf(p_data_buf + use_len, "\n"); ++ } ++ p_log_hdr->len = (__u16)use_len; ++ ++ return use_len; ++} ++ ++void log_store(FILE *p_stream, const char *bdf, log_level_e level, ++ const char *p_file, const char *p_func_name, __u32 line, ++ const char *p_format, ...) ++{ ++ struct timeval time_stamp; ++ __u64 us_now; ++ char *local_log_buf = malloc(LOG_LINE_MAX); ++ __u32 use_len; ++ va_list va_list; ++ ssize_t ret = 0; ++ char *p_data_buf = local_log_buf ? local_log_buf + sizeof(log_hdr_s) : NULL; ++ int fd; ++ ++ if ((local_log_buf == NULL) || (p_func_name == NULL) || (p_format == NULL)) { /* also bail out when malloc failed; free(NULL) at end is a no-op */ ++ goto end; ++ } ++ ++ us_now = sxe2_current_time_us_get( ); ++ time_stamp.tv_sec = (__time_t)SXE2_US2SEC(us_now); ++ time_stamp.tv_usec = (__suseconds_t)(us_now - SXE2_SEC2US(time_stamp.tv_sec)); ++ va_start(va_list, p_format); ++ use_len = log_buff_fill(p_stream, local_log_buf, LOG_LINE_MAX, &time_stamp, ++ bdf, level, p_func_name, line, p_format, va_list); ++ va_end(va_list); ++ ++ if (NULL == p_stream) { ++ if (g_log_mgr.fd == -1) { ++ goto err; ++ } ++ ++ if (log_file_rotate( ) < 0) { ++ goto err; ++ } ++ ++ ret = write(g_log_mgr.fd, p_data_buf, use_len); ++ if (ret < 1) { ++ perror("write"); ++ } ++ } else { ++ fd = fileno(p_stream); ++ ret = write(fd, p_data_buf, use_len); ++ if (ret < 1) { ++ perror("write"); ++ } ++ } ++ ++err: ++ if (g_log_mgr.serial_on) { ++ printf("%s", p_data_buf); ++ } ++end: ++ (void)p_file; ++ free(local_log_buf); ++ return; ++} +diff -Naur
rdma-core-48.0.bak/providers/sxe2/log.h rdma-core-48.0/providers/sxe2/log.h +--- rdma-core-48.0.bak/providers/sxe2/log.h 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/log.h 2026-05-27 17:08:05.579104733 +0800 +@@ -0,0 +1,245 @@ ++ ++#ifndef _SXE2_LOG_H_ ++#define _SXE2_LOG_H_ ++ ++#include ++#include ++#include ++#include ++ ++#define SXE2_LOG_FD_NULL NULL ++#define SXE2_LOG_FILE_NULL NULL ++#define LOG_LINE_MAX (2048) ++#define LOG_MAGIC (0xBAAB) ++ ++#define SXE2_SEC2US(_sec) ((_sec)*1000000ULL) ++#define SXE2_US2SEC(_us) ((_us)/1000000ULL) ++ ++#define SXE2_LOG_IOLOG_ON "SXE2_LOG_IOLOG_SWITCH" ++#define SXE2_VERBS_LOG_LEVEL "SXE2_RDMA_LOG_LEVEL" ++#define SXE2_INVALID_BDF "ff:ff.f" ++ ++typedef struct log_hdr{ ++ __u64 ts; ++ __u32 pad; ++ __u16 len; ++ __u16 magic; ++}log_hdr_s; ++ ++typedef enum log_level { ++ LOG_LEVEL_INVALID, ++ LOG_LEVEL_FATAL, ++ LOG_LEVEL_ERROR, ++ LOG_LEVEL_WARN, ++ LOG_LEVEL_INFO, ++ LOG_LEVEL_DEBUG, ++ LOG_LEVEL_TRACE, ++ LOG_LEVEL_MAX, ++} log_level_e; ++ ++typedef struct log_persist_ops { ++ void (*pre_persist)(void *persist_mgr); ++ bool (*checkPersistSpace)(void *persist_mgr, __u32 persist_threshold); ++ int (*flushInBlock)(void *persist_mgr, __u8 *buf, __u32 len); ++ int (*flushOne)(void *persist_mgr, __u8 *buf, __u32 len); ++ void (*postPersist)(void *persist_mgr); ++ void (*printPersistInfo)(void *persist_mgr); ++ int (*forcedExport)(void *persist_mgr); ++ int (*forcedSync)(void *persist_mgr); ++}log_persist_ops_s; ++ ++typedef struct log_mgr { ++ bool serial_on; ++ log_persist_ops_s *persist_ops; ++ int file_size; ++ bool file_cover_write_flag; ++ bool extend; ++ int fd; ++ int lockfd; ++ bool is_ut; ++} log_mgr_s; ++ ++log_mgr_s *log_mgr_point_get(void); ++ ++int log_init0(bool is_ut); ++ ++int log_uninit0(void); ++ ++void log_set_serial_switch(bool on); ++ ++void log_file_name_set(char *p_name); ++ ++void log_file_store(const char *p_name); ++ ++void log_file_not_cover_write(void); ++ ++void 
log_file_cover_write(void); ++ ++int log_file_rotate(void); ++ ++__u64 sxe2_current_time_us_get(void); ++ ++__u32 log_time_to_str(char *p_buf, __u32 buf_size, struct timeval *p_time); ++ ++__u32 log_format_prefix(FILE *p_stream, char *p_buf, __u32 buf_size, ++ struct timeval *p_time_stamp, const char *bdf, ++ __u32 level, const char *p_func_name, __u32 line); ++ ++__u32 log_buff_fill(FILE *p_stream, char *p_buff, __u32 buff_size, ++ struct timeval *p_time_stamp, const char *bdf, __u32 level, ++ const char *p_func_name, __u32 line, const char *p_format, ++ va_list va_list); ++ ++void log_store(FILE *p_stream, const char *bdf, ++ log_level_e level, const char *p_file, const char *p_func_name, __u32 line, ++ const char *p_format, ...) __attribute__((format(printf, 7, 8))); ++ ++#define SXE2_LOG_STORE(p_stream, bdf, level, fmt, ...) \ ++ do { \ ++ log_store(p_stream, bdf, level, \ ++ (strrchr(__FILE__, '/') ? (strrchr(__FILE__, '/') + 1) \ ++ : __FILE__), \ ++ __func__, __LINE__, fmt, ##__VA_ARGS__); \ ++ } while (0) ++ ++#define SXE2_LOG_DEV_INVALID(p_stream, bdf, fmt, ...) \ ++ SXE2_LOG_STORE(p_stream, bdf, LOG_LEVEL_INVALID, fmt, ##__VA_ARGS__) ++ ++#define SXE2_LOG_DEV_FATAL(p_stream, bdf, fmt, ...) \ ++ SXE2_LOG_STORE(p_stream, bdf, LOG_LEVEL_FATAL, fmt, ##__VA_ARGS__) ++ ++#define SXE2_LOG_DEV_ERROR(p_stream, bdf, fmt, ...) \ ++ SXE2_LOG_STORE(p_stream, bdf, LOG_LEVEL_ERROR, fmt, ##__VA_ARGS__) ++ ++#define SXE2_LOG_DEV_WARN(p_stream, bdf, fmt, ...) \ ++ SXE2_LOG_STORE(p_stream, bdf, LOG_LEVEL_WARN, fmt, ##__VA_ARGS__) ++ ++#define SXE2_LOG_DEV_INFO(p_stream, bdf, fmt, ...) \ ++ SXE2_LOG_STORE(p_stream, bdf, LOG_LEVEL_INFO, fmt, ##__VA_ARGS__) ++ ++#define SXE2_LOG_DEV_DEBUG(p_stream, bdf, fmt, ...) \ ++ SXE2_LOG_STORE(p_stream, bdf, LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__) ++ ++#define SXE2_LOG_DEV_TRACE(p_stream, bdf, fmt, ...) \ ++ SXE2_LOG_STORE(p_stream, bdf, LOG_LEVEL_TRACE, fmt, ##__VA_ARGS__) ++ ++#define SXE2_VERBS_LOG_FATAL(fmt, ...) 
\ ++ do { \ ++ (void)printf("%s: " fmt, __func__, ##__VA_ARGS__); \ ++ (void)printf("\n"); \ ++ } while (0) ++ ++#define SXE2_VERBS_LOG_ERROR(fmt, ...) \ ++ do { \ ++ (void)printf("%s: " fmt, __func__, ##__VA_ARGS__); \ ++ (void)printf("\n"); \ ++ } while (0) ++ ++#define SXE2_VERBS_LOG_WARN(fmt, ...) \ ++ do { \ ++ (void)printf("%s: " fmt, __func__, ##__VA_ARGS__); \ ++ (void)printf("\n"); \ ++ } while (0) ++ ++#define SXE2_VERBS_LOG_INFO(fmt, ...) \ ++ do { \ ++ (void)printf("%s: " fmt, __func__, ##__VA_ARGS__); \ ++ (void)printf("\n"); \ ++ } while (0) ++ ++#define SXE2_VERBS_LOG_DEBUG(fmt, ...) \ ++ do { \ ++ (void)printf("%s: " fmt, __func__, ##__VA_ARGS__); \ ++ (void)printf("\n"); \ ++ } while (0) ++#define SXE2_VERBS_LOG_TRACE(fmt, ...) \ ++ do { \ ++ (void)printf("%s: " fmt, __func__, ##__VA_ARGS__); \ ++ (void)printf("\n"); \ ++ } while (0) ++ ++#if defined SXE2_CFG_DEBUG ++ ++#define SXE2_VERBS_LOG_FATAL_BDF(fmt, ...) \ ++ do { \ ++ if (sctx && sctx->log_level >= LOG_LEVEL_FATAL) { \ ++ SXE2_LOG_DEV_FATAL(sctx->dbg_fp, sctx->bdf, fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define SXE2_VERBS_LOG_ERROR_BDF(fmt, ...) \ ++ do { \ ++ if (sctx && sctx->log_level >= LOG_LEVEL_ERROR) { \ ++ SXE2_LOG_DEV_ERROR(sctx->dbg_fp, sctx->bdf, fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define SXE2_VERBS_LOG_WARN_BDF(fmt, ...) \ ++ do { \ ++ if (sctx && sctx->log_level >= LOG_LEVEL_WARN) { \ ++ SXE2_LOG_DEV_WARN(sctx->dbg_fp, sctx->bdf, fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define SXE2_VERBS_LOG_INFO_BDF(fmt, ...) \ ++ do { \ ++ if (sctx && sctx->log_level >= LOG_LEVEL_INFO) { \ ++ SXE2_LOG_DEV_INFO(sctx->dbg_fp, sctx->bdf, fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define SXE2_VERBS_LOG_DEBUG_BDF(fmt, ...) \ ++ do { \ ++ if (sctx && sctx->log_level >= LOG_LEVEL_DEBUG) { \ ++ SXE2_LOG_DEV_DEBUG(sctx->dbg_fp, sctx->bdf, fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define SXE2_VERBS_LOG_TRACE_BDF(fmt, ...) 
\ ++ do { \ ++ if (sctx && sctx->log_level >= LOG_LEVEL_TRACE) { \ ++ SXE2_LOG_DEV_TRACE(sctx->dbg_fp, sctx->bdf, fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#else ++ ++#define SXE2_VERBS_LOG_FATAL_BDF(fmt, ...) \ ++ do { \ ++ if (sctx && sctx->log_level >= LOG_LEVEL_FATAL) \ ++ SXE2_VERBS_LOG_FATAL(fmt, ##__VA_ARGS__); \ ++ } while (0) ++ ++#define SXE2_VERBS_LOG_ERROR_BDF(fmt, ...) \ ++ do { \ ++ if (sctx && sctx->log_level >= LOG_LEVEL_ERROR) \ ++ SXE2_VERBS_LOG_ERROR(fmt, ##__VA_ARGS__); \ ++ } while (0) ++ ++#define SXE2_VERBS_LOG_WARN_BDF(fmt, ...) \ ++ do { \ ++ if (sctx && sctx->log_level >= LOG_LEVEL_WARN) \ ++ SXE2_VERBS_LOG_WARN(fmt, ##__VA_ARGS__); \ ++ } while (0) ++ ++#define SXE2_VERBS_LOG_INFO_BDF(fmt, ...) \ ++ do { \ ++ if (sctx && sctx->log_level >= LOG_LEVEL_INFO) \ ++ SXE2_VERBS_LOG_INFO(fmt, ##__VA_ARGS__); \ ++ } while (0) ++ ++#define SXE2_VERBS_LOG_DEBUG_BDF(fmt, ...) \ ++ do { \ ++ if (sctx && sctx->log_level >= LOG_LEVEL_DEBUG) \ ++ SXE2_VERBS_LOG_DEBUG(fmt, ##__VA_ARGS__); \ ++ } while (0) ++ ++#define SXE2_VERBS_LOG_TRACE_BDF(fmt, ...) \ ++ do { \ ++ if (sctx && sctx->log_level >= LOG_LEVEL_TRACE) \ ++ SXE2_VERBS_LOG_TRACE(fmt, ##__VA_ARGS__); \ ++ } while (0) ++ ++#endif ++ ++#endif +diff -Naur rdma-core-48.0.bak/providers/sxe2/Makefile rdma-core-48.0/providers/sxe2/Makefile +--- rdma-core-48.0.bak/providers/sxe2/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/Makefile 2026-05-27 17:08:05.579104733 +0800 +@@ -0,0 +1,80 @@ ++################################################################################ ++# Copyright (C) ++################################################################################ ++#(1) 固定模式. 定义路径信息 ++################################################################################ ++#注: MD_TOPDIR在Makefile.define将被使用, 因此需要预先定义 ++#考虑到内核模块编译, Makefile的调用路径与存储路径不一致, 因此不可以$(shell pwd)方式获取路径. ++TOPDIR_MP :=. 
++MAKEPATH := $(abspath $(lastword $(MAKEFILE_LIST))) ++CURDIR := $(shell dirname $(MAKEPATH)) ++TOPDIR := $(abspath $(CURDIR)/$(TOPDIR_MP)) ++MD_TOPDIR := $(TOPDIR) ++MT_TOPDIR := $(TOPDIR) ++ ++################################################################################# ++#(2) Custom section ++################################################################################# ++ ++###################################### ++#(2.1) Define basic module information ++####################################### ++ ++SRCS := $(wildcard *.c) ++OBJS := $(patsubst %.c,%.o,$(SRCS)) ++OBJ_S := libsxe2-rdmav34.a ++#CMR_INCS := -I$(INCS_ARCH) ++#CMR_INCS += -I$(INCS_ARCH)/cmr/ ++CMR_INCS += -I/usr/include/libnl3 ++CMR_INCS += -I/usr/include/drm ++CMR_INCS += -I$(TOPDIR)/../rdma-core-46.0/build/include/ ++LIBS_DIR := $(TOPDIR)/../rdma-core-46.0/build/lib/libibverbs.so ++CCAN_DIR := $(TOPDIR)/../rdma-core-46.0/build/ccan/libccan_pic.a ++UTIL_DIR := $(TOPDIR)/../rdma-core-46.0/build/util/librdma_util_pic.a ++CMR_LIBS := -lpthread ++#LIBS_DIR += $(TOPDIR_MP)/lib/lib_shca_libbase_x86_64.a ++ ++CFLAGS := -g -ggdb #enable debug information ++CFLAGS += -Wall ++#CFLAGS += -Werror #enable all gcc warnings and treat warnings as errors ++CFLAGS += -Wextra #enable extra gcc warnings ++CFLAGS += -frecord-gcc-switches #compile options can be inspected later with "readelf -p .GCC.command.line" ++CFLAGS += -fprofile-arcs -ftest-coverage #enable gcov code-coverage instrumentation ++ ++VERBS_CFLAGS := $(CFLAGS) #cflags used by verbs ++VERBS_CFLAGS += -std=gnu11 #compile as gnu11 ++VERBS_CFLAGS += -Wno-sign-compare #temporarily disabled ++VERBS_CFLAGS += -Wno-unused-parameter #temporarily disabled ++VERBS_CFLAGS += -Wno-missing-field-initializers #temporarily disabled ++####################################### ++#(2.2) Define build targets ++####################################### ++ ++all:$(OBJS) ++ ar -cr -o $(OBJ_S) $^ ++ ranlib $(OBJ_S) ++ ++$(OBJS):%.o:%.c ++ gcc $(VERBS_CFLAGS) $(CMR_INCS) -c -o $@ $< $(CMR_LIBS) ++ ++clean: ++ @rm -rf *.o ut *.gcno *.gcda *.a ++ @rm -rf $(TOPDIR)/rdma-core-46.0/build/* ++ @rm -rf $(TOPDIR)/bin/verbs/* ++# @-$(MAKE) clean -C $(TOPDIR)/lib ++ @rm -rf logfile.txt result.txt ++
@echo all files made removed ++ ++#makefile帮助文档 ++help: ++ @echo Usage: ++ @echo "The following are some of the valid targets for this Makefile:" ++ @echo " all: Compile the whole project" ++ @echo " clean: Clean Compile file" ++ @echo "" ++.PHONY:all clean help liba ++%: ++ @-$(warning MAKE target needed, please get help from command 'make help') ++ @$(MAKE) help ++ ++.DEFAULT_GOAL=all +diff -Naur rdma-core-48.0.bak/providers/sxe2/mc.c rdma-core-48.0/providers/sxe2/mc.c +--- rdma-core-48.0.bak/providers/sxe2/mc.c 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/mc.c 2026-05-27 17:08:05.580104739 +0800 +@@ -0,0 +1,13 @@ ++ ++#include "verbs.h" ++#include "sxe2_common.h" ++ ++int sxe2_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, __u16 lid) ++{ ++ return ibv_cmd_attach_mcast(qp, gid, lid); ++} ++ ++int sxe2_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, __u16 lid) ++{ ++ return ibv_cmd_detach_mcast(qp, gid, lid); ++} +diff -Naur rdma-core-48.0.bak/providers/sxe2/mr.c rdma-core-48.0/providers/sxe2/mr.c +--- rdma-core-48.0.bak/providers/sxe2/mr.c 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/mr.c 2026-05-27 17:08:05.580104739 +0800 +@@ -0,0 +1,98 @@ ++ ++#include ++#include ++ ++#include "sxe2_common.h" ++#include "sxe2_abi.h" ++#include "log.h" ++ ++struct ibv_mr *sxe2_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, ++ uint64_t hca_va, int acc) ++{ ++ struct sxe2_umr *mr; ++ struct ibv_reg_mr cmd; ++ int ret; ++ struct ib_uverbs_reg_mr_resp resp; ++ struct sxe2_rdma_ucontext *sctx = to_sctx(pd->context); ++ ++ mr = calloc(1, sizeof(*mr)); ++ if (!mr) { ++ SXE2_VERBS_LOG_ERROR_BDF("user alloc mr mem failed\n"); ++ return NULL; ++ } ++ ++ ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, acc, &mr->vmr, &cmd, ++ sizeof(cmd), &resp, sizeof(resp)); ++ if (ret) { ++ free(mr); ++ errno = ret; ++ SXE2_VERBS_LOG_ERROR_BDF("ibv cmd reg mr err(%d)\n", ret); ++ return NULL; ++ } ++ mr->acc_flags = acc; ++ 
SXE2_VERBS_LOG_DEBUG_BDF("addr:%#lx, length:%zu, hca_va:%#lx,acc:%#x,handle:%#x, lkey:%#x, rkey:%#x", ++ (uint64_t)addr, length, hca_va, acc, mr->vmr.ibv_mr.handle, ++ mr->vmr.ibv_mr.lkey, mr->vmr.ibv_mr.rkey); ++ ++ return &mr->vmr.ibv_mr; ++} ++ ++int sxe2_udereg_mr(struct verbs_mr *vmr) ++{ ++ int ret; ++ ++ ret = ibv_cmd_dereg_mr(vmr); ++ if (ret) { ++ SXE2_VERBS_LOG_ERROR("ibv cmd dereg mr err(%d)\n", ret); ++ return ret; ++ } ++ ++ free(vmr); ++ return 0; ++} ++ ++int sxe2_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, ++ void *addr, size_t length, int access) ++{ ++ int ret; ++ struct ibv_rereg_mr cmd; ++ struct ib_uverbs_rereg_mr_resp resp; ++ ++ ret = ibv_cmd_rereg_mr(vmr, flags, addr, length, (uintptr_t)addr, ++ access, pd, &cmd, sizeof(cmd), &resp, ++ sizeof(resp)); ++ if (ret) { ++ SXE2_VERBS_LOG_ERROR("ibv cmd rereg mr err(%d)\n", ret); ++ } ++ SXE2_VERBS_LOG_DEBUG("addr:%#lx, length:%zu, acc:%#x,handle:%#x, lkey:%#x, rkey:%#x, flags:%#x", ++ (uint64_t)addr, length, access,vmr->ibv_mr.handle, vmr->ibv_mr.lkey, ++ vmr->ibv_mr.rkey, flags); ++ return ret; ++} ++ ++struct ibv_mr *sxe2_ureg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, ++ size_t length, uint64_t iova, int fd, ++ int acc) ++{ ++ struct sxe2_umr *mr; ++ int ret; ++ struct sxe2_rdma_ucontext *sctx = to_sctx(pd->context); ++ ++ mr = calloc(1, sizeof(*mr)); ++ if (!mr) { ++ SXE2_VERBS_LOG_ERROR_BDF("user alloc dma buf mr failed\n"); ++ return NULL; ++ } ++ ++ ret = ibv_cmd_reg_dmabuf_mr(pd, offset, length, iova, fd, acc, ++ &mr->vmr); ++ if (ret) { ++ SXE2_VERBS_LOG_ERROR_BDF("ibv cmd reg dma buf mr err(%d)\n", ret); ++ free(mr); ++ errno = ret; ++ return NULL; ++ } ++ mr->acc_flags = acc; ++ ++ return &mr->vmr.ibv_mr; ++} +diff -Naur rdma-core-48.0.bak/providers/sxe2/pd.c rdma-core-48.0/providers/sxe2/pd.c +--- rdma-core-48.0.bak/providers/sxe2/pd.c 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/pd.c 2026-05-27 17:08:05.579104733 +0800 +@@ -0,0 +1,87 @@ 
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include "ccan/array_size.h"
++#include "util/compiler.h"
++#include "util/mmio.h"
++#include "util/util.h"
++#include
++#include
++#include "sxe2_common.h"
++#include "log.h"
++/* Allocate a protection domain on context. Returns the ibv_pd embedded in a new struct sxe2_upd, or NULL; errno is EINVAL for a NULL context and the command's return value when ibv_cmd_alloc_pd() fails. */
++struct ibv_pd *sxe2_ualloc_pd(struct ibv_context *context)
++{
++	struct sxe2_ualloc_pd_resp resp;
++	struct sxe2_rdma_ucontext *sctx;
++	struct ibv_alloc_pd cmd;
++	struct sxe2_upd *upd;
++	int err;
++
++	memset(&resp, 0, sizeof(resp));
++
++	/* Without a context there is no sctx yet, so the plain logger is used here. */
++	if (!context) {
++		SXE2_VERBS_LOG_ERROR("user alloc pd context err\n");
++		errno = EINVAL;
++		return NULL;
++	}
++
++	sctx = container_of(context, struct sxe2_rdma_ucontext, ibv_ctx.context);
++
++	upd = calloc(1, sizeof(*upd));
++	if (!upd) {
++		SXE2_VERBS_LOG_ERROR_BDF("user pd mem alloc failed\n");
++		return NULL;
++	}
++
++	err = ibv_cmd_alloc_pd(context, &upd->ibv_pd, &cmd, sizeof(cmd),
++			       &resp.ibv_resp, sizeof(resp));
++	if (err) {
++		SXE2_VERBS_LOG_ERROR_BDF("ibv cmd alloc pd err(%d)\n", err);
++		free(upd);
++		errno = err;
++		return NULL;
++	}
++
++	/* Keep the pd_id from the response next to the verbs PD. */
++	upd->pd_id = resp.pd_id;
++
++	SXE2_VERBS_LOG_DEBUG_BDF("user pd alloc resp.pdn:%u\n", upd->pd_id);
++
++	return &upd->ibv_pd;
++}
++/* Free a protection domain. Returns EINVAL for a NULL pd, the command's error if ibv_cmd_dealloc_pd() fails (nothing is freed then), otherwise 0. */
++int sxe2_ufree_pd(struct ibv_pd *pd)
++{
++	int err;
++
++	if (!pd) {
++		SXE2_VERBS_LOG_ERROR("user free pd is null\n");
++		return EINVAL;
++	}
++
++	/* On failure the wrapper stays allocated and the error is handed back. */
++	err = ibv_cmd_dealloc_pd(pd);
++	if (err) {
++		SXE2_VERBS_LOG_ERROR("ibv cmd dealloc pd err(%d)\n", err);
++		return err;
++	}
++
++	/* pd is the ibv_pd member of a struct sxe2_upd; release that wrapper. */
++	free(container_of(pd, struct sxe2_upd, ibv_pd));
++
++	return 0;
++}
++
+diff -Naur rdma-core-48.0.bak/providers/sxe2/qp.c rdma-core-48.0/providers/sxe2/qp.c
+--- rdma-core-48.0.bak/providers/sxe2/qp.c 1970-01-01 08:00:00.000000000 +0800
++++ rdma-core-48.0/providers/sxe2/qp.c 2026-05-27 17:08:05.581104746 +0800
+@@ -0,0 +1,782 @@
++#include
++#include
++
++#include "log.h"
++#include "verbs.h"
++#include "sxe2_common.h"
++#include "sxe2_abi.h"
++#include "io.h"
++#include "ring.h"
++#include "buf.h"
++/* Queue sizing constants as used below: minimum SQ/RQ depth, quanta added to every computed depth, bytes per depth unit of the WQE buffer, doorbell-note size and alignment of the uqp allocation. */
++#define MIN_SQ_SIZE (8u)
++#define MIN_RQ_SIZE (8u)
++#define SQ_RSV_SIZE (8)
++#define RQ_RSV_SIZE (8)
++#define QP_QUANTA_SIZE (32)
++#define QP_DB_NOTE_SIZE (16)
++#define QP_MEMALIGN_1K (1024)
++/* send_ops_flags accepted by sxe2_qp_fill_wr_func() for RC and UD QPs; any other bit is rejected with EOPNOTSUPP. */
++enum {
++	SXE2_SUPPORTED_SEND_OPS_FLAGS_RC =
++		IBV_QP_EX_WITH_SEND |
++		IBV_QP_EX_WITH_SEND_WITH_INV |
++		IBV_QP_EX_WITH_SEND_WITH_IMM |
++		IBV_QP_EX_WITH_RDMA_WRITE |
++		IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM |
++		IBV_QP_EX_WITH_RDMA_READ |
++		IBV_QP_EX_WITH_LOCAL_INV |
++		IBV_QP_EX_WITH_BIND_MW,
++	SXE2_SUPPORTED_SEND_OPS_FLAGS_UD =
++		IBV_QP_EX_WITH_SEND |
++		IBV_QP_EX_WITH_SEND_WITH_IMM,
++};
++/* Pick the SQ WQE size class from the SGE count and inline-data size: starts at WQE_SIZE_32BYTE and steps up to the 64/128/256-byte classes; the result is stored in *wqe_shift. */
++static void sxe2_calc_sq_wqe_shift(__u32 max_sq_sge, __u32 max_inline_data,
++				   __u8 *wqe_shift)
++{
++	*wqe_shift = WQE_SIZE_32BYTE;
++
++	if (max_sq_sge > MAX_SGE_SIZE_1 || max_inline_data > MAX_INLINE_DATA_8) {
++		if (max_sq_sge < MAX_SGE_SIZE_4 && max_inline_data <= MAX_INLINE_DATA_39) {
++			*wqe_shift = WQE_SIZE_64BYTE;
++		} else if (max_sq_sge < MAX_SGE_SIZE_8 && max_inline_data <= MAX_INLINE_DATA_101) {
++			*wqe_shift = WQE_SIZE_128BYTE;
++		} else {
++			*wqe_shift = WQE_SIZE_256BYTE;
++		}
++	}
++}
++/* RQ counterpart of sxe2_calc_sq_wqe_shift(): the size class depends on the SGE count only. */
++static void sxe2_calc_rq_wqe_shift(__u32 max_rq_sge, __u8 *wqe_shift)
++{
++	*wqe_shift = WQE_SIZE_32BYTE;
++
++	if (max_rq_sge > MAX_SGE_SIZE_1) {
++		if (max_rq_sge < MAX_SGE_SIZE_4) {
++			*wqe_shift = WQE_SIZE_64BYTE;
++		} else if (max_rq_sge < MAX_SGE_SIZE_8) {
++			*wqe_shift = WQE_SIZE_128BYTE;
++		} else {
++			*wqe_shift = WQE_SIZE_256BYTE;
++		}
++	}
++}
++/* SQ depth = (max_wr << wqe_shift) + SQ_RSV_SIZE, rounded up to a power of two and floored at MIN_SQ_SIZE. Always stored in *sq_depth; returns EINVAL when it exceeds max_hw_wq_quanta, else 0. */
++static int sxe2_calc_sqdepth(struct sxe2_rdma_ucontext *sctx,
++			     struct sxe2_common_attrs *common_attrs,
++			     __u32 max_wr, __u8 wqe_shift,
++			     __u32 *sq_depth)
++{
++	int ret = 0;
++	__u32 depth;
++
++	depth = sxe2_round_up_pow_2((max_wr << wqe_shift) + SQ_RSV_SIZE);
++
++	depth = max(depth, MIN_SQ_SIZE);
++	*sq_depth = depth;
++	if (depth > common_attrs->max_hw_wq_quanta) {
++		SXE2_VERBS_LOG_ERROR_BDF("sq depth illegal, sq depth(%u)\n", depth);
++		ret = EINVAL;
++	}
++
++	return ret;
++}
++/* RQ depth, computed like the SQ depth but with RQ_RSV_SIZE / MIN_RQ_SIZE and checked against max_hw_rq_quanta. */
++static int sxe2_calc_rqdepth(struct sxe2_rdma_ucontext *sctx,
++			     struct sxe2_common_attrs *common_attrs,
++			     __u32 max_wr, __u8 wqe_shift,
++			     __u32 *rq_depth)
++{
++	int ret = 0;
++	__u32 depth;
++	depth = sxe2_round_up_pow_2((max_wr << wqe_shift) + RQ_RSV_SIZE);
++
++	depth = max(depth, MIN_RQ_SIZE);
++	*rq_depth = depth;
++	if (depth > common_attrs->max_hw_rq_quanta) {
++		SXE2_VERBS_LOG_ERROR_BDF("rq depth illegal, rq depth(%u)\n", depth);
++		ret = EINVAL;
++	}
++	return ret;
++}
++/* Derive the SQ shift (one SGE is added to the requested count) and then the SQ depth; returns the depth check's result. */
++static int sxe2_calc_sq_depth_shift(struct sxe2_rdma_ucontext *sctx,
++				    struct sxe2_qp_common_init_info *init_info,
++				    __u32 *sq_depth, __u8 *sq_shift)
++{
++	int ret;
++
++	sxe2_calc_sq_wqe_shift(init_info->max_sq_sge_cnt + 1, init_info->max_inline_data,
++			       sq_shift);
++	ret = sxe2_calc_sqdepth(sctx, init_info->common_attrs, init_info->sq_size,
++				*sq_shift, sq_depth);
++	return ret;
++}
++/* Derive the RQ shift and depth; both are set to 0 and 0 is returned when an SRQ is attached or no receive WRs were requested. */
++static int sxe2_calc_rq_depth_shift(struct sxe2_rdma_ucontext *sctx,
++				    struct sxe2_qp_common_init_info *init_info,
++				    __u32 *rq_depth, __u8 *rq_shift)
++{
++	int ret;
++
++	if ((init_info->srq) || (init_info->rq_size == 0)) {
++		*rq_depth = 0;
++		*rq_shift = 0;
++		ret = 0;
++		goto end;
++	}
++
++	sxe2_calc_rq_wqe_shift(init_info->max_rq_sge_cnt, rq_shift);
++	ret = sxe2_calc_rqdepth(sctx, init_info->common_attrs, init_info->rq_size,
++				*rq_shift, rq_depth);
++end:
++	return ret;
++}
++/* Copy the sizing results and the create response into the user QP, point both CQs back at it and initialise the SQ, initial and RQ rings. */
++static void sxe2_fill_qp(struct sxe2_uqp *uqp,
++			 struct sxe2_qp_common_init_info *init_info,
++			 struct sxe2_ucreate_qp_resp resp,
++			 struct ibv_qp_init_attr_ex *attr_ex)
++{
++	__u32 sq_ring_size;
++	uqp->sq_sig_all = (__u32)attr_ex->sq_sig_all;
++	uqp->qp_type = attr_ex->qp_type;
++	uqp->qp.back_qp = uqp;
++	uqp->qp.lock = &uqp->lock;
++	uqp->verbs_qp.qp.qp_num = resp.qpn;
++	uqp->send_cq = container_of(attr_ex->send_cq, struct sxe2_ucq, verbs_cq.cq);
++	uqp->recv_cq = container_of(attr_ex->recv_cq, struct sxe2_ucq, verbs_cq.cq);
++	uqp->send_cq->uqp = uqp;
++	uqp->recv_cq->uqp = uqp;
++
++	uqp->qp.common_attrs = init_info->common_attrs;
++	uqp->qp.sq_base = init_info->sq;
++	uqp->qp.rq_base = init_info->rq;
++	uqp->qp.srq = init_info->srq ? init_info->srq : NULL;
++	uqp->qp.doorbell_note = init_info->doorbell_note;
++	uqp->qp.sq_wrtrk_array = init_info->sq_wrtrk_array;
++	uqp->qp.rq_wrid_array = init_info->rq_wrid_array;
++	uqp->qp.qp_db_no_llwqe = init_info->qp_db_no_llwqe;
++	uqp->qp.qp_caps = resp.qp_caps;
++	uqp->qp.qpn = resp.qpn;
++	uqp->qp.llwqe_mode = false;
++	uqp->qp.max_sq_sge_cnt = init_info->max_sq_sge_cnt;
++	uqp->qp.max_rq_sge_cnt = init_info->max_rq_sge_cnt;
++	uqp->qp.sq_size = init_info->sq_size;
++	uqp->qp.rq_size = init_info->rq_size;
++	sq_ring_size = uqp->qp.sq_size << init_info->sq_shift;
++	SXE2_RING_INIT(uqp->qp.sq_ring, sq_ring_size);
++	SXE2_RING_INIT(uqp->qp.initial_ring, sq_ring_size);
++	uqp->qp.swqe_polarity = 0;
++	uqp->qp.rwqe_polarity = 0;
++	uqp->qp.max_inline_data = init_info->max_inline_data;
++	uqp->qp.rq_wqe_size = init_info->rq_shift;
++	SXE2_RING_INIT(uqp->qp.rq_ring, uqp->qp.rq_size);
++	uqp->qp.rq_wqe_size_multiplier = init_info->srq ? 0 : 1 << init_info->rq_shift;
++}
++/* Install the RC send-builder callbacks on the extended QP. */
++static void sxe2_fill_wr_builders_rc_xrc_dc(struct ibv_qp_ex *ibqp)
++{
++	ibqp->wr_send = sxe2_wr_send_rc_ud;
++	ibqp->wr_send_imm = sxe2_wr_send_imm_rc_ud;
++	ibqp->wr_send_inv = sxe2_wr_send_inv_rc;
++	ibqp->wr_rdma_write = sxe2_wr_rdma_write_rc;
++	ibqp->wr_rdma_write_imm = sxe2_wr_rdma_write_imm_rc;
++	ibqp->wr_rdma_read = sxe2_wr_rdma_read_rc;
++	ibqp->wr_bind_mw = sxe2_wr_bind_mw_rc;
++	ibqp->wr_local_inv = sxe2_wr_local_inv_rc;
++}
++/* Install the SGE and inline-data setter callbacks shared by RC and UD. */
++static void sxe2_fill_wr_setters_rc_ud(struct ibv_qp_ex *ibqp)
++{
++	ibqp->wr_set_sge = sxe2_wr_set_sge_rc_ud;
++	ibqp->wr_set_sge_list = sxe2_wr_set_sge_list_rc_ud;
++	ibqp->wr_set_inline_data = sxe2_wr_set_inline_data_rc_ud;
++	ibqp->wr_set_inline_data_list = sxe2_wr_set_inline_data_list_rc_ud;
++}
++/* Install the UD send-builder callbacks (send and send-with-immediate only). */
++static void sxe2_fill_wr_builders_ud(struct ibv_qp_ex *ibqp)
++{
++	ibqp->wr_send = sxe2_wr_send_rc_ud;
++	ibqp->wr_send_imm = sxe2_wr_send_imm_rc_ud;
++}
++/* Populate the ibv_qp_ex work-request callbacks for the QP type. Returns 0, or EOPNOTSUPP for atomics, for send_ops_flags outside the per-type set, or for QP types other than RC/UD. */
++static int sxe2_qp_fill_wr_func(struct sxe2_rdma_ucontext *sctx, struct sxe2_uqp *uqp,
++				const struct ibv_qp_init_attr_ex *attr)
++{
++	struct ibv_qp_ex *ibqp = &uqp->verbs_qp.qp_ex;
++	uint64_t ib_ops = attr->send_ops_flags;
++	int ret = 0;
++
++	SXE2_VERBS_LOG_DEBUG_BDF("qp_type:%u, ops:%#llx\n", attr->qp_type, (__u64)ib_ops);
++
++	ibqp->wr_start = sxe2_wr_start;
++	ibqp->wr_complete = sxe2_wr_complete;
++	ibqp->wr_abort = sxe2_wr_abort;
++
++	if (ib_ops & IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP ||
++	    ib_ops & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD) {
++		ret = EOPNOTSUPP;
++		SXE2_VERBS_LOG_ERROR_BDF("atomics not support\n");
++		goto end;
++	}
++
++	switch (attr->qp_type) {
++	case IBV_QPT_RC:
++		if (ib_ops & ~SXE2_SUPPORTED_SEND_OPS_FLAGS_RC) {
++			SXE2_VERBS_LOG_ERROR_BDF("not support ops:%#llx", (__u64)ib_ops);
++			ret = EOPNOTSUPP;
++			goto end;
++		}
++
++		sxe2_fill_wr_builders_rc_xrc_dc(ibqp);
++		sxe2_fill_wr_setters_rc_ud(ibqp);
++
++		break;
++
++	case IBV_QPT_UD:
++		if (ib_ops & ~SXE2_SUPPORTED_SEND_OPS_FLAGS_UD) {
++			SXE2_VERBS_LOG_ERROR_BDF("not support ops:%#llx", (__u64)ib_ops);
++			ret = EOPNOTSUPP;
++			goto end;
++		}
++
++		sxe2_fill_wr_builders_ud(ibqp);
++		sxe2_fill_wr_setters_rc_ud(ibqp);
++		ibqp->wr_set_ud_addr = sxe2_wr_set_ud_addr;
++		break;
++
++	default:
++		ret = EOPNOTSUPP;
++		break;
++	}
++
++end:
++	return ret;
++}
++/* Map a QP type to the environment variable name that sxe2_alloc_qp_buf() hands to sxe2_is_huge_buf(). */
++static const char *sxe2_qptype2key(enum ibv_qp_type type)
++{
++	const char *key = "HUGE_NA";
++
++	switch (type) {
++	case IBV_QPT_RC:
++		key = "HUGE_RC";
++		break;
++	case IBV_QPT_UD:
++		key = "HUGE_UD";
++		break;
++	default:
++		key = "HUGE_NA";
++	}
++
++	return key;
++}
++/* Return true only when environment variable key is set to exactly "y". An unset or empty variable yields false (a length-limited compare would treat the empty string as a match). */
++static bool sxe2_is_huge_buf(const char *key)
++{
++	bool ret = false;
++	char *env = getenv(key);
++
++	if (env && !strcmp(env, "y")) {
++		ret = true;
++	}
++
++	return ret;
++}
++/* Allocate and zero the QP WQE buffer. The default allocation type is ANON, or HUGE when the per-QP-type environment variable is "y"; the type actually used comes from sxe2_alloc_type_get(), which receives that default. Returns 0 or an errno-style code. */
++static int sxe2_alloc_qp_buf(struct sxe2_rdma_ucontext *sctx, sxe2_buf_s *buf, size_t size,
++			     enum ibv_qp_type qp_type)
++{
++	int ret;
++	sxe2_alloc_type_e type;
++	sxe2_alloc_type_e default_type = SXE2_ALLOC_TYPE_ANON;
++	const char *qp_huge_key;
++	long page_size = 0;
++
++	qp_huge_key = sxe2_qptype2key(qp_type);
++	SXE2_VERBS_LOG_DEBUG_BDF("in sxe2_alloc_qp_buf qp_huge_key(%s)", qp_huge_key);
++	if (sxe2_is_huge_buf(qp_huge_key)) {
++		default_type = SXE2_ALLOC_TYPE_HUGE;
++	}
++
++	sxe2_alloc_type_get(SXE2_QP_PREFIX, &type, default_type);
++
++	page_size = sysconf(_SC_PAGE_SIZE);
++	if (page_size < 0) {
++		SXE2_VERBS_LOG_ERROR_BDF("get system page size failed.");
++		ret = EPERM;
++		goto end;
++	}
++	ret = sxe2_prefered_buf_alloc(
++		sctx, buf, size, (size_t)page_size, type);
++	if (ret) {
++		SXE2_VERBS_LOG_ERROR_BDF("qp buf alloc err ret (%d)", ret);
++		goto end;
++	}
++
++	memset(buf->buf, 0, buf->length);
++
++end:
++	return ret;
++}
++/* Release a buffer obtained from sxe2_alloc_qp_buf(). */
++static void sxe2_free_qp_buf(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf)
++{
++	sxe2_actual_buf_free(ctx, buf);
++}
++/* Common creation path for sxe2_ucreate_qp() and sxe2_ucreate_qp_ex(): validates type and caps against the device attributes, sizes both queues, allocates the tracking arrays, WQE buffer and doorbell note, issues ibv_cmd_create_qp_ex() and reports the usable WR counts through attr_ex->cap. Returns the new ibv_qp, or NULL with errno set. */
++static struct ibv_qp *ucreate_qp(struct ibv_context *context, struct ibv_qp_init_attr_ex *attr_ex)
++{
++	struct sxe2_qp_common_init_info init_info = {};
++	struct sxe2_ucreate_qp cmd = {};
++	struct sxe2_ucreate_qp_resp resp = {};
++	struct sxe2_uqp *uqp;
++	int err;
++	struct ibv_qp *ibqp = NULL;
++	struct sxe2_rdma_ucontext *sctx = to_sctx(context);
++	struct sxe2_common_attrs *common_attrs = &sctx->uk_attrs;
++
++	if (attr_ex->qp_type != IBV_QPT_RC && attr_ex->qp_type != IBV_QPT_UD) {
++		SXE2_VERBS_LOG_ERROR_BDF("qp type illegal!, qp_type:%d\n", attr_ex->qp_type);
++		errno = EOPNOTSUPP; /* errno is always assigned after logging so the logger cannot overwrite it */
++		goto end;
++	}
++	if (attr_ex->cap.max_send_sge > common_attrs->max_hw_wq_frags ||
++	    attr_ex->cap.max_recv_sge > common_attrs->max_hw_wq_frags ||
++	    attr_ex->cap.max_send_wr > common_attrs->max_hw_wq_quanta ||
++	    attr_ex->cap.max_recv_wr > common_attrs->max_hw_rq_quanta ||
++	    attr_ex->cap.max_inline_data > common_attrs->max_hw_inline) {
++		SXE2_VERBS_LOG_ERROR_BDF("qp request caps illegal!, max_send_sge(%u) max_recv_sge(%u)"
++			"max_send_wr(%u) max_recv_wr(%u) max_inline_data(%u)"
++			"dev hw_attrs: max_hw_inline(%d), max_hw_wq_frags(%d),"
++			"max_hw_wq_quanta(%d), max_hw_rq_quanta(%d),\n", attr_ex->cap.max_send_sge,
++			attr_ex->cap.max_recv_sge, attr_ex->cap.max_send_wr, attr_ex->cap.max_recv_wr,
++			attr_ex->cap.max_inline_data, common_attrs->max_hw_inline,
++			common_attrs->max_hw_wq_frags, common_attrs->max_hw_wq_quanta,
++			common_attrs->max_hw_rq_quanta);
++		errno = EINVAL;
++		goto end;
++	}
++
++	SXE2_VERBS_LOG_INFO_BDF("qp caps, max_send_sge(%u) max_recv_sge(%u)"
++		"max_send_wr(%u) max_recv_wr(%u) max_inline_data(%u)\n", attr_ex->cap.max_send_sge,
++		attr_ex->cap.max_recv_sge, attr_ex->cap.max_send_wr, attr_ex->cap.max_recv_wr,
++		attr_ex->cap.max_inline_data);
++	init_info.sq_size = attr_ex->cap.max_send_wr;
++	init_info.rq_size = attr_ex->cap.max_recv_wr;
++
++	init_info.max_sq_sge_cnt = attr_ex->cap.max_send_sge;
++	init_info.max_rq_sge_cnt = attr_ex->cap.max_recv_sge;
++	init_info.max_inline_data = attr_ex->cap.max_inline_data;
++	init_info.common_attrs = common_attrs;
++	if (attr_ex->srq) {
++		init_info.srq = &((to_usrq(attr_ex->srq))->srq_verbs);
++	}
++
++	err = sxe2_calc_sq_depth_shift(sctx, &init_info, &init_info.sq_depth, &init_info.sq_shift);
++	if (err) {
++		SXE2_VERBS_LOG_ERROR_BDF("sxe2_calc_sq_depth_shift fail, err(%d)\n", err);
++		errno = err;
++		goto end;
++	}
++	err = sxe2_calc_rq_depth_shift(sctx, &init_info, &init_info.rq_depth, &init_info.rq_shift);
++	if (err) {
++		SXE2_VERBS_LOG_ERROR_BDF("sxe2_calc_rq_depth_shift fail, err(%d)\n", err);
++		errno = err;
++		goto end;
++	}
++	uqp = memalign(QP_MEMALIGN_1K, sizeof(*uqp));
++	if (!uqp) {
++		SXE2_VERBS_LOG_ERROR_BDF("memalign uqp fail!\n");
++		errno = ENOMEM;
++		goto end;
++	}
++	memset(uqp, 0, sizeof(*uqp));
++	if (pthread_spin_init(&uqp->lock, PTHREAD_PROCESS_PRIVATE)) {
++		SXE2_VERBS_LOG_ERROR_BDF("pthread_spin_init fail!\n");
++		errno = EBUSY;
++		goto err_free_qp;
++	}
++	uqp->qp.common_attrs = common_attrs;
++	init_info.sq_size = init_info.sq_depth >> init_info.sq_shift;
++	init_info.rq_size = init_info.rq_depth >> init_info.rq_shift;
++
++	init_info.qp_db_no_llwqe = (__u32 *)sctx->qp_db_no_llwqe;
++	init_info.sq_wrtrk_array = calloc(init_info.sq_depth, sizeof(*init_info.sq_wrtrk_array));
++	if (!init_info.sq_wrtrk_array) {
++		SXE2_VERBS_LOG_ERROR_BDF("calloc sq_wrtrk_array fail!\n");
++		errno = ENOMEM;
++		goto err_destroy_lock;
++	}
++
++	if (init_info.rq_depth != 0) {
++		init_info.rq_wrid_array = calloc(init_info.rq_depth, sizeof(*init_info.rq_wrid_array));
++		if (!init_info.rq_wrid_array) {
++			SXE2_VERBS_LOG_ERROR_BDF("calloc rq_wrid_array fail!\n");
++			errno = ENOMEM;
++			goto err_free_sq_wrtrk;
++		}
++	} else {
++		init_info.rq_wrid_array = NULL;
++	}
++	/* One buffer holds the RQ first and the SQ right after it. */
++	uqp->buf_size = (init_info.sq_depth + init_info.rq_depth) * QP_QUANTA_SIZE;
++
++	if (sxe2_alloc_qp_buf(sctx, &uqp->buf, uqp->buf_size, attr_ex->qp_type)) {
++		SXE2_VERBS_LOG_ERROR_BDF("total_size %zu", uqp->buf_size);
++		errno = ENOMEM;
++		goto err_free_rq_wrid;
++	}
++	init_info.rq = uqp->buf.buf;
++	if (!init_info.rq) {
++		SXE2_VERBS_LOG_ERROR_BDF("alloc qp buffer fail!\n");
++		errno = ENOMEM;
++		goto err_free_rq_wrid;
++	}
++	memset(init_info.rq, 0, uqp->buf_size);
++
++	init_info.doorbell_note = sxe2_alloc_hw_buf(QP_DB_NOTE_SIZE);
++	if (!init_info.doorbell_note) {
++		SXE2_VERBS_LOG_ERROR_BDF("alloc shadow area buffer fail!\n");
++		errno = ENOMEM;
++		goto err_free_vmap_qp;
++	}
++	memset(init_info.doorbell_note, 0, QP_DB_NOTE_SIZE);
++
++	init_info.sq = &init_info.rq[init_info.rq_depth];
++
++	if (attr_ex->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) {
++		err = sxe2_qp_fill_wr_func(sctx, uqp, attr_ex);
++		if (err) {
++			SXE2_VERBS_LOG_ERROR_BDF("fill_wr_func fail, err:%d", err);
++			errno = err;
++			goto err_free_vmap_doorbell_note;
++		}
++	}
++
++	SXE2_VERBS_LOG_INFO_BDF("rq depth %u shift %u", init_info.rq_depth, init_info.rq_shift);
++	cmd.sq_depth = init_info.sq_depth;
++	cmd.rq_depth = init_info.rq_depth;
++	cmd.sq_shift = init_info.sq_shift;
++	cmd.rq_shift = init_info.rq_shift;
++	cmd.user_wqe_bufs = (__u64)(init_info.rq);
++	cmd.doorbell_note = (__u64)(init_info.doorbell_note);
++	cmd.user_compl_ctx = (__u64)(&uqp->qp);
++	err = ibv_cmd_create_qp_ex(context, &uqp->verbs_qp, attr_ex, &(cmd.ibv_cmd),
++				   sizeof(cmd), &(resp.ibv_resp),
++				   sizeof(struct sxe2_ucreate_qp_resp));
++	if (err) {
++		SXE2_VERBS_LOG_ERROR_BDF("ibv_cmd_create_qp_ex fail!, err(%d)\n", err);
++		errno = err;
++		goto err_free_vmap_doorbell_note;
++	}
++
++	sxe2_fill_qp(uqp, &init_info, resp, attr_ex);
++	/* Report the WR counts the caller can actually use (reserved quanta excluded). */
++	attr_ex->cap.max_send_wr = (init_info.sq_depth - SQ_RSV_SIZE) >> init_info.sq_shift;
++	if ((init_info.srq) || (init_info.rq_depth == 0)) {
++		attr_ex->cap.max_recv_wr = 0;
++	} else {
++		attr_ex->cap.max_recv_wr = (init_info.rq_depth - RQ_RSV_SIZE) >> init_info.rq_shift;
++	}
++
++	if (attr_ex->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) {
++		uqp->verbs_qp.comp_mask |= VERBS_QP_EX;
++	}
++
++	ibqp = &uqp->verbs_qp.qp;
++	goto end;
++err_free_vmap_doorbell_note:
++	sxe2_free_hw_buf(init_info.doorbell_note, QP_DB_NOTE_SIZE);
++err_free_vmap_qp:
++	sxe2_free_qp_buf(sctx, &uqp->buf);
++err_free_rq_wrid:
++	free(init_info.rq_wrid_array); /* NULL when no RQ was sized; free(NULL) is a no-op */
++err_free_sq_wrtrk:
++	free(init_info.sq_wrtrk_array);
++err_destroy_lock:
++	pthread_spin_destroy(&uqp->lock);
++err_free_qp:
++	free(uqp);
++end:
++	return ibqp;
++}
++/* ibv_create_qp() entry point: copies attr into an extended attribute with only the PD bit set, creates the QP and, on success, copies the adjusted attributes back to attr. Returns NULL with errno = EINVAL when pd or attr is NULL. */
++struct ibv_qp *sxe2_ucreate_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
++{
++	struct ibv_qp *qp = NULL;
++	struct ibv_qp_init_attr_ex attr_ex;
++	struct sxe2_rdma_ucontext *sctx = NULL;
++
++	if (!pd || !attr) {
++		SXE2_VERBS_LOG_ERROR("inval param, pd(%p), attr(%p)", (void *)pd, (void *)attr); /* no sctx yet: the _BDF macro would stay silent */
++		errno = EINVAL;
++		goto out;
++	}
++
++	sctx = to_sctx(pd->context);
++
++	SXE2_VERBS_LOG_DEBUG_BDF(
++		"type:%u, max_inline_data:%u, max_send_wr:%u"
++		"max_send_sge:%u, max_send_recv_wr:%u, max_recv_sge:%u, pd_handle:%u",
++		attr->qp_type, attr->cap.max_inline_data, attr->cap.max_send_wr,
++		attr->cap.max_send_sge, attr->cap.max_recv_wr, attr->cap.max_recv_sge,
++		pd->handle);
++
++	memset(&attr_ex, 0, sizeof(attr_ex));
++	memcpy(&attr_ex, attr, sizeof(*attr));
++
++	attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD;
++	attr_ex.pd = pd;
++	qp = ucreate_qp(pd->context, &attr_ex);
++	if (qp) {
++		memcpy(attr, &attr_ex, sizeof(*attr));
++	}
++
++out:
++	return qp;
++}
++/* ibv_create_qp_ex() entry point. Returns NULL with errno = EINVAL when context or attr_ex is NULL, otherwise the result of ucreate_qp(). */
++struct ibv_qp *sxe2_ucreate_qp_ex(struct ibv_context *context,
++				  struct ibv_qp_init_attr_ex *attr_ex)
++{
++	struct ibv_qp *qp = NULL;
++	struct sxe2_rdma_ucontext *sctx = NULL;
++
++	if (!context || !attr_ex) {
++		SXE2_VERBS_LOG_ERROR("inval param, context(%p), attr_ex(%p)", (void *)context, (void *)attr_ex);
++		errno = EINVAL;
++		goto out;
++	}
++
++	sctx = to_sctx(context);
++	/* pd and send_ops_flags are only read when their comp_mask bit is set; otherwise 0 is logged. */
++	SXE2_VERBS_LOG_DEBUG_BDF(
++		"type:%u, max_inline_data:%u, max_send_wr:%u, "
++		"max_send_sge:%u, max_send_recv_wr:%u, "
++		"max_recv_sge:%u, pd_handle:%u, comp_mask:%#x,"
++		"create_flags:%#x, send_flags:%#lx",
++		attr_ex->qp_type, attr_ex->cap.max_inline_data,
++		attr_ex->cap.max_send_wr, attr_ex->cap.max_send_sge,
++		attr_ex->cap.max_recv_wr, attr_ex->cap.max_recv_sge,
++		(((attr_ex->comp_mask & IBV_QP_INIT_ATTR_PD) && attr_ex->pd) ? attr_ex->pd->handle : 0),
++		attr_ex->comp_mask, attr_ex->create_flags,
++		(unsigned long)((attr_ex->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) ? attr_ex->send_ops_flags : 0));
++	qp = ucreate_qp(context, attr_ex);
++
++out:
++	return qp;
++}
++/* Destroy a QP. The kernel object is destroyed first; if that fails the error is returned and no user-space resource is touched, so the caller may retry. On success the LLWQE slot, pending CQEs, tracking arrays, buffers and the uqp itself are released and 0 is returned. */
++int sxe2_udestroy_qp(struct ibv_qp *ibqp)
++{
++	struct sxe2_uqp *uqp;
++#ifdef SXE2_CFG_DEBUG
++	struct sxe2_qp_common *qp;
++#endif
++	struct sxe2_rdma_ucontext *sctx = container_of(ibqp->context, struct sxe2_rdma_ucontext,
++						       ibv_ctx.context);
++	int ret = 0;
++
++	uqp = container_of(ibqp, struct sxe2_uqp, verbs_qp.qp);
++#ifdef SXE2_CFG_DEBUG
++	qp = &uqp->qp;
++#endif
++	/* Ask the kernel first: if it refuses, the QP is still live and must keep its lock and buffers. */
++	ret = ibv_cmd_destroy_qp(ibqp);
++	if (ret) {
++		SXE2_VERBS_LOG_ERROR_BDF("ibv_cmd_destroy_qp fail!, ret(%d)\n", ret);
++		return ret;
++	}
++
++	if (pthread_spin_destroy(&uqp->lock)) {
++		SXE2_VERBS_LOG_ERROR_BDF("pthread_spin_destroy fail!\n");
++	}
++
++	if (uqp->qp.verbs_llwqe) {
++		db_uput_qp_llwqe(sctx, uqp->qp.verbs_llwqe);
++	}
++	if (uqp->send_cq) {
++		sxe2_clean_cqes(&uqp->qp, uqp->send_cq, SQ_CQ);
++	}
++	if (uqp->recv_cq && uqp->recv_cq != uqp->send_cq) {
++		sxe2_clean_cqes(&uqp->qp, uqp->recv_cq, RQ_CQ);
++	}
++	if (uqp->qp.sq_wrtrk_array) {
++		free(uqp->qp.sq_wrtrk_array);
++		uqp->qp.sq_wrtrk_array = NULL;
++	}
++	if (uqp->qp.rq_wrid_array) {
++		free(uqp->qp.rq_wrid_array);
++		uqp->qp.rq_wrid_array = NULL;
++	}
++	sxe2_free_qp_buf(sctx, &uqp->buf);
++	sxe2_free_hw_buf(uqp->qp.doorbell_note, QP_DB_NOTE_SIZE);
++	uqp->qp.doorbell_note = NULL;
++#ifdef SXE2_CFG_DEBUG
++	SXE2_VERBS_LOG_DEBUG_BDF("qp [%d] io statistics:\n"\
++		"total_sqe_cnt : %u\n"\
++		"total_rqe_cnt : %u\n"\
++		"finished_sqe_cnt : %u\n"\
++		"finished_rqe_cnt : %u\n"\
++		"finished_rqe_insrq_cnt : %u\n"\
++		"flushed_sq_cnt : %u\n"\
++		"flushed_rq_cnt : %u\n"\
++		"cleaned_sq_cnt : %u\n"\
++		"cleaned_rq_cnt : %u\n"\
++		"cleaned_flushsq_cnt : %u\n"\
++		"cleaned_flushrq_cnt : %u\n"\
++		"total_signal_cnt : %u\n"\
++		"send_cnt : %u\n"\
++		"send_inv_cnt : %u\n"\
++		"read_cnt : %u\n"\
++		"write_cnt : %u\n"\
++		"local_inv_cnt : %u\n"\
++		"bind_mw_cnt : %u\n"\
++		"fast_regmr_cnt : %u\n"\
++		"last_send_sqwrid : %llu\n"\
++		"last_rcvd_sqwrid : %llu\n"\
++		"last_send_rqwrid : %llu\n"\
++		"last_rcvd_rqwrid : %llu\n",
++		qp->qpn,
++		qp->statistics.total_sqe_cnt,
++		qp->statistics.total_rqe_cnt,
++		qp->statistics.finished_sqe_cnt,
++		qp->statistics.finished_rqe_cnt,
++		qp->statistics.finished_rqe_insrq_cnt,
++		qp->statistics.flushed_sq_cnt,
++		qp->statistics.flushed_rq_cnt,
++		qp->statistics.cleaned_sq_cnt,
++		qp->statistics.cleaned_rq_cnt,
++		qp->statistics.cleaned_flushsq_cnt,
++		qp->statistics.cleaned_flushrq_cnt,
++		qp->statistics.total_signal_cnt,
++		qp->statistics.send_cnt,
++		qp->statistics.send_inv_cnt,
++		qp->statistics.read_cnt,
++		qp->statistics.write_cnt,
++		qp->statistics.local_inv_cnt,
++		qp->statistics.bind_mw_cnt,
++		qp->statistics.fast_regmr_cnt,
++		qp->statistics.last_send_sqwrid,
++		qp->statistics.last_rcvd_sqwrid,
++		qp->statistics.last_send_rqwrid,
++		qp->statistics.last_rcvd_rqwrid);
++#endif
++	if (uqp) {
++		free(uqp);
++		uqp = NULL;
++	}
++	return ret;
++}
++/* Modify a QP. Entering RTS in ll_mode tries to attach an LLWQE slot before the command; entering RESET clears rings, polarity and CQEs; SQD->RTS with an empty SQ pushes a NOP WQE. Returns the result of ibv_cmd_modify_qp_ex(); on failure an LLWQE slot taken by this call is given back and the response is not consumed. */
++int sxe2_umodify_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, int attr_mask)
++{
++	struct sxe2_umodify_qp cmd = {};
++	struct sxe2_umodify_qp_resp resp = {};
++	struct sxe2_uqp *uqp;
++	struct sxe2_verbs_llwqe *ll_wqe = NULL;
++	struct sxe2_rdma_ucontext *sctx = container_of(ibqp->context, struct sxe2_rdma_ucontext,
++						       ibv_ctx.context);
++	bool need_alloc_page = false;
++	__u8 alloc_page_type = 0;
++	int ret;
++	SXE2_VERBS_LOG_DEBUG_BDF("uqp:umodify qp %u start\n", ibqp->qp_num);
++	if (attr_mask & IBV_QP_STATE) {
++		SXE2_VERBS_LOG_DEBUG_BDF("uqp %u :umodify qp cur start %u next state %u\n",
++			ibqp->qp_num, ibqp->state, attr->qp_state);
++	}
++	uqp = container_of(ibqp, struct sxe2_uqp, verbs_qp.qp);
++	if (sctx->ll_mode &&
++	    attr_mask & IBV_QP_STATE && attr->qp_state == IBV_QPS_RTS) {
++		ll_wqe = db_uget_qp_llwqe(ibqp->context, &need_alloc_page, &alloc_page_type);
++		if (ll_wqe) {
++			uqp->qp.verbs_llwqe = ll_wqe;
++			uqp->qp.push_wqe = ll_wqe->wqe_addr;
++			uqp->qp.push_db = ll_wqe->db_addr;
++			uqp->qp.llwqe_enable = true;
++			uqp->qp.llwqe_mode = true;
++			cmd.llwqe_enable = true;
++			cmd.llwqe_page_index = ll_wqe->db_page_id;
++		}
++		cmd.new_page_alloc = need_alloc_page;
++	} else if (attr_mask & IBV_QP_STATE && attr->qp_state == IBV_QPS_RESET) {
++		if (uqp->send_cq) { /* NOTE(review): this reset runs before the kernel accepts the transition */
++			sxe2_clean_cqes(&uqp->qp, uqp->send_cq, SQ_CQ);
++		}
++		if (uqp->recv_cq && uqp->recv_cq != uqp->send_cq) {
++			sxe2_clean_cqes(&uqp->qp, uqp->recv_cq, RQ_CQ);
++		}
++
++		uqp->qp.sq_ring.head = 0;
++		uqp->qp.sq_ring.tail = 0;
++		uqp->qp.rq_ring.head = 0;
++		uqp->qp.rq_ring.tail = 0;
++		uqp->qp.initial_ring.head = 0;
++		uqp->qp.initial_ring.tail = 0;
++		if (uqp->qp.verbs_llwqe) {
++			db_uput_qp_llwqe(sctx, uqp->qp.verbs_llwqe);
++			uqp->qp.verbs_llwqe = NULL;
++		}
++		uqp->qp.swqe_polarity = 0;
++		uqp->qp.rwqe_polarity = 0;
++		uqp->qp.llwqe_mode = false;
++		memset(uqp->qp.rq_base, 0, uqp->buf_size);
++	}
++	if ((attr_mask & IBV_QP_STATE) &&
++	    (ibqp->state == IBV_QPS_SQD)
++	    &&(attr->qp_state == IBV_QPS_RTS)) {
++		if (uqp->qp.sq_ring.head == uqp->qp.sq_ring.tail) {
++			sxe2_push_nop_wqe(&uqp->qp);
++			SXE2_VERBS_LOG_DEBUG_BDF("uqp %u:before modify sqd to rts push nop head=%u tail=%u\n",
++				ibqp->qp_num, uqp->qp.sq_ring.head, uqp->qp.sq_ring.tail);
++		}
++	}
++
++	ret = ibv_cmd_modify_qp_ex(ibqp, attr, attr_mask, &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp));
++	if (ret) {
++		SXE2_VERBS_LOG_ERROR_BDF("ibv_modify_qp ret(%d)\n", ret);
++		/* resp was not filled in: give back the LLWQE slot taken above and do not act on the response */
++		if (ll_wqe) {
++			db_uput_qp_llwqe(sctx, ll_wqe);
++			uqp->qp.verbs_llwqe = NULL;
++			uqp->qp.llwqe_enable = false;
++			uqp->qp.llwqe_mode = false;
++		}
++		return ret;
++	}
++	if (sctx->ll_mode && need_alloc_page &&
++	    attr_mask & IBV_QP_STATE && attr->qp_state == IBV_QPS_RTS) {
++		if (!ll_wqe) {
++			ll_wqe = alloc_db_page_and_get_qp_llwqe(ibqp->context, resp.db_mmap_size, resp.db_page_id, resp.db_mmap_offset, alloc_page_type);
++			if (ll_wqe) {
++				uqp->qp.verbs_llwqe = ll_wqe;
++				uqp->qp.push_wqe = ll_wqe->wqe_addr;
++				uqp->qp.push_db = ll_wqe->db_addr;
++				uqp->qp.llwqe_enable = true;
++				uqp->qp.llwqe_mode = true;
++			} else {
++				uqp->qp.llwqe_enable = false;
++				uqp->qp.llwqe_mode = false;
++			}
++		} else {
++			if (!db_ualloc_page_and_llwqes(ibqp->context, false, resp.db_mmap_size, resp.db_page_id, resp.db_mmap_offset)) {
++				SXE2_VERBS_LOG_WARN_BDF("LLWQE:Alloc new shared db page failed");
++			}
++		}
++	}
++
++	uqp->qp.rd_fence_rate = resp.rd_fence_rate;
++	SXE2_VERBS_LOG_DEBUG_BDF("uqp %u:modify qp finish\n", ibqp->qp_num);
++	return ret;
++}
++/* Query QP attributes; forwards to ibv_cmd_query_qp() and returns its result unchanged. */
++int sxe2_uquery_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, int attr_mask,
++		   struct ibv_qp_init_attr *init_attr)
++{
++	struct ibv_query_qp cmd;
++
++	return ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof(cmd));
++}
+diff -Naur rdma-core-48.0.bak/providers/sxe2/readme.txt rdma-core-48.0/providers/sxe2/readme.txt
+--- rdma-core-48.0.bak/providers/sxe2/readme.txt 1970-01-01 08:00:00.000000000 +0800
++++ rdma-core-48.0/providers/sxe2/readme.txt 2026-05-27 17:08:05.580104739 +0800
+@@ -0,0 +1,15 @@
++ah.c:ah用户态接口实现
++ah.h:ah用户态接口声明
++cq.c:cq用户态创建、删除、查询、修改接口实现
++db.c:doorbell资源管理
++sxe2_abi.h:基于ibv标准req、resp结构体扩展sxe2厂商字段
++sxe2_private_verbs.h:厂商私有接口声明
++qp.c :qp用户态创建、删除、查询、修改接口实现
++srq.c :srq用户态创建、删除、查询、修改接口、post_srq_recv实现
++verbs.c :用户态厂商驱动注册到libibverbs的接口函数,qp.c、srq.c是各个接口的具体实现
++io.h:数据面相关结构体声明,如wqe结构体
++io.c:post_send实现、post_recv实现、post_srq_recv实现、poll_cq实现、arm_cq实现、cq_event实现
++mr.c:mr用户态接口实现
++mw.c:mw用户态接口实现
++pd.c:pd用户态接口实现
++mc.c:组播用户态接口实现
+diff -Naur rdma-core-48.0.bak/providers/sxe2/ring.h rdma-core-48.0/providers/sxe2/ring.h
+--- rdma-core-48.0.bak/providers/sxe2/ring.h 1970-01-01 08:00:00.000000000 +0800
++++ rdma-core-48.0/providers/sxe2/ring.h 2026-05-27
/* providers/sxe2/ring.h */
#ifndef __RING_H_
#define __RING_H_

/*
 * Ring-index helpers.
 *
 * Every macro takes the ring object itself (not a pointer to it) and only
 * touches its head, tail and size members. The full/free tests below treat
 * the ring as full once size - 1 slots are in use, so one slot always stays
 * empty.
 *
 * Macro arguments are expanded more than once: never pass an expression
 * with side effects.
 */

/*
 * Left as a bare brace block on purpose: sxe2_common.h carries a
 * token-identical copy of this macro, and changing only one of the two
 * copies would turn the pair into an incompatible macro redefinition.
 */
#define SXE2_RING_INIT(_ring, _size) \
	{ \
		(_ring).head = 0; \
		(_ring).tail = 0; \
		(_ring).size = (_size); \
	}

#define SXE2_RING_SIZE(_ring) ((_ring).size)
#define SXE2_RING_CURRENT_HEAD(_ring) ((_ring).head)
#define SXE2_RING_CURRENT_TAIL(_ring) ((_ring).tail)

/* Number of slots between tail and head, modulo the ring size. */
#define SXE2_RING_USED_QUANTA(_ring) \
	(((_ring).head + (_ring).size - (_ring).tail) % (_ring).size)

/* Same distance as above, minus one slot. */
#define SXE2_RING_USED_QUANTA_PAD(_ring) \
	(((_ring).head + (_ring).size - (_ring).tail - 1) % (_ring).size)

/* Slots that can still be produced before the ring reports full. */
#define SXE2_RING_FREE_QUANTA(_ring) \
	((_ring).size - SXE2_RING_USED_QUANTA(_ring) - 1)

#define SXE2_SQ_RING_FREE_QUANTA(_ring) \
	((_ring).size - SXE2_RING_USED_QUANTA(_ring) - 1)

/* True when size - 1 slots are in use. */
#define SXE2_RING_FULL_ERR(_ring) \
	(SXE2_RING_USED_QUANTA(_ring) == ((_ring).size - 1))

#define SXE2_SQ_RING_FULL_ERR(_ring) \
	(SXE2_RING_USED_QUANTA(_ring) == ((_ring).size - 1))

#define SXE2_RING_MORE_WORK(_ring) \
	(SXE2_RING_USED_QUANTA(_ring) != 0)

#define SXE2_RING_MORE_WORK_PAD(_ring) \
	(SXE2_RING_USED_QUANTA_PAD(_ring) != 0)

/*
 * The multi-statement macros below are wrapped in do { } while (0) so that
 * "if (c) MACRO(...); else ..." parses as intended. They no longer declare
 * a local named `size`, which could silently capture a caller identifier of
 * the same name used inside _count or _retcode.
 */

/* Advance head by one; _retcode becomes 0, or ENOMEM when the ring is full. */
#define SXE2_RING_MOVE_HEAD(_ring, _retcode) \
	do { \
		if (!SXE2_RING_FULL_ERR(_ring)) { \
			(_ring).head = ((_ring).head + 1) % (_ring).size; \
			(_retcode) = 0; \
		} else { \
			(_retcode) = ENOMEM; \
		} \
	} while (0)

/* Advance head by _count if used + _count stays below size. */
#define SXE2_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
	do { \
		if ((SXE2_RING_USED_QUANTA(_ring) + (_count)) < (_ring).size) { \
			(_ring).head = ((_ring).head + (_count)) % (_ring).size; \
			(_retcode) = 0; \
		} else { \
			(_retcode) = ENOMEM; \
		} \
	} while (0)

#define SXE2_SQ_RING_MOVE_HEAD(_ring, _retcode) \
	do { \
		if (!SXE2_SQ_RING_FULL_ERR(_ring)) { \
			(_ring).head = ((_ring).head + 1) % (_ring).size; \
			(_retcode) = 0; \
		} else { \
			(_retcode) = ENOMEM; \
		} \
	} while (0)

/* SQ variant: stricter bound, used + _count must stay below size - 1. */
#define SXE2_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
	do { \
		if ((SXE2_RING_USED_QUANTA(_ring) + (_count)) < ((_ring).size - 1)) { \
			(_ring).head = ((_ring).head + (_count)) % (_ring).size; \
			(_retcode) = 0; \
		} else { \
			(_retcode) = ENOMEM; \
		} \
	} while (0)

/* Unchecked index updates; the whole assignment is parenthesised. */
#define SXE2_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \
	((_ring).head = ((_ring).head + (_count)) % (_ring).size)

#define SXE2_RING_MOVE_TAIL(_ring) \
	((_ring).tail = ((_ring).tail + 1) % (_ring).size)

#define SXE2_RING_MOVE_HEAD_NOCHECK(_ring) \
	((_ring).head = ((_ring).head + 1) % (_ring).size)

#define SXE2_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
	((_ring).tail = ((_ring).tail + (_count)) % (_ring).size)

#define SXE2_RING_SET_TAIL(_ring, _pos) \
	((_ring).tail = (_pos) % (_ring).size)

/*
 * Store the current head in `index`, then advance head by one.
 * Despite the name, no atomic operation is involved.
 */
#define SXE2_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \
	do { \
		(index) = SXE2_RING_CURRENT_HEAD(_ring); \
		SXE2_RING_MOVE_HEAD(_ring, _retcode); \
	} while (0)

/* Zero the first 64 bytes at `wqe`. */
#define SXE2_CQP_INIT_WQE(wqe) memset((wqe), 0, 64)

/* buf member of the CQ element the CQ ring head currently points at. */
#define SXE2_GET_CURRENT_CQ_ELEM(_cq) \
	((_cq)->cq_base[SXE2_RING_CURRENT_HEAD((_cq)->cq_ring)].buf)

#endif
(0) ++#define SXE2_SRQ_DB_NOTE_SIZE (16) ++#define SXE2_SRQ_MAX_BUF_SIZE (2 * 1024 * 1024) ++ ++static void srq_ucal_wqe_shift(__u32 max_srq_sge, __u8 *srqe_shift) ++{ ++ *srqe_shift = WQE_SIZE_32BYTE; ++ ++ if (max_srq_sge > MAX_SGE_SIZE_1) { ++ if (max_srq_sge < MAX_SGE_SIZE_4) { ++ *srqe_shift = WQE_SIZE_64BYTE; ++ } else if (max_srq_sge < MAX_SGE_SIZE_8) { ++ *srqe_shift = WQE_SIZE_128BYTE; ++ } else { ++ *srqe_shift = WQE_SIZE_256BYTE; ++ } ++ } ++ return; ++} ++ ++static int srq_uget_depth(struct sxe2_common_attrs *common_attrs, ++ __u32 srq_size, __u8 shift, __u32 *srq_depth) ++{ ++ int ret = 0; ++ ++ *srq_depth = sxe2_round_up_pow_2((srq_size << shift) + SXE2_RQ_RSVD); ++ ++ if (*srq_depth > common_attrs->max_hw_srq_quanta) { ++ ret = EINVAL; ++ } ++ ++ return ret; ++} ++ ++static void srq_uinit_verbs_common(struct sxe2_srq_verbs *srq, ++ struct sxe2_srq_verbs_init_info *info) ++{ ++ __u8 srqshift; ++ ++ srq->common_attrs = info->common_attrs; ++ ++ srq_ucal_wqe_shift(info->max_srq_frag_cnt, &srqshift); ++ ++ srq->srq_base = info->srq; ++ srq->db_note = info->db_note; ++ srq->srq_wrid_array = info->srq_wrid_array; ++ srq->srqe_array = info->srqe_array; ++ ++ srq->srq_id = info->srq_id; ++ ++ srq->srq_polarity = SXE2_SRQ_POLARITY_INIT; ++ ++ srq->srq_size = info->srq_size; ++ ++ srq->wqe_size = srqshift; ++ ++ srq->max_srq_frag_cnt = info->max_srq_frag_cnt; ++ ++ SXE2_RING_INIT(srq->srq_ring, srq->srq_size); ++ ++ srq->wqe_size_multiplier = SXE2_SRQ_SIZE_MUL_SHIFT << srqshift; ++ ++ srq->srq_buf_size = info->srq_buf_size; ++ ++ return; ++} ++ ++int sxe2_uget_srq_num(struct ibv_srq *ibv_srq, uint32_t *srqn) ++{ ++ int ret = 0; ++ struct sxe2_usrq *usrq; ++ struct sxe2_rdma_ucontext *sctx; ++ ++ if (!ibv_srq || !srqn) { ++ ret = EINVAL; ++ SXE2_VERBS_LOG_ERROR("inv param, ibv_srq %p, srqn addr %p", ++ ibv_srq, srqn); ++ goto end; ++ } ++ ++ usrq = container_of(ibv_srq, struct sxe2_usrq, vsrq.srq); ++ sctx = to_sctx(ibv_srq->context); ++ if (!usrq || !sctx) { 
++ ret = EINVAL; ++ SXE2_VERBS_LOG_ERROR_BDF( ++ "SRQ:destroy srq attr illegal:usrq:%p, sctx:%p, ret:%d\n", usrq, ++ sctx, ret); ++ goto end; ++ } ++ ++ *srqn = usrq->srq_verbs.srq_id; ++ ++end: ++ return ret; ++} ++ ++static int sxe2_alloc_srq_buf(struct sxe2_rdma_ucontext *sctx, sxe2_buf_s *buf, size_t size) ++{ ++ int ret; ++ sxe2_alloc_type_e type; ++ sxe2_alloc_type_e default_type = SXE2_ALLOC_TYPE_ANON; ++ long page_size; ++ ++ sxe2_alloc_type_get(SXE2_SRQ_PREFIX, &type, default_type); ++ ++ page_size = sysconf(_SC_PAGE_SIZE); ++ if (page_size < 0) { ++ SXE2_VERBS_LOG_ERROR_BDF("get system page size failed."); ++ ret = EPERM; ++ goto end; ++ } ++ ret = sxe2_prefered_buf_alloc( ++ sctx, buf, size, page_size, type); ++ if (ret) { ++ SXE2_VERBS_LOG_ERROR_BDF("cq buf alloc err ret (%d)", ret); ++ goto end; ++ } ++ ++ memset(buf->buf, 0, buf->length); ++ ++end: ++ return ret; ++} ++ ++static void sxe2_free_srq_buf(struct sxe2_rdma_ucontext *ctx, sxe2_buf_s *buf) ++{ ++ sxe2_actual_buf_free(ctx, buf); ++} ++ ++struct ibv_srq *sxe2_ucreate_srq(struct ibv_pd *pd, ++ struct ibv_srq_init_attr *attr) ++{ ++ struct sxe2_usrq *usrq; ++ struct ibv_srq *ibvsrq = NULL; ++ struct sxe2_rdma_ucontext *sctx; ++ struct sxe2_ucreate_srq cmd; ++ struct sxe2_ucreate_srq_resp resp; ++ struct sxe2_srq_verbs_init_info info = { }; ++ __u32 depth = 0; ++ __u8 shift = 0; ++ int ret = 0; ++ ++ if (!pd || !attr) { ++ errno = EINVAL; ++ SXE2_VERBS_LOG_ERROR( ++ "SRQ:inv create srq input param, pd:%p, attr:%p, ret:%d", pd, attr, ++ errno); ++ goto end; ++ } ++ ++ sctx = to_sctx(pd->context); ++ if (attr->attr.max_wr > sctx->uk_attrs.max_hw_srq_wr || ++ attr->attr.max_sge > sctx->uk_attrs.max_hw_wq_frags) { ++ errno = EINVAL; ++ SXE2_VERBS_LOG_ERROR_BDF( ++ "SRQ:create srq attr illegal:attr max_wr:%#x, max_sge:%#x, " ++ "srq_limit:%#x, ret:%d\n", ++ attr->attr.max_wr, attr->attr.max_sge, attr->attr.srq_limit, errno); ++ goto end; ++ } ++ SXE2_VERBS_LOG_INFO_BDF( ++ "SRQ:create srq start, attr 
max_wr:%#x, max_sge:%#x, srq_limit:%#x", ++ attr->attr.max_wr, attr->attr.max_sge, attr->attr.srq_limit); ++ ++ usrq = calloc(1, sizeof(*usrq)); ++ if (!usrq) { ++ errno = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF( ++ "SRQ:user mem alloc failed, mem size:%zu, ret:%d\n", sizeof(*usrq), ++ errno); ++ goto end; ++ } ++ ++ if (pthread_spin_init(&usrq->lock, PTHREAD_PROCESS_SHARED)) { ++ errno = EBUSY; ++ SXE2_VERBS_LOG_ERROR_BDF("SRQ:lock init failed, ret:%d\n", errno); ++ goto free_usrq; ++ } ++ ++ memset(&cmd, 0, sizeof(cmd)); ++ memset(&resp, 0, sizeof(resp)); ++ memset(&info, 0, sizeof(info)); ++ ibvsrq = &usrq->vsrq.srq; ++ ++ info.common_attrs = &sctx->uk_attrs; ++ info.max_srq_frag_cnt = attr->attr.max_sge; ++ ++ srq_ucal_wqe_shift(info.max_srq_frag_cnt, &shift); ++ ret = srq_uget_depth(info.common_attrs, attr->attr.max_wr, shift, &depth); ++ if (ret) { ++ errno = ret; ++ SXE2_VERBS_LOG_ERROR_BDF("SRQ:calculate srq depth failed, ret:%d\n", ++ ret); ++ goto free_mutex; ++ } ++ ++ info.srq_buf_size = depth * SXE2_VERBS_SRQE_MIN_SIZE; ++ ++ info.srq_size = depth >> shift; ++ ++ info.srq_wrid_array = calloc(info.srq_size, sizeof(*info.srq_wrid_array)); ++ if (!info.srq_wrid_array) { ++ errno = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF("SRQ:calloc srq_wrid_array failed, ret: %d\n", ++ errno); ++ goto free_mutex; ++ } ++ ++ info.srqe_array = calloc(info.srq_size, sizeof(*info.srqe_array)); ++ if (!info.srqe_array) { ++ errno = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF("SRQ:calloc srqe_array failed, ret: %d\n", ++ errno); ++ goto free_wrid_array; ++ } ++ ++ if (sxe2_alloc_srq_buf(sctx, &usrq->buf, info.srq_buf_size)) { ++ errno = ENOMEM; ++ goto free_srqe_array; ++ } ++ ++ info.srq = usrq->buf.buf; ++ if (!info.srq) { ++ errno = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF("SRQ:buf alloc failed, size:%#x, ret:%d\n", ++ info.srq_buf_size, errno); ++ goto free_srqe_array; ++ } ++ memset(info.srq, 0, info.srq_buf_size); ++ ++ info.db_note = (__u64 *)sxe2_alloc_hw_buf(SXE2_SRQ_DB_NOTE_SIZE); ++ if 
(!info.db_note) { ++ errno = ENOMEM; ++ SXE2_VERBS_LOG_ERROR_BDF( ++ "SRQ:DB Note buf alloc failed, size:%#x, ret %d\n", ++ SXE2_SRQ_DB_NOTE_SIZE, errno); ++ goto free_srq_buf; ++ } ++ memset(info.db_note, 0, SXE2_SRQ_DB_NOTE_SIZE); ++ ++ pthread_spin_lock(&usrq->lock); ++ cmd.user_srq_buf = (__u64)((uintptr_t)info.srq); ++ cmd.user_srq_db_note = (__u64)((uintptr_t)info.db_note); ++ cmd.srq_cmpl_ctx = (__u64)((uintptr_t)(&usrq->srq_verbs)); ++ cmd.srq_buf_size = info.srq_buf_size; ++ cmd.srq_size = info.srq_size; ++ cmd.max_wr_cal = ((depth - SXE2_RQ_RSVD) >> shift); ++ ret = ibv_cmd_create_srq(pd, ibvsrq, attr, &cmd.ibv_cmd, sizeof(cmd), ++ &resp.ibv_resp, sizeof(resp)); ++ pthread_spin_unlock(&usrq->lock); ++ if (ret) { ++ errno = ret; ++ SXE2_VERBS_LOG_ERROR_BDF("SRQ:ibv cmd create srq failed, ret:%d\n", ++ ret); ++ goto free_srq_db_note; ++ } ++ ++ info.srq_id = resp.srq_id; ++ ++ srq_uinit_verbs_common(&usrq->srq_verbs, &info); ++ ++ SXE2_VERBS_LOG_INFO_BDF( ++ "SRQ(%#x):Create over, srq_buf_base addr:%p, srq_db_note addr:%p" ++ "srq_size:%#x, max_srq_frag_cnt:%#x, srq_buf_size:%#x\n", ++ usrq->srq_verbs.srq_id, usrq->srq_verbs.srq_base, ++ usrq->srq_verbs.db_note, usrq->srq_verbs.srq_size, ++ usrq->srq_verbs.max_srq_frag_cnt, usrq->srq_verbs.srq_buf_size); ++ ++ usrq->max_wr = (depth - SXE2_RQ_RSVD) >> shift; ++ usrq->max_sge = usrq->srq_verbs.max_srq_frag_cnt; ++ attr->attr.max_wr = usrq->max_wr; ++ attr->attr.max_sge = usrq->max_sge; ++ ++ goto end; ++ ++free_srq_db_note: ++ sxe2_free_hw_buf(info.db_note, SXE2_SRQ_DB_NOTE_SIZE); ++ info.db_note = NULL; ++free_srq_buf: ++ sxe2_free_srq_buf(sctx, &usrq->buf); ++ info.srq = NULL; ++free_srqe_array: ++ free(info.srqe_array); ++ info.srqe_array = NULL; ++free_wrid_array: ++ free(info.srq_wrid_array); ++ info.srq_wrid_array = NULL; ++free_mutex: ++ pthread_spin_destroy(&usrq->lock); ++free_usrq: ++ free(usrq); ++ usrq = NULL; ++ ibvsrq = NULL; ++end: ++ return ibvsrq; ++} ++ ++struct ibv_srq 
*sxe2_ucreate_srq_ex(struct ibv_context *context, ++ struct ibv_srq_init_attr_ex *attr) ++{ ++ struct ibv_srq *srq = NULL; ++ struct sxe2_rdma_ucontext *sctx; ++ ++ if (!context || !attr) { ++ SXE2_VERBS_LOG_ERROR("SRQ:Create inv param, context(%p) attr(%p)", ++ context, attr); ++ goto end; ++ } ++ ++ sctx = to_sctx(context); ++ SXE2_VERBS_LOG_INFO_BDF("SRQ:Create ex in, mask(%#x) srq_type(%#x) " ++ "max_sge(%#x) max_wr(%#x) srq_limit(%#x)", ++ attr->comp_mask, (__u32)attr->srq_type, ++ attr->attr.max_sge, attr->attr.max_wr, ++ attr->attr.srq_limit); ++ ++ if ((attr->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) && ++ (attr->srq_type == IBV_SRQT_BASIC)) { ++ srq = sxe2_ucreate_srq(attr->pd, (struct ibv_srq_init_attr *)attr); ++ } else { ++ SXE2_VERBS_LOG_ERROR_BDF("SRQ:Create inv mask, comp_mask(%d) srq_type(%d)", ++ attr->comp_mask, attr->srq_type); ++ } ++ ++end: ++ return srq; ++} ++ ++int sxe2_umodify_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr, ++ int attr_mask) ++{ ++ int ret = 0; ++ struct sxe2_rdma_ucontext *sctx; ++ struct ibv_modify_srq cmd; ++ ++ if (!srq || !attr) { ++ ret = EINVAL; ++ SXE2_VERBS_LOG_ERROR( ++ "SRQ:inv modify srq input param, srq:%p, attr:%p, ret:%d", srq, ++ attr, ret); ++ goto end; ++ } ++ sctx = to_sctx(srq->context); ++ ++ ret = ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof(cmd)); ++ if (ret) { ++ SXE2_VERBS_LOG_ERROR_BDF("SRQ:ibv modify srq kernel failed, ret:%d", ret); ++ goto end; ++ } ++ ++ SXE2_VERBS_LOG_INFO_BDF("SRQ:Modify attr_mask:%#x, srq_limit:%#x\n", ++ (__u32)attr_mask, attr->srq_limit); ++ ++end: ++ return ret; ++} ++ ++int sxe2_uquery_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr) ++{ ++ int ret = 0; ++ struct sxe2_rdma_ucontext *sctx; ++ struct ibv_query_srq cmd; ++ ++ if (!srq || !attr) { ++ ret = EINVAL; ++ SXE2_VERBS_LOG_ERROR( ++ "SRQ:inv modify srq input param, srq:%p, attr:%p, ret:%d", srq, ++ attr, ret); ++ goto end; ++ } ++ sctx = to_sctx(srq->context); ++ ++ ret = ibv_cmd_query_srq(srq, attr, &cmd, 
sizeof(cmd)); ++ if (ret) { ++ SXE2_VERBS_LOG_ERROR_BDF("SRQ:ibv query srq kernel failed, ret:%d", ++ ret); ++ goto end; ++ } ++ ++ SXE2_VERBS_LOG_INFO_BDF("SRQ:Query max_wr:%#x, max_sge:%#x, limit:%#x\n", ++ attr->max_wr, attr->max_sge, attr->srq_limit); ++ ++end: ++ return ret; ++} ++ ++int sxe2_udestroy_srq(struct ibv_srq *ibv_srq) ++{ ++ struct sxe2_usrq *usrq; ++ struct sxe2_rdma_ucontext *sctx; ++ int ret = 0; ++ ++ if (!ibv_srq) { ++ ret = EINVAL; ++ SXE2_VERBS_LOG_ERROR( ++ "SRQ:inv destroy srq input param, ibv_srq:%p, ret:%d", ibv_srq, ++ ret); ++ goto end; ++ } ++ ++ usrq = container_of(ibv_srq, struct sxe2_usrq, vsrq.srq); ++ sctx = to_sctx(ibv_srq->context); ++ if (!usrq || !sctx) { ++ ret = EINVAL; ++ SXE2_VERBS_LOG_ERROR_BDF( ++ "SRQ:destroy srq attr illegal:usrq:%p, sctx:%p, ret:%d\n", usrq, ++ sctx, ret); ++ goto end; ++ } ++ ++ ret = ibv_cmd_destroy_srq(ibv_srq); ++ if (ret) { ++ SXE2_VERBS_LOG_ERROR_BDF("SRQ(%#x):destroy srq kernel failed: " ++ "srq_size:%#x, ret:%d\n", ++ usrq->srq_verbs.srq_id, ++ usrq->srq_verbs.srq_size, ret); ++ goto end; ++ } ++ ++ sxe2_free_srq_buf(sctx, &usrq->buf); ++ sxe2_free_hw_buf(usrq->srq_verbs.db_note, SXE2_DB_NOTE_SIZE); ++ usrq->srq_verbs.db_note = NULL; ++ free(usrq->srq_verbs.srqe_array); ++ usrq->srq_verbs.srqe_array = NULL; ++ free(usrq->srq_verbs.srq_wrid_array); ++ usrq->srq_verbs.srq_wrid_array = NULL; ++ ++ pthread_spin_destroy(&usrq->lock); ++ ++ free(usrq); ++ usrq = NULL; ++ ++end: ++ return ret; ++} +diff -Naur rdma-core-48.0.bak/providers/sxe2/sxe2-abi.h rdma-core-48.0/providers/sxe2/sxe2-abi.h +--- rdma-core-48.0.bak/providers/sxe2/sxe2-abi.h 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/sxe2-abi.h 2026-05-27 17:08:05.580104739 +0800 +@@ -0,0 +1,108 @@ ++#ifndef __SXE2_ABI_STRUCT_H__ ++#define __SXE2_ABI_STRUCT_H__ ++ ++#include ++ ++#define SXE2_RDMA_ABI_VER (5) ++ ++struct sxe2_create_ah_resp { ++ __u32 ah_id; ++ __u8 rsvd[4]; ++}; ++ ++struct sxe2_modify_qp_req { ++ __u8 
/* providers/sxe2/sxe2-abi.h */
#ifndef __SXE2_ABI_STRUCT_H__
#define __SXE2_ABI_STRUCT_H__

/*
 * Driver-specific request/response payloads that extend the standard
 * uverbs commands. The layouts are shared with the kernel driver, so field
 * order, types and sizes must not change.
 *
 * NOTE(review): the header name of this #include is missing in this copy of
 * the patch; <linux/types.h> is what the __u8/__u32/__aligned_u64 types
 * below require - confirm against the original.
 */
#include <linux/types.h>

#define SXE2_RDMA_ABI_VER (5)

/* Response: create address handle. */
struct sxe2_create_ah_resp {
	__u32 ah_id;
	__u8  rsvd[4];
};

/* Request: modify QP. */
struct sxe2_modify_qp_req {
	__u8  sq_flush;
	__u8  rq_flush;
	__u8  llwqe_enable;
	__u8  new_page_alloc;
	__u32 llwqe_page_index;
};

/* Response: modify QP (read-fence rate plus doorbell page mapping data). */
struct sxe2_modify_qp_resp {
	__u8  rd_fence_rate;
	__u8  rsvd[3];
	__u32 db_mmap_size;
	__u32 db_page_id;
	__u32 rsvd1;
	__u64 db_mmap_offset;
};

/* Request: create QP. */
struct sxe2_create_qp_req {
	__u32 sq_depth;
	__u32 rq_depth;
	__u8  sq_shift;
	__u8  rq_shift;
	__u8  rsvd[6];
	__u64 user_wqe_bufs;
	__u64 doorbell_note;
	__u64 user_compl_ctx;
};

/* Request: create CQ. */
struct sxe2_create_cq_req {
	__aligned_u64 user_cq_buf;
	__aligned_u64 user_cq_db_note;
};

/* Response: create CQ. */
struct sxe2_create_cq_resp {
	__u32 cq_id;
	__u32 ncqe;
};

/* Response: allocate PD. */
struct sxe2_alloc_pd_resp {
	__u32 pd_id;
	__u8  rsvd[4];
};

/* Response: create QP. */
struct sxe2_create_qp_resp {
	__u32 qpn;
	__u32 qp_caps;
};

/* Request: allocate user context. */
struct sxe2_alloc_ucontext_req {
	__u32 rsvd32;
	__u8  userspace_ver;
	__u8  rsvd8[3];
	__aligned_u64 comp_mask;
};

/*
 * Response: allocate user context (device limits and capabilities).
 *
 * NOTE(review): with natural alignment the compiler has to insert padding
 * after min_hw_wq_size and again after bdf[]; the explicit rsvd3[] does not
 * cover either gap. Verify that the kernel side uses the same layout.
 */
struct sxe2_alloc_ucontext_resp {
	__u32 max_pds;
	__u32 max_qps;
	__u32 wq_size;
	__u8  kernel_ver;
	__u8  rsvd[3];
	__aligned_u64 feature_flags;
	__aligned_u64 db_mmap_key;
	__u32 max_hw_wq_frags;
	__u32 max_hw_read_sges;
	__u32 max_hw_inline;
	__u32 max_hw_rq_quanta;
	__u32 max_hw_wq_quanta;
	__u32 min_hw_cq_size;
	__u32 max_hw_cq_size;
	__u16 max_hw_sq_chunk;
	__u8  hw_rev;
	__u8  is_pf;
	__aligned_u64 comp_mask;
	__u16 min_hw_wq_size;
	__u32 max_db;
	__u8  rsvd3[2];
	__u8  bdf[16];
	__u32 max_hw_srq_quanta;
	__u32 max_hw_srq_wr;
};

/* Request: create SRQ. */
struct sxe2_create_srq_req {
	__aligned_u64 user_srq_buf;
	__aligned_u64 user_srq_db_note;
	__aligned_u64 srq_cmpl_ctx;
	__u32 srq_buf_size;
	__u32 srq_size;
	__u32 max_wr_cal;
};

/* Response: create SRQ. */
struct sxe2_create_srq_resp {
	__u32 srq_id;
};

#endif
rdma-core-48.0/providers/sxe2/sxe2_abi.h 2026-05-27 17:08:05.580104739 +0800 +@@ -0,0 +1,29 @@ ++#ifndef __SXE2_ABI_H__ ++#define __SXE2_ABI_H__ ++ ++#include ++#include ++#include ++#include "sxe2_common.h" ++ ++#define SXE2_IB_UVERBS_ABI_VERSION 1 ++DECLARE_DRV_CMD(sxe2_ucreate_ah, IB_USER_VERBS_CMD_CREATE_AH, ++ empty, sxe2_create_ah_resp); ++DECLARE_DRV_CMD(sxe2_umodify_qp, IB_USER_VERBS_EX_CMD_MODIFY_QP, ++ sxe2_modify_qp_req, sxe2_modify_qp_resp); ++DECLARE_DRV_CMD(sxe2_ucreate_cq, IB_USER_VERBS_CMD_CREATE_CQ, ++ sxe2_create_cq_req, sxe2_create_cq_resp); ++DECLARE_DRV_CMD(sxe2_ucreate_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ, ++ sxe2_create_cq_req, sxe2_create_cq_resp); ++DECLARE_DRV_CMD(sxe2_ualloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, ++ empty, sxe2_alloc_pd_resp); ++DECLARE_DRV_CMD(sxe2_ucreate_qp, IB_USER_VERBS_CMD_CREATE_QP, ++ sxe2_create_qp_req, sxe2_create_qp_resp); ++DECLARE_DRV_CMD(sxe2_get_context, IB_USER_VERBS_CMD_GET_CONTEXT, ++ sxe2_alloc_ucontext_req, sxe2_alloc_ucontext_resp); ++DECLARE_DRV_CMD(sxe2_ucreate_srq, IB_USER_VERBS_CMD_CREATE_SRQ, ++ sxe2_create_srq_req, sxe2_create_srq_resp); ++DECLARE_DRV_CMD(sxe2_ucreate_srq_ex, IB_USER_VERBS_CMD_CREATE_XSRQ, ++ sxe2_create_srq_req, sxe2_create_srq_resp); ++ ++#endif +diff -Naur rdma-core-48.0.bak/providers/sxe2/sxe2_common.c rdma-core-48.0/providers/sxe2/sxe2_common.c +--- rdma-core-48.0.bak/providers/sxe2/sxe2_common.c 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/sxe2_common.c 2026-05-27 17:08:05.580104739 +0800 +@@ -0,0 +1,37 @@ ++#include "sxe2_common.h" ++ ++void *sxe2_alloc_hw_buf(size_t size) ++{ ++ void *buf; ++ long page_size = sysconf(_SC_PAGE_SIZE); ++ ++ buf = memalign(page_size, size); ++ if (!buf) { ++ goto end; ++ } ++ ++ if (ibv_dontfork_range(buf, size)) { ++ free(buf); ++ buf = NULL; ++ goto end; ++ } ++end: ++ return buf; ++} ++ ++void sxe2_free_hw_buf(void *buf, size_t size) ++{ ++ ibv_dofork_range(buf, size); ++ free(buf); ++} ++ ++__u32 
sxe2_round_up_pow_2(__u32 value) ++{ ++ int count = 1; ++ ++ for (value--; count <= 16; count *= 2) { ++ value |= value >> count; ++ } ++ return ++value; ++} ++ +diff -Naur rdma-core-48.0.bak/providers/sxe2/sxe2_common.h rdma-core-48.0/providers/sxe2/sxe2_common.h +--- rdma-core-48.0.bak/providers/sxe2/sxe2_common.h 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/sxe2_common.h 2026-05-27 17:08:05.581104746 +0800 +@@ -0,0 +1,717 @@ ++#ifndef __SXE2_COMMON_H__ ++#define __SXE2_COMMON_H__ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "util/compiler.h" ++#include ++#include "util/udma_barrier.h" ++#include "util/util.h" ++#include "ccan/minmax.h" ++#include ++#include "sxe2_abi.h" ++ ++#define SXE2_RDMA_BDF_BUFF_LEN 16 ++#define SXE2_WQE_SIZE 4 ++#define SXE2_QP_WQE_MAX_QUANTA 8 ++#define SXE2_QP_WQE_MIN_QUANTA 1 ++#define SXE2_CQE_SIZE 8 ++#define SXE2_U_MINCQ_SIZE (4) ++#define SXE2_DB_NOTE_SIZE (64) ++#define SXE2_DB_PAGE_SIZE (4096) ++#define SXE2_FEATURE_CQ_RESIZE (2ULL) ++#define SXE2_SRQE_BUSY 1 ++#define SXE2_SRQE_FREE 0 ++ ++#define SQ_CQ 1 ++#define RQ_CQ 2 ++ ++#define SXE2_WQE_QUANTA_ODD_NUMBER 0x1 ++ ++#define SXE2_RQ_WQE_HEAD_OFFSET 32 ++#define SXE2_RQ_WQE_FRAG_OFFSET 16 ++ ++#define SXE2_SINGLE_THREADED "SXE2_SINGLE_THREADED" ++#define SXE2_LL_MODE "SXE2_LL_MODE" ++#define SXE2_TOTAL_LL_WQE "SXE2_TOTAL_LL_WQE" ++#define SXE2_DEDICATED_LL_WQE "SXE2_DEDICATED_LL_WQE" ++ ++#define sxe2_handle void* ++#define sxe2_qp_handle sxe2_handle ++ ++#define SXE2_CQ_PREFIX "SXE2_CQ" ++#define SXE2_QP_PREFIX "SXE2_QP" ++#define SXE2_SRQ_PREFIX "SXE2_SRQ" ++ ++#if defined(SXE2_CFG_DEBUG) && defined(SXE2_SUPPORT_INJECT) ++#define SXE2_INJECT_LLWQE_ERR "SXE2_INJECT_LLWQE_ERR" ++#endif ++ ++enum sxe2_supported_wc_flags { ++ SXE2_CQ_SUPPORTED_WC_FLAGS = IBV_WC_EX_WITH_BYTE_LEN ++ | IBV_WC_EX_WITH_IMM ++ | IBV_WC_EX_WITH_QP_NUM ++ | IBV_WC_EX_WITH_SRC_QP ++ | IBV_WC_EX_WITH_SLID ++ | 
IBV_WC_EX_WITH_SL ++ | IBV_WC_EX_WITH_DLID_PATH_BITS ++ | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK ++ | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP, ++}; ++ ++enum inline_data_size { ++ MAX_INLINE_DATA_8 = 8, ++ MAX_INLINE_DATA_39 = 39, ++ MAX_INLINE_DATA_101 = 101, ++}; ++ ++enum sge_size { ++ MAX_SGE_SIZE_0 = 0, ++ MAX_SGE_SIZE_1, ++ MAX_SGE_SIZE_2, ++ MAX_SGE_SIZE_3, ++ MAX_SGE_SIZE_4, ++ MAX_SGE_SIZE_5, ++ MAX_SGE_SIZE_6, ++ MAX_SGE_SIZE_7, ++ MAX_SGE_SIZE_8, ++ MAX_SGE_SIZE_9, ++ MAX_SGE_SIZE_10, ++ MAX_SGE_SIZE_11, ++ MAX_SGE_SIZE_12, ++ MAX_SGE_SIZE_13, ++ MAX_SGE_SIZE_14, ++}; ++ ++enum wqe_size_shift { ++ WQE_SIZE_32BYTE = 0, ++ WQE_SIZE_64BYTE, ++ WQE_SIZE_128BYTE, ++ WQE_SIZE_256BYTE, ++}; ++ ++enum sxe2_verbs_db_page_type { ++ SXE2_VERBS_DB_PAGE_TYPE_LLWQE = 0x0, ++ SXE2_VERBS_DB_PAGE_TYPE_NO_LLWQE = 0x1, ++}; ++ ++enum sxe2_verbs_db_llwqe_page_type { ++ SXE2_VERBS_DB_PAGE_TYPE_DEDICATED = 0x0, ++ SXE2_VERBS_DB_PAGE_TYPE_SHARED = 0x1, ++}; ++ ++enum sxe2_disp_id { ++ SXE2_RDMA_SEND = 0, ++ SXE2_RDMA_SEND_INLINE = 1, ++ SXE2_RDMA_WRITE = 2, ++ SXE2_RDMA_WRITE_INLINE = 3, ++ SXE2_RDMA_READ = 4, ++ SXE2_RDMA_BIND_MW = 5, ++ SXE2_RDMA_LOCAL_INV = 6, ++ SXE2_RDMA_FAST_REG_MR = 7, ++ SXE2_RDMA_MAX_ID ++}; ++ ++typedef enum sxe2_alloc_type { ++ SXE2_ALLOC_TYPE_ANON, ++ SXE2_ALLOC_TYPE_HUGE, ++ SXE2_ALLOC_TYPE_PREFER_HUGE, ++} sxe2_alloc_type_e; ++ ++typedef struct sxe2_hugetlb_mem { ++ int shmid; ++ __u32 reserved; ++ void *shmaddr; ++ unsigned long *bitmap; ++ unsigned long bmp_size; ++ struct list_node entry; ++} sxe2_hugetlb_mem_s; ++ ++typedef struct sxe2_buf { ++ void *buf; ++ size_t length; ++ __u64 base; ++ __u32 reserved; ++ sxe2_hugetlb_mem_s *hmem; ++ sxe2_alloc_type_e type; ++ __u64 resource_type; ++ size_t req_alignment; ++} sxe2_buf_s; ++ ++#define SXE2_RING_INIT(_ring, _size) \ ++ { \ ++ (_ring).head = 0; \ ++ (_ring).tail = 0; \ ++ (_ring).size = (_size); \ ++ } ++ ++struct sxe2_ring { ++ __u32 head; ++ __u32 tail; ++ __u32 size; ++}; ++ ++struct 
sxe2_sq_common_wr_trk_info { ++ __u64 wrid; ++ __u32 wr_len; ++ __u16 quanta; ++ __u8 reserved[2]; ++}; ++ ++struct sxe2_qp_quanta { ++ __le64 elem[SXE2_WQE_SIZE]; ++}; ++ ++struct sxe2_common_attrs { ++ __u64 feature_flags; ++ __u32 max_hw_wq_frags; ++ __u32 max_hw_read_sges; ++ __u32 max_hw_inline; ++ __u32 max_hw_rq_quanta; ++ __u32 max_hw_wq_quanta; ++ __u32 min_hw_cq_size; ++ __u32 max_hw_cq_size; ++ __u16 max_hw_push_len; ++ __u16 max_hw_sq_chunk; ++ __u16 min_hw_wq_size; ++ __u8 hw_rev; ++ __u8 rsv; ++ __u32 max_hw_srq_quanta; ++ __u32 max_hw_srq_wr; ++}; ++ ++typedef struct sxe2_qp_common_init_info { ++ struct sxe2_qp_quanta *sq; ++ struct sxe2_qp_quanta *rq; ++ struct sxe2_srq_verbs *srq; ++ struct sxe2_common_attrs *common_attrs; ++ __u32 *qp_db_no_llwqe; ++ __u32 *doorbell_note; ++ struct sxe2_sq_common_wr_trk_info *sq_wrtrk_array; ++ __u64 *rq_wrid_array; ++ __u32 qpn; ++ __u32 qp_caps; ++ __u32 sq_size; ++ __u32 rq_size; ++ __u32 max_sq_sge_cnt; ++ __u32 max_rq_sge_cnt; ++ __u32 max_inline_data; ++ __u32 sq_depth; ++ __u32 rq_depth; ++ __u8 qp_type; ++ __u8 sq_shift; ++ __u8 rq_shift; ++ bool legacy_mode; ++} sxe2_qp_common_init_info_s; ++ ++struct sxe2_io_info { ++ __u32 total_sqe_cnt; ++ __u32 total_rqe_cnt; ++ __u32 finished_sqe_cnt; ++ __u32 finished_rqe_cnt; ++ __u32 finished_rqe_insrq_cnt; ++ __u32 flushed_sq_cnt; ++ __u32 flushed_rq_cnt; ++ __u32 cleaned_sq_cnt; ++ __u32 cleaned_rq_cnt; ++ __u32 cleaned_flushsq_cnt; ++ __u32 cleaned_flushrq_cnt; ++ __u32 total_signal_cnt; ++ __u32 send_cnt; ++ __u32 send_inv_cnt; ++ __u32 read_cnt; ++ __u32 write_cnt; ++ __u32 local_inv_cnt; ++ __u32 bind_mw_cnt; ++ __u32 fast_regmr_cnt; ++ __u64 last_send_sqwrid; ++ __u64 last_rcvd_sqwrid; ++ __u64 last_send_rqwrid; ++ __u64 last_rcvd_rqwrid; ++}; ++ ++typedef struct sxe2_qp_common { ++ struct sxe2_qp_quanta *sq_base; ++ struct sxe2_qp_quanta *rq_base; ++ struct sxe2_srq_verbs *srq; ++ struct sxe2_common_attrs *common_attrs; ++ __u32 *qp_db_no_llwqe; ++ struct 
sxe2_sq_common_wr_trk_info *sq_wrtrk_array; ++ __u64 *rq_wrid_array; ++ __u32 *doorbell_note; ++ struct sxe2_verbs_llwqe *verbs_llwqe; ++ __le32 *push_db; ++ __u64 *push_wqe; ++ struct sxe2_ring sq_ring; ++ struct sxe2_ring rq_ring; ++ struct sxe2_ring initial_ring; ++ __u32 qpn; ++ __u32 qp_caps; ++ __u32 sq_size; ++ __u32 rq_size; ++ __u32 max_sq_sge_cnt; ++ __u32 max_rq_sge_cnt; ++ __u32 max_inline_data; ++ __u8 swqe_polarity; ++ __u8 rwqe_polarity; ++ __u8 rq_wqe_size; ++ __u8 rq_wqe_size_multiplier; ++ __u8 rd_fence_rate; ++ __u16 ord_cnt; ++ bool llwqe_enable : 1; ++ bool llwqe_mode : 1; ++ bool push_dropped : 1; ++ bool destroy_pending : 1; ++ void *back_qp; ++ pthread_spinlock_t *lock; ++ struct sxe2_io_info statistics; ++} sxe2_qp_common_s; ++ ++typedef struct sxe2_uqp { ++ struct verbs_qp verbs_qp; ++ struct sxe2_ucq *send_cq; ++ struct sxe2_ucq *recv_cq; ++ size_t buf_size; ++ pthread_spinlock_t lock; ++ __u32 sq_sig_all; ++ sxe2_qp_common_s qp; ++ enum ibv_qp_type qp_type; ++ int err; ++ __u32 wqe_idx; ++ __le64 *cur_wqe; ++ __u16 quanta; ++ __le64 *wqe_hdr; ++ unsigned int rb_sq_head; ++ enum sxe2_disp_id funid; ++ struct sxe2_qp_quanta wqebuf[SXE2_QP_WQE_MIN_QUANTA]; ++ sxe2_buf_s buf; ++} sxe2_uqp_s; ++ ++struct sxe2_cqe { ++ __le64 buf[SXE2_CQE_SIZE]; ++}; ++ ++struct sxe2_cq_db_note { ++ __le64 buf[SXE2_CQE_SIZE]; ++}; ++ ++struct sxe2_cqe_info { ++ __u64 wr_id; ++ __u32 bytes; ++ union { ++ struct { ++ __u64 payload_len : 32; ++ __u64 packet_seq : 24; ++ __u64 rsvd1 : 8; ++ __u64 qpc; ++ __u64 l_r_key : 32; ++ __u64 qp_id : 18; ++ __u64 rsvd2 : 14; ++ __u64 minor_err : 16; ++ __u64 major_err : 16; ++ __u64 wq_desc_idx : 15; ++ __u64 rsvd3 : 3; ++ __u64 extended_cqe : 1; ++ __u64 push_dropped : 1; ++ __u64 ipv4 : 1; ++ __u64 stag_or_lrkey : 1; ++ __u64 solicited_evt : 1; ++ __u64 error : 1; ++ __u64 op : 6; ++ __u64 qp_type : 1; ++ __u64 rsvd4 : 1; ++ __u64 imme_data : 32; ++ __u64 srqn : 18; ++ __u64 is_srq : 1; ++ __u64 rsvd5 : 13; ++ __u64 
cqe_timestamp; ++ __u64 ud_smac : 48; ++ __u64 ud_vlan_tag : 16; ++ __u64 ud_src_qpn : 24; ++ __u64 rsvd6 : 8; ++ __u64 rsvd7 : 6; ++ __u64 vsi_index : 10; ++ __u64 rsvd8 : 12; ++ __u64 vlan_tag_flag : 1; ++ __u64 ud_smac_valid : 1; ++ __u64 imm_data_flag : 1; ++ __u64 cqe_valid : 1; ++ } field; ++ __u64 buf[SXE2_CQE_SIZE]; ++ }info; ++}; ++ ++struct sxe2_cq_uk_init_info { ++ __le64 *cqe_alloc_db; ++ struct sxe2_cqe *cq_base; ++ __le32 *doorbell_note; ++ __u32 cq_id; ++ __u32 ncqe; ++}; ++ ++struct sxe2_cq_uk { ++ struct sxe2_cqe *cq_base; ++ __le64 *cqe_alloc_db; ++ __le32 *doorbell_note; ++ __u32 arm_sn; ++ __u32 cq_id; ++ __u32 ncqe; ++ struct sxe2_ring cq_ring; ++ __u8 polarity; ++}; ++ ++struct sxe2_ucq { ++ struct verbs_cq verbs_cq; ++ pthread_spinlock_t lock; ++ sxe2_buf_s buf; ++ size_t buf_size; ++ uint32_t comp_vector; ++ uint32_t report_rtt; ++ struct sxe2_uqp *uqp; ++ struct sxe2_cq_uk cq; ++ struct sxe2_cqe_info cur_cqe; ++}; ++ ++struct sxe2_upd { ++ struct ibv_pd ibv_pd; ++ void *arm_cq_page; ++ void *arm_cq; ++ uint32_t pd_id; ++}; ++ ++struct sxe2_srq_verbs { ++ struct sxe2_qp_quanta *srq_base; ++ struct sxe2_common_attrs *common_attrs; ++ __u64 *db_note; ++ struct sxe2_ring srq_ring; ++ __u32 srq_id; ++ __u32 srq_size; ++ __u32 max_srq_frag_cnt; ++ __u8 srq_polarity; ++ __u8 wqe_size; ++ __u8 wqe_size_multiplier; ++ __u32 srq_buf_size; ++ __u64 *srq_wrid_array; ++ __u8 *srqe_array; ++}; ++ ++struct sxe2_srq_verbs_init_info { ++ struct sxe2_qp_quanta *srq; ++ struct sxe2_common_attrs *common_attrs; ++ __le64 *db_note; ++ __u64 *srq_wrid_array; ++ __u8 *srqe_array; ++ __u32 srq_id; ++ __u32 srq_size; ++ __u32 max_srq_frag_cnt; ++ __u32 srq_buf_size; ++}; ++ ++struct sxe2_usrq { ++ struct verbs_srq vsrq; ++ struct sxe2_srq_verbs srq_verbs; ++ pthread_spinlock_t lock; ++ __u32 max_wr; ++ __u32 max_sge; ++ sxe2_buf_s buf; ++}; ++ ++struct sxe2_rdma_hw_attrs { ++ struct sxe2_common_attrs uk_attrs; ++ __u64 max_hw_outbound_msg_size; ++ __u64 
max_hw_inbound_msg_size; ++ __u64 max_mr_size; ++ __u32 min_hw_qp_id; ++ __u32 min_hw_aeq_size; ++ __u32 max_hw_aeq_size; ++ __u32 min_hw_ceq_size; ++ __u32 max_hw_ceq_size; ++ __u32 max_hw_device_pages; ++ __u32 max_hw_vf_fpm_id; ++ __u32 first_hw_vf_fpm_id; ++ __u32 max_rra; ++ __u32 max_sra; ++ __u32 max_hw_wqes; ++ __u32 max_hw_pds; ++ __u32 max_hw_ena_vf_count; ++ __u32 max_qp_wr; ++ __u32 max_pe_ready_count; ++ __u32 max_done_count; ++ __u32 max_sleep_count; ++ __u32 max_mq_compl_wait_time_ms; ++}; ++ ++typedef struct sxe2_spinlock { ++ pthread_spinlock_t lock; ++ int in_use; ++ int need_lock; ++} sxe2_spinlock_s; ++ ++struct sxe2_rdma_ucontext { ++ struct verbs_context ibv_ctx; ++ int abi_ver; ++ struct sxe2_upd *sxe2_upd; ++ struct sxe2_common_attrs uk_attrs; ++ FILE *dbg_fp; ++ char bdf[SXE2_RDMA_BDF_BUFF_LEN]; ++ int ll_mode; ++ __u32 tot_ll_wqes; ++ __u32 ded_ll_wqes; ++ __u32 shared_ll_wqes; ++ struct list_head shared_llwqe_list; ++ struct list_head dedicated_llwqe_list; ++ struct list_head mmap_page_addr_list; ++ __u16 alloc_dedicated_llwqes; ++ __u16 alloc_shared_llwqes; ++ pthread_mutex_t alloc_llwqe_mutex; ++ void *qp_db_no_llwqe; ++ void *cq_db_arm; ++ void *cq_db_info; ++ __u32 enable_io_log; ++ __u32 log_level; ++ sxe2_spinlock_s hugetlb_lock; ++ struct list_head hugetlb_list; ++}; ++ ++struct sxe2_umr { ++ struct verbs_mr vmr; ++ int acc_flags; ++}; ++ ++struct sxe2_uah { ++ struct ibv_ah ibv_ah; ++ __u32 ah_id; ++}; ++ ++struct sxe2_verbs_llwqe { ++ void *wqe_addr; ++ void *db_addr; ++ pthread_spinlock_t lock; ++ __u32 wqe_buf_size; ++ __u32 num_llwqe; ++ int need_lock; ++ void *db_page_addr; ++ off_t db_mmap_offset; ++ __u8 mmaped_entry : 1; ++ __u8 no_ll_mode : 1; ++ __u8 qp_dedicated : 1; ++ __u8 qp_shared : 1; ++ __u32 count; ++ struct list_node list_entry; ++ __u32 db_handle; ++ __u32 db_mmap_size; ++ __u32 db_page_id; ++}; ++ ++struct sxe2_db_mmap_db_page_addr { ++ void* db_page_addr; ++ __u32 mmap_size; ++ __u32 db_page_id; ++ struct 
list_node list_entry; ++}; ++struct ibv_cq *sxe2_ucreate_cq(struct ibv_context *context, int cqe, ++ struct ibv_comp_channel *channel, int comp_vector); ++struct ibv_cq_ex *sxe2_ucreate_cq_ex(struct ibv_context *context, ++ struct ibv_cq_init_attr_ex *cq_attr); ++int sxe2_umodify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr); ++int sxe2_udestroy_cq(struct ibv_cq *cq); ++struct ibv_pd *sxe2_ualloc_pd(struct ibv_context *context); ++int sxe2_ufree_pd(struct ibv_pd *pd); ++struct ibv_mr *sxe2_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, ++ uint64_t hca_va, int acc); ++int sxe2_udereg_mr(struct verbs_mr *vmr); ++int sxe2_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, ++ void *addr, size_t length, int access); ++struct ibv_mr *sxe2_ureg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, ++ size_t length, uint64_t iova, int fd, ++ int acc); ++struct ibv_mw *sxe2_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type); ++struct ibv_ah *sxe2_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr); ++int sxe2_udestroy_ah(struct ibv_ah *ibah); ++int sxe2_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, ++ struct ibv_send_wr **bad_wr); ++int sxe2_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, ++ struct ibv_recv_wr **bad_wr); ++int sxe2_upoll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc); ++int sxe2_uarm_cq(struct ibv_cq *cq, int solicited); ++void sxe2_ucq_event(struct ibv_cq *cq); ++int sxe2_upost_srq_recv(struct ibv_srq *ibv_srq, ++ struct ibv_recv_wr *ibv_wr, struct ibv_recv_wr **bad_wr); ++void sxe2_wr_start(struct ibv_qp_ex *qp_ex); ++int sxe2_wr_complete(struct ibv_qp_ex *qp_ex); ++void sxe2_wr_abort(struct ibv_qp_ex *qp_ex); ++void sxe2_wr_send_rc_ud(struct ibv_qp_ex *qp_ex); ++void sxe2_wr_send_imm_rc_ud(struct ibv_qp_ex *qp_ex, __be32 imm_data); ++void sxe2_wr_send_inv_rc(struct ibv_qp_ex *qp_ex, uint32_t invalidate_rkey); ++void sxe2_wr_rdma_read_rc(struct ibv_qp_ex *qp_ex, uint32_t rkey, ++ uint64_t remote_addr); 
++void sxe2_wr_rdma_write_rc(struct ibv_qp_ex *qp_ex, uint32_t rkey, ++ uint64_t remote_addr); ++void sxe2_wr_rdma_write_imm_rc(struct ibv_qp_ex *qp_ex, uint32_t rkey, ++ uint64_t remote_addr, __be32 imm_data); ++void sxe2_wr_set_ud_addr(struct ibv_qp_ex *qp_ex, struct ibv_ah *ah, ++ uint32_t remote_qpn, uint32_t remote_qkey); ++void sxe2_wr_set_inline_data_rc_ud(struct ibv_qp_ex *qp_ex, ++ void *addr, size_t length); ++void sxe2_wr_set_inline_data_list_rc_ud(struct ibv_qp_ex *qp_ex, ++ size_t num_buf, const struct ibv_data_buf *buf_list); ++void sxe2_wr_set_sge_rc_ud(struct ibv_qp_ex *qp_ex, uint32_t lkey, ++ uint64_t addr, uint32_t length); ++void sxe2_wr_set_sge_list_rc_ud(struct ibv_qp_ex *qp_ex, ++ size_t num_sge, const struct ibv_sge *sg_list); ++void sxe2_wr_bind_mw_rc(struct ibv_qp_ex *qp_ex, struct ibv_mw *mw, ++ uint32_t rkey, const struct ibv_mw_bind_info *bind_info); ++void sxe2_wr_local_inv_rc(struct ibv_qp_ex *qp_ex, uint32_t invalidate_rkey); ++ ++static inline struct sxe2_rdma_ucontext *to_sctx(struct ibv_context *ibctx) ++{ ++ return container_of(ibctx, struct sxe2_rdma_ucontext, ibv_ctx.context); ++} ++ ++int sxe2_uget_single_threaded_env(void); ++ ++int sxe2_uget_ll_mode(void); ++ ++int sxe2_uget_tot_llwqe(struct sxe2_rdma_ucontext *sctx, ++ struct sxe2_get_context_resp *resp); ++ ++int sxe2_uget_ded_llwqe(struct sxe2_rdma_ucontext *sctx, int total_llwqe); ++ ++int sxe2_uinit_doorbell(struct sxe2_rdma_ucontext *sctx, ++ struct sxe2_get_context_resp *resp); ++ ++void sxe2_ufree_doorbell(struct ibv_context *context); ++ ++ ++struct sxe2_verbs_llwqe *alloc_db_page_and_get_qp_llwqe(struct ibv_context *context, ++ __u32 db_mmap_size, ++ __u32 db_page_id, ++ __u64 db_mmap_offset, ++ __u8 alloc_page_type); ++struct sxe2_verbs_llwqe *db_uget_qp_llwqe(struct ibv_context *context, ++ bool *need_alloc_page, ++ __u8 *alloc_page_type); ++ ++struct sxe2_verbs_llwqe * ++db_ualloc_page_and_llwqes(struct ibv_context *context, bool dedicated, ++ __u32 
db_mmap_size, ++ __u32 db_page_id, ++ __u64 db_mmap_offset); ++ ++void db_uput_qp_llwqe(struct sxe2_rdma_ucontext *sctx, ++ struct sxe2_verbs_llwqe *llwqe); ++ ++void *sxe2_alloc_hw_buf(size_t size); ++ ++void sxe2_free_hw_buf(void *buf, size_t size); ++ ++__u32 sxe2_round_up_pow_2(__u32 value); ++ ++struct ibv_qp *sxe2_ucreate_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr); ++ ++struct ibv_qp *sxe2_ucreate_qp_ex(struct ibv_context *context, ++ struct ibv_qp_init_attr_ex *attr_ex); ++ ++int sxe2_udestroy_qp(struct ibv_qp *ibqp); ++ ++int sxe2_umodify_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, int attr_mask); ++ ++int sxe2_uquery_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, int attr_mask, ++ struct ibv_qp_init_attr *init_attr); ++int sxe2_start_poll(struct ibv_cq_ex *ibvcq_ex, struct ibv_poll_cq_attr *attr); ++ ++int sxe2_next_poll(struct ibv_cq_ex *ibvcq_ex); ++void sxe2_end_poll(struct ibv_cq_ex *ibvcq_ex); ++uint64_t sxe2_wc_read_completion_ts(struct ibv_cq_ex *ibvcq_ex); ++uint64_t sxe2_wc_read_completion_wallclock_ns(struct ibv_cq_ex *ibvcq_ex); ++enum ibv_wc_opcode sxe2_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex); ++uint32_t sxe2_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex); ++unsigned int sxe2_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex); ++uint32_t sxe2_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex); ++__be32 sxe2_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex); ++uint32_t sxe2_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex); ++uint32_t sxe2_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex); ++uint32_t sxe2_wc_read_slid(struct ibv_cq_ex *ibvcq_ex); ++uint8_t sxe2_wc_read_sl(struct ibv_cq_ex *ibvcq_ex); ++uint8_t sxe2_wc_read_dlid_path_bits(struct ibv_cq_ex *ibvcq_ex); ++ ++int sxe2_uget_srq_num(struct ibv_srq *ibv_srq, uint32_t *srqn); ++ ++struct ibv_srq *sxe2_ucreate_srq(struct ibv_pd *pd, ++ struct ibv_srq_init_attr *attr); ++ ++struct ibv_srq *sxe2_ucreate_srq_ex(struct ibv_context *context, ++ struct ibv_srq_init_attr_ex *attr); ++ ++int 
sxe2_umodify_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr, ++ int attr_mask); ++ ++int sxe2_uquery_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr); ++ ++int sxe2_udestroy_srq(struct ibv_srq *ibv_srq); ++ ++static inline struct sxe2_usrq *to_usrq(struct ibv_srq *ibv_srq) ++{ ++ return container_of(ibv_srq, struct sxe2_usrq, vsrq.srq); ++} ++ ++int sxe2_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, __u16 lid); ++ ++int sxe2_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, __u16 lid); ++ ++static inline int sxe2_spin_lock(sxe2_spinlock_s *lock) ++{ ++ int ret = 0; ++ if (lock->need_lock) { ++ ret = pthread_spin_lock(&lock->lock); ++ goto end; ++ } ++ ++ if (unlikely(lock->in_use)) { ++ fprintf(stderr, "*** ERROR: multithreading violation ***\n" ++ "You are running a multithreaded application but\n" ++ "you set SXE2_SINGLE_THREADED=1. Please unset it.\n"); ++ abort(); ++ } else { ++ lock->in_use = 1; ++ atomic_thread_fence(memory_order_acq_rel); ++ } ++end: ++ return ret; ++} ++ ++static inline int sxe2_spin_unlock(sxe2_spinlock_s *lock) ++{ ++ int ret = 0; ++ ++ if (lock->need_lock) { ++ ret = pthread_spin_unlock(&lock->lock); ++ goto end; ++ } ++ ++ lock->in_use = 0; ++end: ++ return ret; ++} ++ ++static inline int sxe2_spinlock_init(sxe2_spinlock_s *lock, int need_lock) ++{ ++ lock->in_use = 0; ++ lock->need_lock = need_lock; ++ ++ return pthread_spin_init(&lock->lock, PTHREAD_PROCESS_SHARED); ++} ++ ++static inline int sxe2_spinlock_destroy(sxe2_spinlock_s *lock) ++{ ++ return pthread_spin_destroy(&lock->lock); ++} ++ ++#endif +diff -Naur rdma-core-48.0.bak/providers/sxe2/verbs.c rdma-core-48.0/providers/sxe2/verbs.c +--- rdma-core-48.0.bak/providers/sxe2/verbs.c 1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/verbs.c 2026-05-27 17:08:05.581104746 +0800 +@@ -0,0 +1 @@ ++ +diff -Naur rdma-core-48.0.bak/providers/sxe2/verbs.h rdma-core-48.0/providers/sxe2/verbs.h +--- rdma-core-48.0.bak/providers/sxe2/verbs.h 
1970-01-01 08:00:00.000000000 +0800 ++++ rdma-core-48.0/providers/sxe2/verbs.h 2026-05-27 17:08:05.579104733 +0800 +@@ -0,0 +1,6 @@ ++#ifndef __SXE2_USER_VERBS_H__ ++#define __SXE2_USER_VERBS_H__ ++ ++#include "sxe2_common.h" ++ ++#endif \ 文件尾没有换行符