* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#ifndef HNS_ROCE_LITE_H
#define HNS_ROCE_LITE_H
#include "verbs_exp.h"
#include "rdma_lite.h"
#include "ascend_hal.h"
#include "hns_roce_u_hw_v2.h"
#define BIT_CNT_PER_BYTE 8
#define BIT_CNT_PER_LONG (BIT_CNT_PER_BYTE * sizeof(uint64_t))
#define GENMASK(h, l) (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BIT_CNT_PER_LONG - 1 - (h))))
#define RECORD_DB_CI_MASK GENMASK(23, 0)
#define HNS_ROCE_LITE_CQ_FLAG_RECORD_DB (1 << 0)
#define HNS_ROCE_LITE_CQE_SUCCESS 0
#define HNS_ROCE_LITE_CQE_IS_SQ 0
#define HNS_ROCE_LITE_INVALID_SGE_KEY 0x100
enum {
HNS_ROCE_LITE_CQE_QPN_MASK = 0x3ffff,
HNS_ROCE_LITE_CQE_STATUS_MASK = 0xff,
HNS_ROCE_LITE_CQE_OPCODE_MASK = 0x1f
};
#define CQE_BYTE_4_OPCODE_S 0
#define CQE_BYTE_4_OPCODE_M (((1UL << 5) - 1) << CQE_BYTE_4_OPCODE_S)
#define CQE_BYTE_4_RQ_INLINE_S 5
#define CQE_BYTE_4_S_R_S 6
#define CQE_BYTE_4_OWNER_S 7
#define CQE_BYTE_4_STATUS_S 8
#define CQE_BYTE_4_STATUS_M (((1UL << 8) - 1) << CQE_BYTE_4_STATUS_S)
#define CQE_BYTE_4_WQE_IDX_S 16
#define CQE_BYTE_4_WQE_IDX_M (((1UL << 16) - 1) << CQE_BYTE_4_WQE_IDX_S)
#define CQE_BYTE_12_XRC_SRQN_S 0
#define CQE_BYTE_12_XRC_SRQN_M (((1UL << 24) - 1) << CQE_BYTE_12_XRC_SRQN_S)
#define CQE_BYTE_16_LCL_QPN_S 0
#define CQE_BYTE_16_LCL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LCL_QPN_S)
#define CQE_BYTE_28_SMAC_S 0
#define CQE_BYTE_28_SMAC_M (((1UL << 16) - 1) << CQE_BYTE_28_SMAC_S)
#define CQE_BYTE_28_PORT_TYPE_S 16
#define CQE_BYTE_28_PORT_TYPE_M (((1UL << 2) - 1) << CQE_BYTE_28_PORT_TYPE_S)
#define CQE_BYTE_32_RMT_QPN_S 0
#define CQE_BYTE_32_RMT_QPN_M (((1UL << 24) - 1) << CQE_BYTE_32_RMT_QPN_S)
#define CQE_BYTE_32_SL_S 24
#define CQE_BYTE_32_SL_M (((1UL << 3) - 1) << CQE_BYTE_32_SL_S)
#define CQE_BYTE_32_PORTN_S 27
#define CQE_BYTE_32_PORTN_M (((1UL << 3) - 1) << CQE_BYTE_32_PORTN_S)
#define CQE_BYTE_32_GLH_S 30
#define CQE_BYTE_32_LPK_S 31
#define BYTE_LEN 8
#define DB_SL_OFFSET 48
#define DB_PI_OFFSET 32
#define DB_QPN_OFFSET 24
#define INLINE_REDUCE_OP_MASK 0x7
#define INLINE_REDUCE_TYPE_MASK 0xf
#define RC_SQ_WQE_BYTE_20_REDUCE_TYPE_S 24
#define RC_SQ_WQE_BYTE_20_REDUCE_TYPE_M \
(((1UL << 4) - 1) << RC_SQ_WQE_BYTE_20_REDUCE_TYPE_S)
#define RC_SQ_WQE_BYTE_20_REDUCE_OP_S 28
#define RC_SQ_WQE_BYTE_20_REDUCE_OP_M \
(((1UL << 3) - 1) << RC_SQ_WQE_BYTE_20_REDUCE_OP_S)
#define RC_SQ_WQE_BYTE_12_NOTIFY_INFO_S 0
#define RC_SQ_WQE_BYTE_12_NOTIFY_INFO_M \
(((1UL << 25) - 1) << RC_SQ_WQE_BYTE_12_NOTIFY_INFO_S)
#define RC_SQ_WQE_BYTE_4_OWNER_S 7
#define RC_SQ_WQE_BYTE_4_FENCE_S 9
#define RC_SQ_WQE_BYTE_4_SE_S 11
#define RC_SQ_WQE_BYTE_4_INLINE_S 12
#define RC_SQ_WQE_BYTE_16_SGE_NUM_S 24
#define RC_SQ_WQE_BYTE_16_SGE_NUM_M \
(((1UL << 8) - 1) << RC_SQ_WQE_BYTE_16_SGE_NUM_S)
#define RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
#define RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_M \
(((1UL << 24) - 1) << RC_SQ_WQE_BYTE_20_MSG_START_SGE_IDX_S)
#define RC_SQ_WQE_BYTE_20_INL_TYPE_S 31
#define RC_SQ_WQE_BYTE_4_OPCODE_S 0
#define RC_SQ_WQE_BYTE_4_OPCODE_M \
(((1UL << 5) - 1) << RC_SQ_WQE_BYTE_4_OPCODE_S)
#define RC_SQ_WQE_BYTE_4_CQE_S 8
#define RC_MAX_SGE_NUME 2
#define BITMAP_WORD_LEN 32
#define BITMAP_WORD_NUM 320
#define BITMAP_WORD_SIZE (BITMAP_WORD_LEN * BITMAP_WORD_NUM)
#define PAGE_ALIGN_4KB (4 * 1024)
enum hns_roce_qp_cap_flags {
HNS_ROCE_QP_CAP_RQ_RECORD_DB = (1 << 0),
HNS_ROCE_QP_CAP_SQ_RECORD_DB = (1 << 1),
HNS_ROCE_QP_CAP_OWNER_DB = (1 << 2),
HNS_ROCE_QP_CAP_DIRECT_WQE = (1 << 5)
};
enum {
HNS_ROCE_LITE_USER_CANCLE_QP = 0x0,
HNS_ROCE_LITE_USER_POLL_CQE = 0x1,
HNS_ROCE_LITE_USER_RESERVER = 0xff
};
enum {
HNS_ROCE_LITE_QP_TABLE_BITS = 8,
HNS_ROCE_LITE_QP_TABLE_SIZE = (1 << HNS_ROCE_LITE_QP_TABLE_BITS)
};
enum {
HNS_ROCE_SQ_OP_NOP = 0x13,
HNS_ROCE_SQ_OP_REDUCE_WRITE = 0x14,
HNS_ROCE_SQ_OP_REDUCE_WRITE_NOTIFY = 0x15,
HNS_ROCE_SQ_OP_WRITE_NOTIFY = 0x16,
HNS_ROCE_SQ_OP_ATOMIC_WRITE = 0x17,
};
enum {
HNS_ROCE_WC_MASK_COMP_SWAP = 8,
HNS_ROCE_WC_MASK_FETCH_ADD,
HNS_ROCE_WC_ATOMIC_WRITE = 0xf0,
HNS_ROCE_WC_WRITE_NOTIFY = 0xf2,
HNS_ROCE_WC_NOP = 0xf3,
HNS_ROCE_WC_PERSIST_WRITE_WITH_IMM,
HNS_ROCE_WC_REDUCE_WRITE,
HNS_ROCE_WC_REDUCE_WRITE_NOTIFY,
};
struct hns_roce_lite_sge_ex {
int offset;
u32 sge_cnt;
int sge_shift;
};
struct hns_roce_lite_sge_info {
u32 valid_num;
u32 start_idx;
u32 total_len;
};
struct hns_roce_lite_wq {
unsigned long *wrid;
unsigned long wrid_len;
pthread_spinlock_t lock;
u32 wqe_cnt;
unsigned int max_post;
u32 head;
u32 tail;
u32 max_gs;
u32 wqe_shift;
u32 shift;
int offset;
};
struct hns_roce_lite_context {
struct rdma_lite_context lite_ctx;
struct {
struct hns_roce_lite_qp **table;
int refcnt;
} qp_table[HNS_ROCE_LITE_QP_TABLE_SIZE];
pthread_mutex_t qp_table_mutex;
int qp_table_shift;
int qp_table_mask;
u32 num_qps;
u32 max_qp_wr;
u32 max_sge;
u32 port_num;
u32 dev_id;
u32 page_size;
struct list_head db_list;
u32 mem_bitmap_list[BITMAP_WORD_NUM];
struct rdma_lite_mem_pool *mem_pool_list[BITMAP_WORD_SIZE];
pthread_mutex_t mutex;
};
struct hns_roce_lite_cq {
struct rdma_lite_cq lite_cq;
u32 depth;
u32 flags;
u32 cqe_size;
u32 cqn;
pthread_spinlock_t lock;
u32 cons_index;
struct rdma_lite_host_buf swdb_buf;
struct rdma_lite_host_buf cq_buf;
struct list_head list_sq;
struct list_head list_rq;
};
struct hns_roce_lite_qp {
struct rdma_lite_qp lite_qp;
struct rdma_lite_host_buf buf;
struct rdma_lite_host_buf sdb_buf;
struct rdma_lite_host_buf rdb_buf;
u32 max_inline_data;
int buf_size;
u32 sq_signal_bits;
struct hns_roce_lite_wq sq;
struct hns_roce_lite_wq rq;
struct hns_roce_lite_sge_ex sge;
u32 next_sge;
int port_num;
int sl;
int gdr_enabled;
u32 flags;
struct list_node rcq_list;
struct list_node scq_list;
};
struct db_dva_node {
u64 db_align_dva;
void *hva;
struct list_node entry;
u32 ref_cnt;
};
enum {
CQ_OK = 0,
CQ_EMPTY = -1,
CQ_POLL_ERR = -2
};
struct hns_roce_lite_cqe {
__le32 byte_4;
union {
__le32 rkey;
__le32 immtdata;
};
__le32 byte_12;
__le32 byte_16;
__le32 byte_cnt;
__le32 smac;
__le32 byte_28;
__le32 byte_32;
};
struct hns_roce_lite_rc_sq_wqe {
__le32 byte_4;
__le32 msg_len;
union {
__le32 inv_key;
__le32 immtdata;
__le32 new_rkey;
};
__le32 byte_16;
__le32 byte_20;
__le32 rkey;
__le64 va;
};
struct hns_roce_lite_wqe_data_seg {
__le32 len;
__le32 lkey;
__le64 addr;
};
struct cqe_wc_status {
u32 cqe_status;
u32 wc_status;
};
enum hns_roce_lite_qp_ai_mode {
HNS_ROCE_QP_AI_MODE_NOR = 0,
HNS_ROCE_QP_AI_MODE_GDR = 1,
HNS_ROCE_QP_AI_MODE_OP = 2,
HNS_ROCE_QP_AI_MODE_GDR_ASYN = 3,
HNS_ROCE_QP_AI_MODE_OP_EXT = 4
};
static inline struct hns_roce_lite_cq *to_hr_lite_cq(struct rdma_lite_cq *lite_cq)
{
return container_of(lite_cq, struct hns_roce_lite_cq, lite_cq);
}
static inline struct hns_roce_lite_qp *to_hr_lite_qp(struct rdma_lite_qp *lite_qp)
{
return container_of(lite_qp, struct hns_roce_lite_qp, lite_qp);
}
static inline struct hns_roce_lite_context *to_hr_lite_ctx(struct rdma_lite_context *lite_ctx)
{
return container_of(lite_ctx, struct hns_roce_lite_context, lite_ctx);
}
#define HR_RDMA_LITE_OPC_MAP(rdma_lite_key, hr_key) \
[RDMA_LITE_WR_ ## rdma_lite_key] = HNS_ROCE_WQE_OP_ ## hr_key
static const u32 hns_roce_lite_op_code[] = {
HR_RDMA_LITE_OPC_MAP(RDMA_WRITE, RDMA_WRITE),
HR_RDMA_LITE_OPC_MAP(RDMA_WRITE_WITH_IMM, RDMA_WRITE_WITH_IMM),
HR_RDMA_LITE_OPC_MAP(SEND, SEND),
HR_RDMA_LITE_OPC_MAP(SEND_WITH_IMM, SEND_WITH_IMM),
HR_RDMA_LITE_OPC_MAP(RDMA_READ, RDMA_READ),
HR_RDMA_LITE_OPC_MAP(ATOMIC_CMP_AND_SWP, ATOMIC_COM_AND_SWAP),
HR_RDMA_LITE_OPC_MAP(NOP, NOP),
HR_RDMA_LITE_OPC_MAP(ATOMIC_WRITE, ATOMIC_WRITE),
HR_RDMA_LITE_OPC_MAP(REDUCE_WRITE, REDUCE_WRITE),
HR_RDMA_LITE_OPC_MAP(REDUCE_WRITE_NOTIFY, REDUCE_WRITE_NOTIFY),
HR_RDMA_LITE_OPC_MAP(WRITE_WITH_NOTIFY, WRITE_WITH_NOTIFY),
};
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
static inline u32 to_hr_lite_opcode(enum rdma_lite_wr_opcode lite_opcode)
{
if (lite_opcode >= ARRAY_SIZE(hns_roce_lite_op_code)) {
return HNS_ROCE_WQE_OP_MASK;
}
return hns_roce_lite_op_code[lite_opcode];
}
struct rdma_lite_context *hns_roce_lite_alloc_context(u8 phy_id, struct dev_cap_info *cap);
void hns_roce_lite_free_context(struct rdma_lite_context *lite_ctx);
int hns_roce_lite_init_mem_pool(struct rdma_lite_context *lite_ctx, struct rdma_lite_mem_attr *lite_mem_attr);
int hns_roce_lite_deinit_mem_pool(struct rdma_lite_context *lite_ctx, u32 mem_idx);
struct rdma_lite_cq *hns_roce_lite_create_cq(struct rdma_lite_context *lite_ctx,
struct rdma_lite_cq_attr *lite_cq_attr);
int hns_roce_lite_destroy_cq(struct rdma_lite_cq *lite_cq);
struct rdma_lite_qp *hns_roce_lite_create_qp(struct rdma_lite_context *lite_ctx,
struct rdma_lite_qp_attr *lite_qp_attr);
int hns_roce_lite_destroy_qp(struct rdma_lite_qp *lite_qp);
int hns_roce_lite_poll_cq(struct rdma_lite_cq *lite_cq, int num_entries, struct rdma_lite_wc *lite_wc);
int hns_roce_lite_poll_cq_v2(struct rdma_lite_cq *lite_cq, int num_entries, struct rdma_lite_wc_v2 *lite_wc);
int hns_roce_lite_post_send(struct rdma_lite_qp *lite_qp, struct rdma_lite_send_wr *lite_wr,
struct rdma_lite_send_wr **bad_wr, struct rdma_lite_post_send_attr *attr, struct rdma_lite_post_send_resp *resp);
int hns_roce_lite_post_recv(struct rdma_lite_qp *lite_qp, struct rdma_lite_recv_wr *lite_wr,
struct rdma_lite_recv_wr **bad_wr);
int hns_roce_lite_set_qp_sl(struct rdma_lite_qp *lite_qp, int sl);
int hns_roce_lite_clean_qp(struct rdma_lite_qp *lite_qp);
int hns_roce_lite_restore_snapshot(struct rdma_lite_context *lite_ctx);
#endif