* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#include "gtest/gtest.h"
#include <mockcpp/mockcpp.hpp>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <securec.h>
#include <ifaddrs.h>
#include <sys/socket.h>
#include <netdb.h>
#include <string>
#include <sys/types.h>
#include <stddef.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <sys/mman.h>
#include "dlra_function.h"
#define private public
#define protected public
#include "externalinput.h"
#include "adapter_rts.h"
#include "network_manager_pub.h"
#include "adapter_rts.h"
#include "typical_qp_manager.h"
#include "externalinput_pub.h"
#include "interface_hccl.h"
#undef private
#include <hccl/hccl_comm.h>
#include <hccl/hccl_inner.h>
#include <hccl/hccl_ex.h>
#include "llt_hccl_stub_pub.h"
#include "llt_hccl_stub_gdr.h"
#include <iostream>
#include <fstream>
using namespace std;
using namespace hccl;
class MultiThreadNpuGpu : public testing::Test {
protected:
static void SetUpTestCase()
{
ClearRdevNicInfo();
std::cout << "MultiThreadNpuGpu SetUP" << std::endl;
}
static void TearDownTestCase()
{
ClearRdevNicInfo();
std::cout << "MultiThreadNpuGpu TearDown" << std::endl;
}
virtual void SetUp()
{
std::cout << "A Test SetUP" << std::endl;
}
virtual void TearDown()
{
GlobalMockObject::verify();
std::cout << "A Test TearDown" << std::endl;
}
};
/**
 * Stub installed in place of HrtRaGetNotifyBaseAddr.
 * Ignores the handle and always reports the same base address and size.
 *
 * @param handle  unused
 * @param va      [out] receives 0x20000000
 * @param size    [out] receives 4
 * @return always HCCL_SUCCESS
 */
HcclResult stub_HrtRaGetNotifyBaseAddr_3(RdmaHandle handle, u64 *va, u64 *size)
{
    (void)handle;
    const u64 fixedBase = 0x20000000;
    const u64 fixedSize = 4;
    *size = fixedSize;
    *va = fixedBase;
    return HCCL_SUCCESS;
}
/**
 * Stub installed in place of HrtRaGetHccnCfg.
 * Every query, whatever its key, is answered with the fixed string "0_1_128_4".
 *
 * @param networkMode  unused
 * @param devicePhyId  unused
 * @param key          unused
 * @param value        [out] overwritten with the canned configuration string
 * @return always HCCL_SUCCESS
 */
HcclResult stub_HrtRaGetHccnCfg(s32 networkMode, u32 devicePhyId, enum HccnCfgKeyT key, std::string &value)
{
    (void)networkMode;
    (void)devicePhyId;
    (void)key;
    value.assign("0_1_128_4");
    return HCCL_SUCCESS;
}
// Heap object the QP-create stubs hand back as the opaque QP handle.
struct StubQpInfo {
    u32 qpn{0};  // queue-pair number assigned by the creating stub
};
// Next queue-pair number handed out by the QP stubs in this file.
// Declared thread_local, so every thread owns a separate counter that starts at 1.
thread_local static u32 gQpn = 1;
// Number of worker threads (one per device id) started by the multi-thread tests below.
#define DEV_NUM 1
/**
 * Stub installed in place of hrtRaTypicalQpCreate.
 * Allocates a StubQpInfo, gives it the next per-thread QPN and returns it as the QP handle.
 * The allocation is released by stub_hrtRaQpDestroy_1.
 *
 * @param rdmaHandle  unused
 * @param flag        unused
 * @param qpMode      unused
 * @param qpInfo      [out] its qpn field receives the assigned QPN; must not be null
 * @param qpHandle    [out] receives the newly allocated StubQpInfo as an opaque pointer
 * @return always HCCL_SUCCESS
 */
HcclResult stub_hrtRaTypicalQpCreate(RdmaHandle rdmaHandle, int flag,
    int qpMode, struct TypicalQp* qpInfo, QpHandle &qpHandle)
{
    StubQpInfo *info = new StubQpInfo();
    info->qpn = gQpn++;
    // Log the QPN that was actually assigned; gQpn already holds the next value here.
    HCCL_ERROR("QPN:%u", info->qpn);
    qpInfo->qpn = info->qpn;
    qpHandle = static_cast<void *>(info);
    return HCCL_SUCCESS;
}
/**
 * Stub installed in place of hrtRaQpCreateWithAttrs.
 * Allocates a StubQpInfo, gives it the next per-thread QPN and returns it as the QP handle.
 * The allocation is released by stub_hrtRaQpDestroy_1.
 *
 * @param rdmaHandle  unused
 * @param attrs       unused
 * @param qpHandle    [out] receives the newly allocated StubQpInfo as an opaque pointer
 * @return always HCCL_SUCCESS
 */
HcclResult stub_hrtRaQpCreateWithAttrs(RdmaHandle rdmaHandle, struct QpExtAttrs *attrs, QpHandle &qpHandle)
{
    StubQpInfo *info = new StubQpInfo();
    info->qpn = gQpn++;
    // Log the QPN that was actually assigned; gQpn already holds the next value here.
    HCCL_ERROR("QPN:%u", info->qpn);
    qpHandle = static_cast<void *>(info);
    return HCCL_SUCCESS;
}
/**
 * Stub installed in place of hrtRaGetQpAttr.
 * Does not look at the handle: it reports the current per-thread counter value
 * as the QPN and then advances the counter.
 *
 * @param qpHandle  unused
 * @param attr      [out] its qpn field receives the counter value; must not be null
 * @return always HCCL_SUCCESS
 */
HcclResult stub_hrtRaGetQpAttr(QpHandle qpHandle, struct QpAttr *attr)
{
    (void)qpHandle;
    attr->qpn = gQpn;
    ++gQpn;
    return HCCL_SUCCESS;
}
/**
 * Stub installed in place of hrtRaGetInterfaceVersion.
 * Reports interface version 2 for every device and opcode.
 *
 * @param phyId             unused
 * @param interfaceOpcode   unused
 * @param interfaceVersion  [out] receives 2; must not be null
 * @return always HCCL_SUCCESS
 */
HcclResult stub_hrtRaGetInterfaceVersion_support(unsigned int phyId, unsigned int interfaceOpcode, unsigned int* interfaceVersion)
{
    (void)phyId;
    (void)interfaceOpcode;
    const unsigned int reportedVersion = 2;
    *interfaceVersion = reportedVersion;
    return HCCL_SUCCESS;
}
/**
 * Stub installed in place of HrtRaQpDestroy.
 * Frees the StubQpInfo that one of the QP-create stubs allocated.
 *
 * @param handle  opaque pointer previously produced by a create stub; a null handle is a no-op.
 *                Passed by value, so the caller's copy of the handle is left unchanged.
 * @return always HCCL_SUCCESS
 */
HcclResult stub_hrtRaQpDestroy_1(QpHandle handle)
{
    delete static_cast<StubQpInfo *>(handle);
    return HCCL_SUCCESS;
}
/**
 * Single-thread end-to-end pass over the typical-QP API on device 0:
 * RDMA init -> QP create/modify -> sync and window memory setup -> batch put + wait -> teardown.
 * The RA/runtime entry points are replaced by the stubs above or by canned return values,
 * and every API call is expected to return HCCL_SUCCESS.
 */
TEST_F(MultiThreadNpuGpu, EndtoEndOneProcess)
{
    // QP lifecycle and notify address are served by the local stubs.
    MOCKER(hrtRaTypicalQpCreate).stubs().will(invoke(stub_hrtRaTypicalQpCreate));
    MOCKER(HrtRaQpDestroy).stubs().will(invoke(stub_hrtRaQpDestroy_1));
    MOCKER(HrtRaGetNotifyBaseAddr).stubs().will(invoke(stub_HrtRaGetNotifyBaseAddr_3));
    // External RDMA configuration getters all return 1.
    MOCKER(GetExternalInputRdmaTrafficClass).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaServerLevel).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaRetryCnt).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaTimeOut).stubs().will(returnValue(1));
    // Runtime operations succeed without doing any work.
    MOCKER(hrtMemSyncCopy).stubs().will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtNotifyWaitWithTimeOut).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtRDMADBSend).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));

    EXPECT_EQ(hrtSetDevice(0), HCCL_SUCCESS);
    EXPECT_EQ(hcclAscendRdmaInit(), HCCL_SUCCESS);

    AscendQPInfo localQPInfo;
    EXPECT_EQ(hcclCreateAscendQP(&localQPInfo), HCCL_SUCCESS);
    // No real peer exists in this test; value-initialize the remote description so the
    // modify call never reads indeterminate fields.
    AscendQPInfo remoteQpInfo{};
    AscendQPQos qpQos;
    qpQos.sl = 4;
    qpQos.tc = 4;
    EXPECT_EQ(hcclModifyAscendQPEx(&localQPInfo, &remoteQpInfo, &qpQos), HCCL_SUCCESS);

    // Sync memory descriptors. The addr values are placeholders; the alloc calls below
    // receive a pointer to addr (presumably to store the allocated address there).
    AscendMrInfo localSyncMemPrepare;
    localSyncMemPrepare.addr = 0x2;
    localSyncMemPrepare.size = 4;
    localSyncMemPrepare.key = 4;
    AscendMrInfo localSyncMemDone;
    localSyncMemDone.addr = 0x3;
    localSyncMemDone.size = 4;
    localSyncMemDone.key = 4;
    AscendMrInfo localSyncMemAck;
    localSyncMemAck.addr = 0x5;
    localSyncMemAck.size = 4;
    localSyncMemAck.key = 4;
    EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemPrepare.addr))), HCCL_SUCCESS);
    EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemPrepare), HCCL_SUCCESS);
    EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemDone.addr))), HCCL_SUCCESS);
    EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemDone), HCCL_SUCCESS);
    EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemAck.addr))), HCCL_SUCCESS);
    EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemAck), HCCL_SUCCESS);

    // Window memory used as both source and destination of the put.
    AscendMrInfo localMr;
    localMr.addr = 0x11111111;
    localMr.size = 8;
    localMr.key = 4;
    EXPECT_EQ(hcclAllocWindowMem((void**)(&localMr.addr), localMr.size), HCCL_SUCCESS);
    EXPECT_EQ(hcclRegisterMem(&localMr), HCCL_SUCCESS);

    // Loopback link: the "remote" sync memories are the local ones.
    AscendSendRecvLinkInfo linkInfo;
    linkInfo.localSyncMemAck = &localSyncMemAck;
    linkInfo.localQPinfo = &localQPInfo;
    linkInfo.localSyncMemDone = &localSyncMemDone;
    linkInfo.localSyncMemPrepare = &localSyncMemPrepare;
    linkInfo.remoteSyncMemAck = &localSyncMemAck;
    linkInfo.remoteSyncMemDone = &localSyncMemDone;
    linkInfo.remoteSyncMemPrepare = &localSyncMemPrepare;
    linkInfo.wqePerDoorbell = 2;
    aclrtStream stream = (aclrtStream)0x87654321;  // dummy stream value
    EXPECT_EQ(HcclBatchPutMRByAscendQP(1, &localMr, &localMr, &linkInfo, stream), HCCL_SUCCESS);
    EXPECT_EQ(HcclWaitPutMRByAscendQP(&linkInfo, stream), HCCL_SUCCESS);

    // Teardown, in reverse order of acquisition.
    EXPECT_EQ(hcclDeRegisterMem(&localMr), HCCL_SUCCESS);
    EXPECT_EQ(hcclFreeWindowMem((void*)localMr.addr), HCCL_SUCCESS);
    EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemPrepare.addr)), HCCL_SUCCESS);
    EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemDone.addr)), HCCL_SUCCESS);
    EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemAck.addr)), HCCL_SUCCESS);
    EXPECT_EQ(hcclDestroyAscendQP(&localQPInfo), HCCL_SUCCESS);
    EXPECT_EQ(hrtResetDevice(0), HCCL_SUCCESS);
    EXPECT_EQ(hcclAscendRdmaDeInit(), HCCL_SUCCESS);
    GlobalMockObject::verify();
}
void* ThreadHandleTypIcalQP(void* args)
{
s32 devId = *(s32*)args;
HcclResult ret = HCCL_SUCCESS;
EXPECT_EQ(hrtSetDevice(devId), HCCL_SUCCESS);
EXPECT_EQ(hcclAscendRdmaInit(), HCCL_SUCCESS);
AscendQPInfo localQPInfo;
localQPInfo.qpn = 1;
EXPECT_EQ(hcclCreateAscendQP(&localQPInfo), HCCL_SUCCESS);
AscendQPQos qpQos;
qpQos.sl = 4;
qpQos.tc = 4;
EXPECT_EQ(hcclModifyAscendQPEx(&localQPInfo, &localQPInfo, &qpQos), HCCL_SUCCESS);
AscendMrInfo localSyncMemPrepare;
localSyncMemPrepare.addr = 0x2;
localSyncMemPrepare.size = 4;
localSyncMemPrepare.key = 4;
AscendMrInfo localSyncMemDone;
localSyncMemDone.addr = 0x3;
localSyncMemDone.size = 4;
localSyncMemDone.key = 4;
AscendMrInfo localSyncMemAck;
localSyncMemAck.addr = 0x5;
localSyncMemAck.size = 4;
localSyncMemAck.key = 4;
EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemPrepare.addr))), HCCL_SUCCESS);
EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemPrepare), HCCL_SUCCESS);
EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemDone.addr))), HCCL_SUCCESS);
EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemDone), HCCL_SUCCESS);
EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemAck.addr))), HCCL_SUCCESS);
EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemAck), HCCL_SUCCESS);
AscendMrInfo localMr;
localMr.addr = 0x11111111;
localMr.size = 8;
localMr.key = 4;
EXPECT_EQ(hcclAllocWindowMem((void**)(&localMr.addr), localMr.size), HCCL_SUCCESS);
EXPECT_EQ(hcclRegisterMem(&localMr), HCCL_SUCCESS);
AscendSendRecvLinkInfo linkInfo;
linkInfo.localSyncMemAck = &localSyncMemAck;
linkInfo.localQPinfo = &localQPInfo;
linkInfo.localSyncMemDone = &localSyncMemDone;
linkInfo.localSyncMemPrepare = &localSyncMemPrepare;
linkInfo.remoteSyncMemAck = &localSyncMemAck;
linkInfo.remoteSyncMemDone = &localSyncMemDone;
linkInfo.remoteSyncMemPrepare = &localSyncMemPrepare;
linkInfo.wqePerDoorbell = 2;
aclrtStream stream = (aclrtStream)0x87654321;
EXPECT_EQ(HcclBatchPutMRByAscendQP(1, &localMr, &localMr, &linkInfo, stream), HCCL_SUCCESS);
EXPECT_EQ(HcclWaitPutMRByAscendQP(&linkInfo, stream), HCCL_SUCCESS);
EXPECT_EQ(hcclDeRegisterMem(&localMr), HCCL_SUCCESS);
EXPECT_EQ(hcclFreeWindowMem((void*)localMr.addr), HCCL_SUCCESS);
EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemPrepare.addr)), HCCL_SUCCESS);
EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemDone.addr)), HCCL_SUCCESS);
EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemAck.addr)), HCCL_SUCCESS);
EXPECT_EQ(hcclDestroyAscendQP(&localQPInfo), HCCL_SUCCESS);
EXPECT_EQ(hcclAscendRdmaDeInit(), HCCL_SUCCESS);
EXPECT_EQ(hrtResetDevice(devId), HCCL_SUCCESS);
return (nullptr);
}
void* ThreadHandleQPWithAttr(void* args)
{
s32 devId = *(s32*)args;
HcclResult ret = HCCL_SUCCESS;
EXPECT_EQ(hrtSetDevice(devId), HCCL_SUCCESS);
EXPECT_EQ(hcclAscendRdmaInit(), HCCL_SUCCESS);
AscendQPInfo localQPInfo;
localQPInfo.qpn = 1;
localQPInfo.rq_depth = 128;
localQPInfo.sq_depth = 128;
localQPInfo.scq_depth = 128;
localQPInfo.rcq_depth = 128;
EXPECT_EQ(hcclCreateAscendQPWithAttr(&localQPInfo), HCCL_SUCCESS);
AscendQPQos qpQos;
qpQos.sl = 4;
qpQos.tc = 4;
EXPECT_EQ(hcclModifyAscendQPEx(&localQPInfo, &localQPInfo, &qpQos), HCCL_SUCCESS);
AscendMrInfo localSyncMemPrepare;
localSyncMemPrepare.addr = 0x2;
localSyncMemPrepare.size = 4;
localSyncMemPrepare.key = 4;
AscendMrInfo localSyncMemDone;
localSyncMemDone.addr = 0x3;
localSyncMemDone.size = 4;
localSyncMemDone.key = 4;
AscendMrInfo localSyncMemAck;
localSyncMemAck.addr = 0x5;
localSyncMemAck.size = 4;
localSyncMemAck.key = 4;
EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemPrepare.addr))), HCCL_SUCCESS);
EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemPrepare), HCCL_SUCCESS);
EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemDone.addr))), HCCL_SUCCESS);
EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemDone), HCCL_SUCCESS);
EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemAck.addr))), HCCL_SUCCESS);
EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemAck), HCCL_SUCCESS);
AscendMrInfo localMr;
localMr.addr = 0x11111111;
localMr.size = 8;
localMr.key = 4;
EXPECT_EQ(hcclAllocWindowMem((void**)(&localMr.addr), localMr.size), HCCL_SUCCESS);
EXPECT_EQ(hcclRegisterMem(&localMr), HCCL_SUCCESS);
AscendSendRecvLinkInfo linkInfo;
linkInfo.localSyncMemAck = &localSyncMemAck;
linkInfo.localQPinfo = &localQPInfo;
linkInfo.localSyncMemDone = &localSyncMemDone;
linkInfo.localSyncMemPrepare = &localSyncMemPrepare;
linkInfo.remoteSyncMemAck = &localSyncMemAck;
linkInfo.remoteSyncMemDone = &localSyncMemDone;
linkInfo.remoteSyncMemPrepare = &localSyncMemPrepare;
linkInfo.wqePerDoorbell = 2;
aclrtStream stream = (aclrtStream)0x87654321;
EXPECT_EQ(HcclBatchPutMRByAscendQP(1, &localMr, &localMr, &linkInfo, stream), HCCL_SUCCESS);
EXPECT_EQ(HcclWaitPutMRByAscendQP(&linkInfo, stream), HCCL_SUCCESS);
EXPECT_EQ(hcclDeRegisterMem(&localMr), HCCL_SUCCESS);
EXPECT_EQ(hcclFreeWindowMem((void*)localMr.addr), HCCL_SUCCESS);
EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemPrepare.addr)), HCCL_SUCCESS);
EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemDone.addr)), HCCL_SUCCESS);
EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemAck.addr)), HCCL_SUCCESS);
EXPECT_EQ(hcclDestroyAscendQP(&localQPInfo), HCCL_SUCCESS);
EXPECT_EQ(hcclAscendRdmaDeInit(), HCCL_SUCCESS);
EXPECT_EQ(hrtResetDevice(devId), HCCL_SUCCESS);
return (nullptr);
}
/**
 * Runs ThreadHandleTypIcalQP on DEV_NUM worker threads, one device id per thread,
 * then waits for all of them to finish before destroying the thread objects.
 */
TEST_F(MultiThreadNpuGpu, EndtoEndMutiThread)
{
    MOCKER(hrtRaTypicalQpCreate).stubs().will(invoke(stub_hrtRaTypicalQpCreate));
    MOCKER(HrtRaQpDestroy).stubs().will(invoke(stub_hrtRaQpDestroy_1));
    MOCKER(HrtRaGetNotifyBaseAddr).stubs().will(invoke(stub_HrtRaGetNotifyBaseAddr_3));
    MOCKER(GetExternalInputRdmaTrafficClass).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaServerLevel).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaRetryCnt).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaTimeOut).stubs().will(returnValue(1));
    MOCKER(hrtMemSyncCopy).stubs().will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtNotifyWaitWithTimeOut).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtRDMADBSend).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));

    // Each worker receives a pointer to its own slot. The array lives until the end of the
    // test, so a worker never reads a loop counter that has already been incremented or
    // has gone out of scope.
    s32 devIds[DEV_NUM];
    sal_thread_t tid[DEV_NUM];
    for (int idx = 0; idx < DEV_NUM; idx++) {
        devIds[idx] = idx;
        tid[idx] = sal_thread_create("thread", ThreadHandleTypIcalQP, static_cast<void*>(&devIds[idx]));
        EXPECT_NE(tid[idx], (sal_thread_t )nullptr);
    }

    // Poll until every worker has finished.
    for (s32 idx = 0; idx < DEV_NUM; ++idx) {
        while (sal_thread_is_running(tid[idx])) {
            SaluSleep(SAL_MILLISECOND_USEC * 10);
        }
    }
    SaluSleep(SAL_MILLISECOND_USEC * 2);

    for (int idx = 0; idx < DEV_NUM; idx++) {
        (void)sal_thread_destroy(tid[idx]);
    }
    GlobalMockObject::verify();
}
/**
 * Calls ThreadHandleTypIcalQP directly on the test thread for device ids 0 through 7,
 * so a single thread switches across eight devices in turn.
 */
TEST_F(MultiThreadNpuGpu, EndtoEndMutiThreadSwitchDevice)
{
    // QP lifecycle and notify address are served by the local stubs.
    MOCKER(hrtRaTypicalQpCreate).stubs().will(invoke(stub_hrtRaTypicalQpCreate));
    MOCKER(HrtRaQpDestroy).stubs().will(invoke(stub_hrtRaQpDestroy_1));
    MOCKER(HrtRaGetNotifyBaseAddr).stubs().will(invoke(stub_HrtRaGetNotifyBaseAddr_3));

    // External RDMA configuration getters all return 1.
    MOCKER(GetExternalInputRdmaTrafficClass).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaServerLevel).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaRetryCnt).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaTimeOut).stubs().will(returnValue(1));

    // Runtime operations succeed without doing any work.
    MOCKER(hrtMemSyncCopy).stubs().will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtNotifyWaitWithTimeOut).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtRDMADBSend).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));

    int devId = 0;
    while (devId < 8) {
        ThreadHandleTypIcalQP((void*)&devId);
        devId++;
    }
    GlobalMockObject::verify();
}
/**
 * Calls ThreadHandleQPWithAttr directly on the test thread for device ids 0 through 7.
 * Unlike the typical-QP tests, QP creation goes through the with-attrs stubs and the
 * interface-version query is stubbed to report version 2.
 */
TEST_F(MultiThreadNpuGpu, EndtoEndOneProcessWithAttr)
{
    // Attribute-based QP creation path.
    MOCKER(hrtRaGetQpAttr).stubs().will(invoke(stub_hrtRaGetQpAttr));
    MOCKER(hrtRaGetInterfaceVersion).stubs().will(invoke(stub_hrtRaGetInterfaceVersion_support));
    MOCKER(hrtRaQpCreateWithAttrs).stubs().will(invoke(stub_hrtRaQpCreateWithAttrs));
    MOCKER(HrtRaQpDestroy).stubs().will(invoke(stub_hrtRaQpDestroy_1));
    MOCKER(HrtRaGetNotifyBaseAddr).stubs().will(invoke(stub_HrtRaGetNotifyBaseAddr_3));

    // External RDMA configuration getters all return 1.
    MOCKER(GetExternalInputRdmaTrafficClass).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaServerLevel).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaRetryCnt).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaTimeOut).stubs().will(returnValue(1));

    // Runtime operations succeed without doing any work.
    MOCKER(hrtMemSyncCopy).stubs().will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtNotifyWaitWithTimeOut).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtRDMADBSend).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));

    int devId = 0;
    while (devId < 8) {
        ThreadHandleQPWithAttr((void*)&devId);
        devId++;
    }
    GlobalMockObject::verify();
}
/**
 * Runs ThreadHandleQPWithAttr on DEV_NUM worker threads, one device id per thread,
 * then waits for all of them to finish before destroying the thread objects.
 */
TEST_F(MultiThreadNpuGpu, EndtoEndOneProcessWithAttrMultiThread)
{
    MOCKER(hrtRaGetQpAttr).stubs().will(invoke(stub_hrtRaGetQpAttr));
    MOCKER(hrtRaGetInterfaceVersion).stubs().will(invoke(stub_hrtRaGetInterfaceVersion_support));
    MOCKER(hrtRaQpCreateWithAttrs).stubs().will(invoke(stub_hrtRaQpCreateWithAttrs));
    MOCKER(HrtRaQpDestroy).stubs().will(invoke(stub_hrtRaQpDestroy_1));
    MOCKER(HrtRaGetNotifyBaseAddr).stubs().will(invoke(stub_HrtRaGetNotifyBaseAddr_3));
    MOCKER(GetExternalInputRdmaTrafficClass).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaServerLevel).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaRetryCnt).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaTimeOut).stubs().will(returnValue(1));
    MOCKER(hrtMemSyncCopy).stubs().will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtNotifyWaitWithTimeOut).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtRDMADBSend).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));

    // Each worker receives a pointer to its own slot. The array lives until the end of the
    // test, so a worker never reads a loop counter that has already been incremented or
    // has gone out of scope.
    s32 devIds[DEV_NUM];
    sal_thread_t tid[DEV_NUM];
    for (int idx = 0; idx < DEV_NUM; idx++) {
        devIds[idx] = idx;
        tid[idx] = sal_thread_create("thread", ThreadHandleQPWithAttr, static_cast<void*>(&devIds[idx]));
        EXPECT_NE(tid[idx], (sal_thread_t )nullptr);
    }

    // Poll until every worker has finished.
    for (s32 idx = 0; idx < DEV_NUM; ++idx) {
        while (sal_thread_is_running(tid[idx])) {
            SaluSleep(SAL_MILLISECOND_USEC * 10);
        }
    }
    SaluSleep(SAL_MILLISECOND_USEC * 2);

    for (int idx = 0; idx < DEV_NUM; idx++) {
        (void)sal_thread_destroy(tid[idx]);
    }
    GlobalMockObject::verify();
}
/**
 * Negative path for attribute-based QP creation when hrtRaGetInterfaceVersion is not mocked:
 * CreateQpWithDepthConfig is expected to return HCCL_E_NOT_SUPPORT, and the public
 * hcclCreateAscendQPWithAttr wrapper is expected to return HCCL_E_INTERNAL.
 */
TEST_F(MultiThreadNpuGpu, CreateQPWithAttrNotSupport)
{
    // Part 1: call the internal helper directly with valid depths.
    // NOTE(review): rdmaHandle and qpHandle are passed without being initialized - confirm
    // that the callee returns before using them on this path.
    RdmaHandle rdmaHandle;
    QpHandle qpHandle;
    struct TypicalQp qpInfoTmp;
    QpConfigInfo qpConfig;
    qpConfig.sq_depth = 128;
    qpConfig.rq_depth = 128;
    qpConfig.scq_depth = 128;
    qpConfig.rcq_depth = 128;
    EXPECT_EQ(CreateQpWithDepthConfig(rdmaHandle, OPBASE_QP_MODE, qpConfig, qpHandle, qpInfoTmp), HCCL_E_NOT_SUPPORT);

    // Part 2: the same request through the public API on device 0.
    MOCKER(HrtRaGetNotifyBaseAddr).stubs().will(invoke(stub_HrtRaGetNotifyBaseAddr_3));
    EXPECT_EQ(hrtSetDevice(0), HCCL_SUCCESS);
    EXPECT_EQ(hcclAscendRdmaInit(), HCCL_SUCCESS);

    AscendQPInfo localQPInfo;
    localQPInfo.qpn = 1;
    localQPInfo.sq_depth = 128;
    localQPInfo.rq_depth = 128;
    localQPInfo.scq_depth = 128;
    localQPInfo.rcq_depth = 128;
    EXPECT_EQ(hcclCreateAscendQPWithAttr(&localQPInfo), HCCL_E_INTERNAL);

    EXPECT_EQ(hcclAscendRdmaDeInit(), HCCL_SUCCESS);
    EXPECT_EQ(hrtResetDevice(0), HCCL_SUCCESS);
    GlobalMockObject::verify();
}
/**
 * Parameter validation of hcclCreateAscendQPWithAttr: for each of the four depth fields
 * in turn, the values 32789, 2 and 253 are each expected to be rejected with HCCL_E_PARA
 * while the other three fields hold 128.
 */
TEST_F(MultiThreadNpuGpu, CreateQPWithAttrConfigError)
{
    MOCKER(HrtRaGetNotifyBaseAddr).stubs().will(invoke(stub_HrtRaGetNotifyBaseAddr_3));
    EXPECT_EQ(hrtSetDevice(0), HCCL_SUCCESS);
    EXPECT_EQ(hcclAscendRdmaInit(), HCCL_SUCCESS);

    AscendQPInfo localQPInfo;
    localQPInfo.qpn = 1;
    localQPInfo.rq_depth = 128;
    localQPInfo.scq_depth = 128;
    localQPInfo.rcq_depth = 128;

    // Shared expectation: the current contents of localQPInfo must be rejected.
    const auto expectRejected = [&localQPInfo]() {
        EXPECT_EQ(hcclCreateAscendQPWithAttr(&localQPInfo), HCCL_E_PARA);
    };

    // Send-queue depth.
    localQPInfo.sq_depth = 32789;
    expectRejected();
    localQPInfo.sq_depth = 2;
    expectRejected();
    localQPInfo.sq_depth = 253;
    expectRejected();
    localQPInfo.sq_depth = 128;

    // Receive-queue depth.
    localQPInfo.rq_depth = 32789;
    expectRejected();
    localQPInfo.rq_depth = 2;
    expectRejected();
    localQPInfo.rq_depth = 253;
    expectRejected();
    localQPInfo.rq_depth = 128;

    // Send completion-queue depth.
    localQPInfo.scq_depth = 32789;
    expectRejected();
    localQPInfo.scq_depth = 2;
    expectRejected();
    localQPInfo.scq_depth = 253;
    expectRejected();
    localQPInfo.scq_depth = 128;

    // Receive completion-queue depth.
    localQPInfo.rcq_depth = 32789;
    expectRejected();
    localQPInfo.rcq_depth = 2;
    expectRejected();
    localQPInfo.rcq_depth = 253;
    expectRejected();
    localQPInfo.rcq_depth = 128;

    EXPECT_EQ(hcclAscendRdmaDeInit(), HCCL_SUCCESS);
    EXPECT_EQ(hrtResetDevice(0), HCCL_SUCCESS);
    GlobalMockObject::verify();
}
/**
 * Single-thread end-to-end pass over the one-sided put API on device 0:
 * RDMA init -> QP create/modify -> sync and window memory setup ->
 * HcclOneSideBatchPutByAscendQP -> teardown. Every call is expected to return HCCL_SUCCESS.
 * HrtRaGetHccnCfg is additionally stubbed to return a fixed configuration string.
 */
TEST_F(MultiThreadNpuGpu, OneSideEndtoEndOneProcess)
{
    // QP lifecycle, notify address and HCCN configuration are served by the local stubs.
    MOCKER(hrtRaTypicalQpCreate).stubs().will(invoke(stub_hrtRaTypicalQpCreate));
    MOCKER(HrtRaQpDestroy).stubs().will(invoke(stub_hrtRaQpDestroy_1));
    MOCKER(HrtRaGetNotifyBaseAddr).stubs().will(invoke(stub_HrtRaGetNotifyBaseAddr_3));
    MOCKER(HrtRaGetHccnCfg).stubs().will(invoke(stub_HrtRaGetHccnCfg));
    // External RDMA configuration getters all return 1.
    MOCKER(GetExternalInputRdmaTrafficClass).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaServerLevel).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaRetryCnt).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaTimeOut).stubs().will(returnValue(1));
    // Runtime operations succeed without doing any work.
    MOCKER(hrtMemSyncCopy).stubs().will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtNotifyWaitWithTimeOut).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtRDMADBSend).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));

    EXPECT_EQ(hrtSetDevice(0), HCCL_SUCCESS);
    EXPECT_EQ(hcclAscendRdmaInit(), HCCL_SUCCESS);

    AscendQPInfo localQPInfo;
    EXPECT_EQ(hcclCreateAscendQP(&localQPInfo), HCCL_SUCCESS);
    // No real peer exists in this test; value-initialize the remote description so the
    // modify call never reads indeterminate fields.
    AscendQPInfo remoteQpInfo{};
    AscendQPQos qpQos;
    qpQos.sl = 4;
    qpQos.tc = 4;
    EXPECT_EQ(hcclModifyAscendQPEx(&localQPInfo, &remoteQpInfo, &qpQos), HCCL_SUCCESS);

    // Sync memory descriptors. The addr values are placeholders; the alloc calls below
    // receive a pointer to addr (presumably to store the allocated address there).
    AscendMrInfo localSyncMemPrepare;
    localSyncMemPrepare.addr = 0x2;
    localSyncMemPrepare.size = 4;
    localSyncMemPrepare.key = 4;
    AscendMrInfo localSyncMemDone;
    localSyncMemDone.addr = 0x3;
    localSyncMemDone.size = 4;
    localSyncMemDone.key = 4;
    AscendMrInfo localSyncMemAck;
    localSyncMemAck.addr = 0x5;
    localSyncMemAck.size = 4;
    localSyncMemAck.key = 4;
    EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemPrepare.addr))), HCCL_SUCCESS);
    EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemPrepare), HCCL_SUCCESS);
    EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemDone.addr))), HCCL_SUCCESS);
    EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemDone), HCCL_SUCCESS);
    EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemAck.addr))), HCCL_SUCCESS);
    EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemAck), HCCL_SUCCESS);

    // Window memory used as both source and destination of the put.
    AscendMrInfo localMr;
    localMr.addr = 0x11111111;
    localMr.size = 8;
    localMr.key = 4;
    EXPECT_EQ(hcclAllocWindowMem((void**)(&localMr.addr), localMr.size), HCCL_SUCCESS);
    EXPECT_EQ(hcclRegisterMem(&localMr), HCCL_SUCCESS);

    // Loopback link: the remote notify-value memory is the local ack sync memory.
    AscendSendLinkInfo linkInfo;
    linkInfo.localSyncMemAck = &localSyncMemAck;
    linkInfo.localQPinfo = &localQPInfo;
    linkInfo.remoteNotifyValueMem = &localSyncMemAck;
    linkInfo.wqePerDoorbell = 2;
    aclrtStream stream = (aclrtStream)0x87654321;  // dummy stream value
    EXPECT_EQ(HcclOneSideBatchPutByAscendQP(1, &localMr, &localMr, &linkInfo, stream), HCCL_SUCCESS);

    // Teardown, in reverse order of acquisition.
    EXPECT_EQ(hcclDeRegisterMem(&localMr), HCCL_SUCCESS);
    EXPECT_EQ(hcclFreeWindowMem((void*)localMr.addr), HCCL_SUCCESS);
    EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemPrepare.addr)), HCCL_SUCCESS);
    EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemDone.addr)), HCCL_SUCCESS);
    EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemAck.addr)), HCCL_SUCCESS);
    EXPECT_EQ(hcclDestroyAscendQP(&localQPInfo), HCCL_SUCCESS);
    EXPECT_EQ(hrtResetDevice(0), HCCL_SUCCESS);
    EXPECT_EQ(hcclAscendRdmaDeInit(), HCCL_SUCCESS);
    GlobalMockObject::verify();
}
/**
 * Negative variant of the one-sided put flow on device 0: the link's remote notify-value
 * memory is a hand-built descriptor with size 9 (the sync memories in this test use size 4),
 * and HcclOneSideBatchPutByAscendQP is expected to reject it with HCCL_E_PARA.
 * Setup and teardown calls are still expected to return HCCL_SUCCESS.
 */
TEST_F(MultiThreadNpuGpu, OneSideEndtoEndOneProcessVerifyFailed)
{
    // QP lifecycle and notify address are served by the local stubs.
    MOCKER(hrtRaTypicalQpCreate).stubs().will(invoke(stub_hrtRaTypicalQpCreate));
    MOCKER(HrtRaQpDestroy).stubs().will(invoke(stub_hrtRaQpDestroy_1));
    MOCKER(HrtRaGetNotifyBaseAddr).stubs().will(invoke(stub_HrtRaGetNotifyBaseAddr_3));
    // External RDMA configuration getters all return 1.
    MOCKER(GetExternalInputRdmaTrafficClass).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaServerLevel).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaRetryCnt).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaTimeOut).stubs().will(returnValue(1));
    // Runtime operations succeed without doing any work.
    MOCKER(hrtMemSyncCopy).stubs().will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtNotifyWaitWithTimeOut).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtRDMADBSend).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));

    EXPECT_EQ(hrtSetDevice(0), HCCL_SUCCESS);
    EXPECT_EQ(hcclAscendRdmaInit(), HCCL_SUCCESS);

    AscendQPInfo localQPInfo;
    EXPECT_EQ(hcclCreateAscendQP(&localQPInfo), HCCL_SUCCESS);
    // No real peer exists in this test; value-initialize the remote description so the
    // modify call never reads indeterminate fields.
    AscendQPInfo remoteQpInfo{};
    AscendQPQos qpQos;
    qpQos.sl = 4;
    qpQos.tc = 4;
    EXPECT_EQ(hcclModifyAscendQPEx(&localQPInfo, &remoteQpInfo, &qpQos), HCCL_SUCCESS);

    // Sync memory descriptors. The addr values are placeholders; the alloc calls below
    // receive a pointer to addr (presumably to store the allocated address there).
    AscendMrInfo localSyncMemPrepare;
    localSyncMemPrepare.addr = 0x2;
    localSyncMemPrepare.size = 4;
    localSyncMemPrepare.key = 4;
    AscendMrInfo localSyncMemDone;
    localSyncMemDone.addr = 0x3;
    localSyncMemDone.size = 4;
    localSyncMemDone.key = 4;
    AscendMrInfo localSyncMemAck;
    localSyncMemAck.addr = 0x5;
    localSyncMemAck.size = 4;
    localSyncMemAck.key = 4;
    EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemPrepare.addr))), HCCL_SUCCESS);
    EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemPrepare), HCCL_SUCCESS);
    EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemDone.addr))), HCCL_SUCCESS);
    EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemDone), HCCL_SUCCESS);
    EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemAck.addr))), HCCL_SUCCESS);
    EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemAck), HCCL_SUCCESS);

    // Window memory used as both source and destination of the put.
    AscendMrInfo localMr;
    localMr.addr = 0x11111111;
    localMr.size = 8;
    localMr.key = 4;
    EXPECT_EQ(hcclAllocWindowMem((void**)(&localMr.addr), localMr.size), HCCL_SUCCESS);
    EXPECT_EQ(hcclRegisterMem(&localMr), HCCL_SUCCESS);

    // Hand-built remote notify descriptor that was never allocated or registered.
    AscendMrInfo remoteNotifyValue;
    remoteNotifyValue.addr = 0x5;
    remoteNotifyValue.size = 9;
    remoteNotifyValue.key = 4;

    AscendSendLinkInfo linkInfo;
    linkInfo.localSyncMemAck = &localSyncMemAck;
    linkInfo.localQPinfo = &localQPInfo;
    linkInfo.remoteNotifyValueMem = &remoteNotifyValue;
    linkInfo.wqePerDoorbell = 2;
    aclrtStream stream = (aclrtStream)0x87654321;  // dummy stream value
    EXPECT_EQ(HcclOneSideBatchPutByAscendQP(1, &localMr, &localMr, &linkInfo, stream), HCCL_E_PARA);

    // Teardown, in reverse order of acquisition.
    EXPECT_EQ(hcclDeRegisterMem(&localMr), HCCL_SUCCESS);
    EXPECT_EQ(hcclFreeWindowMem((void*)localMr.addr), HCCL_SUCCESS);
    EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemPrepare.addr)), HCCL_SUCCESS);
    EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemDone.addr)), HCCL_SUCCESS);
    EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemAck.addr)), HCCL_SUCCESS);
    EXPECT_EQ(hcclDestroyAscendQP(&localQPInfo), HCCL_SUCCESS);
    EXPECT_EQ(hrtResetDevice(0), HCCL_SUCCESS);
    EXPECT_EQ(hcclAscendRdmaDeInit(), HCCL_SUCCESS);
    GlobalMockObject::verify();
}
void* OneSideThreadHandleTypIcalQP(void* args)
{
s32 devId = *(s32*)args;
HcclResult ret = HCCL_SUCCESS;
EXPECT_EQ(hrtSetDevice(devId), HCCL_SUCCESS);
EXPECT_EQ(hcclAscendRdmaInit(), HCCL_SUCCESS);
AscendQPInfo localQPInfo;
EXPECT_EQ(hcclCreateAscendQP(&localQPInfo), HCCL_SUCCESS);
AscendQPInfo remoteQpInfo;
EXPECT_EQ(hcclCreateAscendQP(&remoteQpInfo), HCCL_SUCCESS);
AscendQPQos qpQos;
qpQos.sl = 4;
qpQos.tc = 4;
EXPECT_EQ(hcclModifyAscendQPEx(&localQPInfo, &remoteQpInfo, &qpQos), HCCL_SUCCESS);
AscendMrInfo localSyncMemPrepare;
localSyncMemPrepare.addr = 0x2;
localSyncMemPrepare.size = 4;
localSyncMemPrepare.key = 4;
AscendMrInfo localSyncMemDone;
localSyncMemDone.addr = 0x3;
localSyncMemDone.size = 4;
localSyncMemDone.key = 4;
AscendMrInfo localSyncMemAck;
localSyncMemAck.addr = 0x5;
localSyncMemAck.size = 4;
localSyncMemAck.key = 4;
EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemPrepare.addr))), HCCL_SUCCESS);
EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemPrepare), HCCL_SUCCESS);
EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemDone.addr))), HCCL_SUCCESS);
EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemDone), HCCL_SUCCESS);
EXPECT_EQ(hcclAllocSyncMem(reinterpret_cast<int32_t**>(&(localSyncMemAck.addr))), HCCL_SUCCESS);
EXPECT_EQ(hcclGetSyncMemRegKey(&localSyncMemAck), HCCL_SUCCESS);
AscendMrInfo localMr;
localMr.addr = 0x11111111;
localMr.size = 8;
localMr.key = 4;
EXPECT_EQ(hcclAllocWindowMem((void**)(&localMr.addr), localMr.size), HCCL_SUCCESS);
EXPECT_EQ(hcclRegisterMem(&localMr), HCCL_SUCCESS);
AscendSendLinkInfo linkInfo;
linkInfo.localSyncMemAck = &localSyncMemAck;
linkInfo.localQPinfo = &localQPInfo;
linkInfo.remoteNotifyValueMem = &localSyncMemAck;
linkInfo.wqePerDoorbell = 2;
aclrtStream stream = (aclrtStream)0x87654321;
EXPECT_EQ(HcclOneSideBatchPutByAscendQP(1, &localMr, &localMr, &linkInfo, stream), HCCL_SUCCESS);
EXPECT_EQ(hcclDeRegisterMem(&localMr), HCCL_SUCCESS);
EXPECT_EQ(hcclFreeWindowMem((void*)localMr.addr), HCCL_SUCCESS);
EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemPrepare.addr)), HCCL_SUCCESS);
EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemDone.addr)), HCCL_SUCCESS);
EXPECT_EQ(hcclFreeSyncMem(reinterpret_cast<int32_t*>(localSyncMemAck.addr)), HCCL_SUCCESS);
EXPECT_EQ(hcclDestroyAscendQP(&localQPInfo), HCCL_SUCCESS);
EXPECT_EQ(hrtResetDevice(devId), HCCL_SUCCESS);
EXPECT_EQ(hcclAscendRdmaDeInit(), HCCL_SUCCESS);
return (nullptr);
}
/**
 * Calls OneSideThreadHandleTypIcalQP directly on the test thread for device ids 0 through 7,
 * so a single thread switches across eight devices in turn.
 */
TEST_F(MultiThreadNpuGpu, OneSideEndtoEndMutiThreadSwitchDevice)
{
    // QP lifecycle and notify address are served by the local stubs.
    MOCKER(hrtRaTypicalQpCreate).stubs().will(invoke(stub_hrtRaTypicalQpCreate));
    MOCKER(HrtRaQpDestroy).stubs().will(invoke(stub_hrtRaQpDestroy_1));
    MOCKER(HrtRaGetNotifyBaseAddr).stubs().will(invoke(stub_HrtRaGetNotifyBaseAddr_3));

    // External RDMA configuration getters all return 1.
    MOCKER(GetExternalInputRdmaTrafficClass).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaServerLevel).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaRetryCnt).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaTimeOut).stubs().will(returnValue(1));

    // Runtime operations succeed without doing any work.
    MOCKER(hrtMemSyncCopy).stubs().will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtNotifyWaitWithTimeOut).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtRDMADBSend).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));

    int devId = 0;
    while (devId < 8) {
        OneSideThreadHandleTypIcalQP((void*)&devId);
        devId++;
    }
    GlobalMockObject::verify();
}
/**
 * Runs OneSideThreadHandleTypIcalQP on DEV_NUM worker threads, one device id per thread,
 * then waits for all of them to finish before destroying the thread objects.
 */
TEST_F(MultiThreadNpuGpu, OneSideEndtoEndMutiThread)
{
    MOCKER(hrtRaTypicalQpCreate).stubs().will(invoke(stub_hrtRaTypicalQpCreate));
    MOCKER(HrtRaQpDestroy).stubs().will(invoke(stub_hrtRaQpDestroy_1));
    MOCKER(HrtRaGetNotifyBaseAddr).stubs().will(invoke(stub_HrtRaGetNotifyBaseAddr_3));
    MOCKER(GetExternalInputRdmaTrafficClass).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaServerLevel).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaRetryCnt).stubs().will(returnValue(1));
    MOCKER(GetExternalInputRdmaTimeOut).stubs().will(returnValue(1));
    MOCKER(hrtMemSyncCopy).stubs().will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtNotifyWaitWithTimeOut).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));
    MOCKER(hrtRDMADBSend).stubs().with(mockcpp::any()).will(returnValue(HCCL_SUCCESS));

    // Each worker receives a pointer to its own slot. The array lives until the end of the
    // test, so a worker never reads a loop counter that has already been incremented or
    // has gone out of scope.
    s32 devIds[DEV_NUM];
    sal_thread_t tid[DEV_NUM];
    for (int idx = 0; idx < DEV_NUM; idx++) {
        devIds[idx] = idx;
        tid[idx] = sal_thread_create("thread", OneSideThreadHandleTypIcalQP, static_cast<void*>(&devIds[idx]));
        EXPECT_NE(tid[idx], (sal_thread_t )nullptr);
    }

    // Poll until every worker has finished.
    for (s32 idx = 0; idx < DEV_NUM; ++idx) {
        while (sal_thread_is_running(tid[idx])) {
            SaluSleep(SAL_MILLISECOND_USEC * 10);
        }
    }
    SaluSleep(SAL_MILLISECOND_USEC * 2);

    for (int idx = 0; idx < DEV_NUM; idx++) {
        (void)sal_thread_destroy(tid[idx]);
    }
    GlobalMockObject::verify();
}