/**
 * Copyright (c) 2025 Huawei Technologies Co., Ltd.
 * This program is free software, you can redistribute it and/or modify it under the terms and conditions of
 * CANN Open Software License Agreement Version 2.0 (the "License").
 * Please refer to the License for details. You may not use this file except in compliance with the License.
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
 * See LICENSE in the root of the software repository for the full text of the License.
 */

#include <map>
#include <mutex>
#include <algorithm>
#include <functional>
#include "acl_rt_impl.h"
#include "runtime/mem.h"
#include "runtime/rts/rts_mem.h"
#include "runtime/dev.h"
#include "runtime/rts/rts_device.h"
#include "runtime/rt_stars.h"
#include "runtime/rt_mem_queue.h"
#include "runtime/rt_inner_mem.h"
#include "runtime/inner_kernel.h"
#include "utils/math_utils.h"
#include "common/log_inner.h"
#include "common/error_codes_inner.h"
#include "common/prof_reporter.h"
#include "common/resource_statistics.h"
#include "utils/data_type_utils.h"

namespace {
constexpr uint32_t MEM_SIZE_MAX = 96U;
constexpr uint32_t MAX_PADDING_SIZE_STR_LEN = 32U;
constexpr int32_t STRTOUL_DECIMAL_BASE = 10;
constexpr size_t DATA_MEMORY_ALIGN_SIZE = 32UL;
constexpr size_t DATA_MEMORY_PADDING_SIZE = 32UL;
constexpr unsigned int FLAG_START_DYNAMIC_ALLOC_MEM = 0x200U;
constexpr uint32_t DRV_MEM_HOST_NUMA_SIDE = 2U;
constexpr size_t ALIGNMENT_4BYTE = 4;
constexpr size_t ALIGNMENT_4BYTE_MASK = ALIGNMENT_4BYTE - 1;  // 0x3

static const std::map<aclDataType, rtDataType> kMapDataType = {
    { ACL_FLOAT, RT_DATA_TYPE_FP32 },
    { ACL_FLOAT16, RT_DATA_TYPE_FP16 },
    { ACL_INT16, RT_DATA_TYPE_INT16 },
    { ACL_INT4, RT_DATA_TYPE_INT4 },
    { ACL_INT8, RT_DATA_TYPE_INT8 },
    { ACL_INT32, RT_DATA_TYPE_INT32 },
    { ACL_BF16, RT_DATA_TYPE_BFP16 },
    { ACL_UINT8, RT_DATA_TYPE_UINT8 },
    { ACL_UINT16, RT_DATA_TYPE_UINT16 },
    { ACL_UINT32, RT_DATA_TYPE_UINT32 },
};

using Handler = std::function<void(rtDrvMemProp_t&, bool, bool)>;
static const std::map<int32_t, Handler> memAttrHandlers = {
    // HBM (Direct Assign)
    {ACL_HBM_MEM_HUGE,    [](rtDrvMemProp_t& p, bool, bool) { p.pg_type = HUGE_PAGE_TYPE; p.mem_type = HBM_TYPE; }},
    {ACL_HBM_MEM_NORMAL,  [](rtDrvMemProp_t& p, bool, bool) { p.pg_type = NORMAL_PAGE_TYPE; p.mem_type = HBM_TYPE; }},
    {ACL_HBM_MEM_HUGE1G,  [](rtDrvMemProp_t& p, bool, bool) { p.pg_type = HUGE1G_PAGE_TYPE; p.mem_type = HBM_TYPE; }},

    // DDR (Host Only)
    {ACL_DDR_MEM_HUGE,        [](rtDrvMemProp_t& p, bool isHost, bool) { if(isHost) { p.pg_type = HUGE_PAGE_TYPE; p.mem_type = DDR_TYPE; } }},
    {ACL_DDR_MEM_NORMAL,      [](rtDrvMemProp_t& p, bool isHost, bool) { if(isHost) { p.pg_type = NORMAL_PAGE_TYPE; p.mem_type = DDR_TYPE; } }},
    {ACL_DDR_MEM_P2P_HUGE,    [](rtDrvMemProp_t& p, bool isHost, bool) { if(isHost) { p.pg_type = HUGE_PAGE_TYPE; p.mem_type = P2P_DDR_TYPE; } }},
    {ACL_DDR_MEM_P2P_NORMAL,  [](rtDrvMemProp_t& p, bool isHost, bool) { if(isHost) { p.pg_type = NORMAL_PAGE_TYPE; p.mem_type = P2P_DDR_TYPE; } }},

    // Generic (Host / Device)
    {ACL_MEM_NORMAL,      [](rtDrvMemProp_t& p, bool isHost, bool isDev) { if(isHost) { p.pg_type = NORMAL_PAGE_TYPE; p.mem_type = DDR_TYPE; } else if(isDev) { p.pg_type = NORMAL_PAGE_TYPE; p.mem_type = HBM_TYPE; } }},
    {ACL_MEM_HUGE,        [](rtDrvMemProp_t& p, bool isHost, bool isDev) { if(isHost) { p.pg_type = HUGE_PAGE_TYPE; p.mem_type = DDR_TYPE; } else if(isDev) { p.pg_type = HUGE_PAGE_TYPE; p.mem_type = HBM_TYPE; } }},
    {ACL_MEM_HUGE1G,      [](rtDrvMemProp_t& p, bool isHost, bool isDev) { if(isHost) { p.pg_type = HUGE1G_PAGE_TYPE; p.mem_type = DDR_TYPE; } else if(isDev) { p.pg_type = HUGE1G_PAGE_TYPE; p.mem_type = HBM_TYPE; } }},

    // P2P (Host / Device)
    {ACL_MEM_P2P_NORMAL,  [](rtDrvMemProp_t& p, bool isHost, bool isDev) { if(isHost) { p.pg_type = NORMAL_PAGE_TYPE; p.mem_type = P2P_DDR_TYPE; } else if(isDev) { p.pg_type = NORMAL_PAGE_TYPE; p.mem_type = P2P_HBM_TYPE; } }},
    {ACL_MEM_P2P_HUGE,    [](rtDrvMemProp_t& p, bool isHost, bool isDev) { if(isHost) { p.pg_type = HUGE_PAGE_TYPE; p.mem_type = P2P_DDR_TYPE; } else if(isDev) { p.pg_type = HUGE_PAGE_TYPE; p.mem_type = P2P_HBM_TYPE; } }},
    {ACL_MEM_P2P_HUGE1G,  [](rtDrvMemProp_t& p, bool isHost, bool isDev) { if(isHost) { p.pg_type = HUGE1G_PAGE_TYPE; p.mem_type = P2P_DDR_TYPE; } else if(isDev) { p.pg_type = HUGE1G_PAGE_TYPE; p.mem_type = P2P_HBM_TYPE; } }}
};

inline aclError MemcpyKindTranslate(const aclrtMemcpyKind kind, rtMemcpyKind_t &rtKind)
{
    switch (kind) {
        case ACL_MEMCPY_HOST_TO_DEVICE: {
            rtKind = RT_MEMCPY_HOST_TO_DEVICE;
            break;
        }
        case ACL_MEMCPY_DEVICE_TO_DEVICE: {
            rtKind = RT_MEMCPY_DEVICE_TO_DEVICE;
            break;
        }
        case ACL_MEMCPY_DEVICE_TO_HOST: {
            rtKind = RT_MEMCPY_DEVICE_TO_HOST;
            break;
        }
        case ACL_MEMCPY_HOST_TO_HOST: {
            rtKind = RT_MEMCPY_HOST_TO_HOST;
            break;
        }
        case ACL_MEMCPY_DEFAULT: {
            rtKind = RT_MEMCPY_DEFAULT;
            break;
        }
        case ACL_MEMCPY_HOST_TO_BUF_TO_DEVICE: {
            rtKind = RT_MEMCPY_HOST_TO_DEVICE_EX;
            break;
        }
        default: {
            ACL_LOG_ERROR("[Check][MemcpyKindTranslate]param kind invalid, which is %d.", static_cast<int32_t>(kind));
            acl::AclErrorLogManager::ReportInputError(acl::INVALID_VALUE_MSG,
                std::vector<const char *>({"func", "value", "param", "expect"}),
                std::vector<const char *>({__func__, acl::GetMemcpyKindDesc(kind), "kind",
                "ACL_MEMCPY_HOST_TO_DEVICE or "
                "ACL_MEMCPY_DEVICE_TO_DEVICE or ACL_MEMCPY_DEVICE_TO_HOST or "
                "ACL_MEMCPY_HOST_TO_HOST or ACL_MEMCPY_DEFAULT or ACL_MEMCPY_HOST_TO_BUF_TO_DEVICE."}));
            return ACL_ERROR_INVALID_PARAM;
        }
    }
    return ACL_SUCCESS;
}

inline bool IsZeroSizeMemcpy(const size_t count)
{
    return count == 0UL;
}

inline bool IsZeroSizeMemcpy2d(const size_t width, const size_t height)
{
    return (width == 0UL) || (height == 0UL);
}

bool IsAllZeroSizeBatch(const size_t * const sizes, const size_t numBatches)
{
    return std::all_of(sizes, sizes + numBatches, [](const size_t size) {
        return size == 0UL;
    });
}

aclError CheckMemcpy2dParam(const void *const dst, const size_t dpitch, const void *const src, const size_t spitch,
    const size_t width, const size_t height, const aclrtMemcpyKind kind, rtMemcpyKind_t &rtKind)
{
    ACL_LOG_DEBUG("start to execute CheckMemcpy2dParam");
    switch (kind) {
        case ACL_MEMCPY_HOST_TO_DEVICE: {
            rtKind = RT_MEMCPY_HOST_TO_DEVICE;
            break;
        }
        case ACL_MEMCPY_DEVICE_TO_HOST: {
            rtKind = RT_MEMCPY_DEVICE_TO_HOST;
            break;
        }
        case ACL_MEMCPY_DEVICE_TO_DEVICE: {
            rtKind = RT_MEMCPY_DEVICE_TO_DEVICE;
            break;
        }
        case ACL_MEMCPY_DEFAULT: {
            rtKind = RT_MEMCPY_DEFAULT;
            break;
        }
        default: {
            ACL_LOG_ERROR("[Check][Kind]invalid kind of memcpy, kind = %d", static_cast<int32_t>(kind));
            acl::AclErrorLogManager::ReportInputError(acl::INVALID_VALUE_MSG,
                std::vector<const char *>({"func", "value", "param", "expect"}),
                std::vector<const char *>({__func__, acl::GetMemcpyKindDesc(kind),
                    "kind", "ACL_MEMCPY_HOST_TO_DEVICE or ACL_MEMCPY_DEVICE_TO_HOST or ACL_MEMCPY_DEVICE_TO_DEVICE or ACL_MEMCPY_DEFAULT"}));
            return ACL_ERROR_INVALID_PARAM;
        }
    }

    if (IsZeroSizeMemcpy2d(width, height)) {
        return ACL_SUCCESS;
    }

    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(dst);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(src);

    if ((width > spitch) || (width > dpitch)) {
        ACL_LOG_ERROR("[Check][Width]input param width[%zu] must be smaller than spitch[%zu] and dpitch[%zu]",
            width, spitch, dpitch);
        const std::string widthVal = std::to_string(width);
        std::string errMsg = acl::AclErrorLogManager::FormatStr("must be less than spitch and dpitch, spitch=%zu, dpitch=%zu",
            spitch, dpitch);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_REASON_MSG,
            std::vector<const char *>({"func", "value", "param", "reason"}),
            std::vector<const char *>({__func__, widthVal.c_str(), "width", errMsg.c_str()}));
        return ACL_ERROR_INVALID_PARAM;
    }
    return ACL_SUCCESS;
}
}

namespace acl {
void GetPaddingSize(size_t *paddingSize)
{
    const char* AI_CORE_SPEC_STR = "AICoreSpec";
    const char* PADDING_SIZE_STR = "padding_size";
    char paddingSizeStr[MAX_PADDING_SIZE_STR_LEN] = {0};
    const rtError_t error = rtGetSocSpec(AI_CORE_SPEC_STR, PADDING_SIZE_STR, paddingSizeStr, sizeof(paddingSizeStr));
    if (error != RT_ERROR_NONE) {
        ACL_LOG_EVENT("rtGetSocSpec did not complete successfully, ret=%d.", error);
        return;
    }
    char *endPtr = NULL;
    errno = 0;
    *paddingSize = static_cast<size_t>(strtoul(paddingSizeStr, &endPtr, STRTOUL_DECIMAL_BASE));
    if (errno == ERANGE || endPtr == paddingSizeStr || *endPtr != '\0') {
        *paddingSize = DATA_MEMORY_PADDING_SIZE;
        ACL_LOG_EVENT("paddingSizeStr could not be converted, paddingSizeStr[%s] is invalid.", paddingSizeStr);
    }
}

aclError GetAlignedAndPaddingSize(const size_t size, const bool isPadding, size_t &alignedSize)
{
    static std::once_flag hasReadPaddingSize;
    static size_t paddingSize = DATA_MEMORY_PADDING_SIZE;
    std::call_once(hasReadPaddingSize, [&]() {
        GetPaddingSize(&paddingSize);
    });
    // align size to multiple of 32 and paddingSize if needed
    const size_t appendSize = isPadding ? DATA_MEMORY_ALIGN_SIZE + paddingSize : DATA_MEMORY_ALIGN_SIZE;
    
    // check overflow before alignment calculation
    if ((size + appendSize) < size) {
        ACL_LOG_INNER_ERROR("[Check][Size]size too large: %zu", size);
        return ACL_ERROR_INVALID_PARAM;
    }
    
    alignedSize = (size + appendSize - 1UL) / DATA_MEMORY_ALIGN_SIZE * DATA_MEMORY_ALIGN_SIZE;
    return ACL_SUCCESS;
}

aclError aclMallocMemInner(void **devPtr, const size_t size, bool isPadding,
                           const aclrtMemMallocPolicy policy, const uint16_t moduleId)
{
    ACL_ADD_APPLY_TOTAL_COUNT(acl::ACL_STATISTICS_MALLOC_FREE);
    ACL_LOG_DEBUG("start to execute aclMallocMemInner, size = %zu", size);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devPtr);
    ACL_REQUIRES_POSITIVE_REPORT(size);
    size_t alignedSize = size;
    const bool huge1g = (policy == ACL_MEM_MALLOC_HUGE1G_ONLY) || (policy == ACL_MEM_MALLOC_HUGE1G_ONLY_P2P);
    isPadding = !huge1g && isPadding;
    ACL_REQUIRES_OK(acl::GetAlignedAndPaddingSize(size, isPadding, alignedSize));
    uint32_t flags = RT_MEMORY_DEFAULT;
    if (policy == ACL_MEM_MALLOC_HUGE_FIRST) {
        flags |= RT_MEMORY_POLICY_HUGE_PAGE_FIRST;
    } else if (policy == ACL_MEM_MALLOC_HUGE_ONLY) {
        flags |= RT_MEMORY_POLICY_HUGE_PAGE_ONLY;
    } else if (policy == ACL_MEM_MALLOC_NORMAL_ONLY) {
        flags |= RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY;
    } else if (policy == ACL_MEM_MALLOC_HUGE_FIRST_P2P) {
        flags |= RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P;
    } else if (policy == ACL_MEM_MALLOC_HUGE_ONLY_P2P) {
        flags |= RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P;
    } else if (policy == ACL_MEM_MALLOC_NORMAL_ONLY_P2P) {
        flags |= RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P;
    } else if (policy == ACL_MEM_MALLOC_HUGE1G_ONLY) {
        flags |= RT_MEMORY_POLICY_HUGE1G_PAGE_ONLY;
    } else if (policy == ACL_MEM_MALLOC_HUGE1G_ONLY_P2P) {
        flags |= RT_MEMORY_POLICY_HUGE1G_PAGE_ONLY_P2P;
    } else {
        flags = RT_MEMORY_DEFAULT;
    }
    const rtError_t rtErr = rtMalloc(devPtr, alignedSize, flags, moduleId);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("alloc device memory failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_ADD_APPLY_SUCCESS_COUNT(acl::ACL_STATISTICS_MALLOC_FREE);
    return ACL_SUCCESS;
}

aclError aclrtMallocInnerWithCfg(void **devPtr, const size_t size, aclrtMemMallocPolicy policy, rtMallocAdvise advise,
    aclrtMallocConfig *cfg)
{
    ACL_ADD_APPLY_TOTAL_COUNT(acl::ACL_STATISTICS_MALLOC_FREE);
    ACL_LOG_DEBUG("start to execute aclrtMallocInnerWithCfg, size = %zu", size);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devPtr);

    // check attrs pointer
    if ((cfg != nullptr) && (cfg->numAttrs != 0) && (cfg->attrs == nullptr)) {
        const std::string numAttrsVal = std::to_string(cfg->numAttrs);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_REASON_MSG,
            std::vector<const char *>({"func", "value", "param", "reason"}),
            std::vector<const char *>({__func__, numAttrsVal.c_str(), "cfg->numAttrs",
                "cfg->attrs must not be null when cfg->numAttrs is not 0"}));
        return ACL_ERROR_INVALID_PARAM;
    }
    // size must be greater than zero
    ACL_REQUIRES_POSITIVE_REPORT(size);

    rtError_t rtErr = rtsMalloc(devPtr, size, static_cast<rtMallocPolicy>(policy), advise,
        reinterpret_cast<rtMallocConfig_t*>(cfg));
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("alloc memory failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_ADD_APPLY_SUCCESS_COUNT(acl::ACL_STATISTICS_MALLOC_FREE);
    return ACL_SUCCESS;
}
} // namespace acl

#ifdef __cplusplus
extern "C" {
#endif

aclError aclrtMallocImpl(void **devPtr, size_t size, aclrtMemMallocPolicy policy)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMalloc);
    ACL_LOG_DEBUG("start to execute aclrtMalloc, size = %zu", size);
    return acl::aclMallocMemInner(devPtr, size, true, policy, acl::APP_MODE_ID_U16);
}

aclError aclrtMallocAlign32Impl(void **devPtr, size_t size, aclrtMemMallocPolicy policy)
{
    ACL_LOG_DEBUG("start to execute aclrtMallocAlign32, size = %zu", size);
    return acl::aclMallocMemInner(devPtr, size, false, policy, acl::APP_MODE_ID_U16);
}

aclError aclrtMallocCachedImpl(void **devPtr, size_t size, aclrtMemMallocPolicy policy)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMallocCached);
    ACL_ADD_APPLY_TOTAL_COUNT(acl::ACL_STATISTICS_MALLOC_FREE);
    ACL_LOG_DEBUG("start to execute aclrtMallocCached, size = %zu", size);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devPtr);

    ACL_REQUIRES_POSITIVE_REPORT(size);
    size_t alignedSize = size;
    const bool huge1g = (policy == ACL_MEM_MALLOC_HUGE1G_ONLY) || (policy == ACL_MEM_MALLOC_HUGE1G_ONLY_P2P);
    const bool isPadding = !huge1g;
    ACL_REQUIRES_OK(acl::GetAlignedAndPaddingSize(size, isPadding, alignedSize));
    uint32_t cacheFlags = RT_MEMORY_DEFAULT;
    if (policy == ACL_MEM_MALLOC_HUGE_FIRST) {
        cacheFlags |= RT_MEMORY_POLICY_HUGE_PAGE_FIRST;
    } else if (policy == ACL_MEM_MALLOC_HUGE_ONLY) {
        cacheFlags |= RT_MEMORY_POLICY_HUGE_PAGE_ONLY;
    } else if (policy == ACL_MEM_MALLOC_NORMAL_ONLY) {
        cacheFlags |= RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY;
    } else if (policy == ACL_MEM_MALLOC_HUGE1G_ONLY) {
        cacheFlags |= RT_MEMORY_POLICY_HUGE1G_PAGE_ONLY;
    } else {
        cacheFlags = RT_MEMORY_DEFAULT;
    }
    const rtError_t rtErr = rtMallocCached(devPtr, alignedSize, cacheFlags, acl::APP_MODE_ID_U16);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("alloc device memory with cache failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_ADD_APPLY_SUCCESS_COUNT(acl::ACL_STATISTICS_MALLOC_FREE);
    return ACL_SUCCESS;
}

aclError aclrtMallocWithCfgImpl(void **devPtr, size_t size, aclrtMemMallocPolicy policy, aclrtMallocConfig *cfg)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMallocWithCfg);
    ACL_ADD_APPLY_TOTAL_COUNT(acl::ACL_STATISTICS_MALLOC_FREE);
    ACL_LOG_DEBUG("start to execute aclrtMallocWithCfg, size = %zu", size);
    return acl::aclrtMallocInnerWithCfg(devPtr, size, policy, RT_MEM_ADVISE_NONE, cfg);
}

aclError aclrtMallocForTaskSchedulerImpl(void **devPtr, size_t size, aclrtMemMallocPolicy policy,
                                         aclrtMallocConfig *cfg)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMallocForTaskScheduler);
    ACL_LOG_DEBUG("start to execute aclrtMallocForTaskScheduler, size = %zu", size);
    return acl::aclrtMallocInnerWithCfg(devPtr, size, policy, RT_MEM_ADVISE_TS, cfg);
}

aclError aclrtMallocHostWithCfgImpl(void **ptr, uint64_t size, aclrtMallocConfig *cfg)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMallocHostWithCfg);
    ACL_ADD_APPLY_TOTAL_COUNT(acl::ACL_STATISTICS_MALLOC_FREE);
    ACL_LOG_DEBUG("start to execute aclrtMallocHostWithCfg, size = %zu", size);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptr);
    ACL_REQUIRES_POSITIVE_REPORT(size);
    const rtError_t rtErr = rtsMallocHost(ptr, size, reinterpret_cast<rtMallocConfig_t*>(cfg));
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("alloc host memory with cfg failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_ADD_APPLY_SUCCESS_COUNT(acl::ACL_STATISTICS_MALLOC_FREE);
    return ACL_SUCCESS;
}

aclError aclrtPointerGetAttributesImpl(const void *ptr, aclrtPtrAttributes *attributes)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtPointerGetAttributes);
    ACL_LOG_DEBUG("start to execute aclrtPointerGetAttributes");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptr);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(attributes);
    const rtError_t rtErr = rtsPointerGetAttributes(ptr, reinterpret_cast<rtPtrAttributes_t*>(attributes));
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsPointerGetAttributes failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemManagedGetAttrImpl(aclrtMemManagedRangeAttribute attribute, const void *ptr, size_t size, void *data,
                                    size_t dataSize)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemManagedGetAttr);
    ACL_LOG_DEBUG("start to execute aclrtMemManagedGetAttr");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptr);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(data);
    ACL_REQUIRES_POSITIVE_REPORT(size);
    ACL_REQUIRES_CALL_RTS_OK(rtMemManagedGetAttr(static_cast<rtMemManagedRangeAttribute>(attribute), ptr, size, data, dataSize), rtMemManagedGetAttr);
    return ACL_SUCCESS;
}

aclError aclrtMemManagedGetAttrsImpl(aclrtMemManagedRangeAttribute *attributes, size_t numAttributes, const void *ptr,
                                    size_t size, void **data, size_t *dataSizes)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemManagedGetAttrs);
    ACL_LOG_DEBUG("start to execute aclrtMemManagedGetAttrs");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptr);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(attributes);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(data);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(dataSizes);
    ACL_REQUIRES_POSITIVE_REPORT(size);
    ACL_REQUIRES_CALL_RTS_OK(rtMemManagedGetAttrs(reinterpret_cast<rtMemManagedRangeAttribute *>(attributes), 
                                            numAttributes, ptr, size, data, dataSizes), rtMemManagedGetAttrs);
    return ACL_SUCCESS;
}

aclError aclrtHostRegisterImpl(void *ptr, uint64_t size, aclrtHostRegisterType type, void **devPtr)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtHostRegister);
    ACL_LOG_DEBUG("start to execute aclrtHostRegister");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptr);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devPtr);
    // size must be greater than zero
    ACL_REQUIRES_POSITIVE_REPORT(size);
    const rtError_t rtErr = rtsHostRegister(ptr, size, static_cast<rtHostRegisterType>(type), devPtr);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsHostRegister failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtHostRegisterV2Impl(void *ptr, uint64_t size, uint32_t flag)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtHostRegisterV2);
    ACL_LOG_DEBUG("start to execute aclrtHostRegisterV2");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptr);
    ACL_REQUIRES_POSITIVE_REPORT(size);
    const rtError_t rtErr = rtHostRegisterV2(ptr, size, flag);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtHostRegisterV2 failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtHostGetDevicePointerImpl(void *pHost, void **pDevice, uint32_t flag)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtHostGetDevicePointer);
    ACL_LOG_DEBUG("start to execute aclrtHostGetDevicePointer");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(pHost);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(pDevice);
    ACL_CHECK_RESERVED_PARAM_REPORT_RET(flag, 0, ACL_ERROR_INVALID_PARAM);
    const rtError_t rtErr = rtHostGetDevicePointer(pHost, pDevice, flag);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call aclrtHostGetDevicePointer failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtHostUnregisterImpl(void *ptr)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtHostUnregister);
    ACL_LOG_DEBUG("start to execute aclrtHostUnregister");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptr);
    const rtError_t rtErr = rtsHostUnregister(ptr);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsHostUnregister failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtHostMemMapCapabilitiesImpl(uint32_t deviceId, aclrtHacType hacType,
    aclrtHostMemMapCapability *capabilities)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtHostMemMapCapabilities);
    ACL_LOG_DEBUG("start to execute aclrtHostMemMapCapabilities, deviceId = %u", deviceId);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(capabilities);
    const rtError_t rtErr = rtHostMemMapCapabilities(deviceId, static_cast<rtHacType>(hacType),
        reinterpret_cast<rtHostMemMapCapability*>(capabilities));
    if (rtErr != RT_ERROR_NONE) {
        if (rtErr == ACL_ERROR_RT_FEATURE_NOT_SUPPORT) {
            ACL_LOG_WARN("rtHostMemMapCapabilities not support this feature, runtime result = %d", static_cast<int32_t>(rtErr));
        } else {
            ACL_LOG_CALL_ERROR("call rtHostMemMapCapabilities failed, runtime result = %d", static_cast<int32_t>(rtErr));
        }
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemFlushImpl(void *devPtr, size_t size)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemFlush);
    ACL_LOG_DEBUG("start to execute aclrtMemFlush, size = %zu", size);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devPtr);

    ACL_REQUIRES_POSITIVE_REPORT(size);
    const rtError_t rtErr = rtFlushCache(devPtr, size);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("flush cache data to ddr failed, runtime result = %d, size = %zu",
            static_cast<int32_t>(rtErr), size);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemInvalidateImpl(void *devPtr, size_t size)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemInvalidate);
    ACL_LOG_INFO("start to execute aclrtMemInvalidate, size = %zu", size);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devPtr);

    ACL_REQUIRES_POSITIVE_REPORT(size);
    const rtError_t rtErr = rtInvalidCache(devPtr, size);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("invalidate cache data failed, runtime result = %d, size = %zu",
            static_cast<int32_t>(rtErr), size);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtFreeImpl(void *devPtr)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtFree);
    ACL_ADD_RELEASE_TOTAL_COUNT(acl::ACL_STATISTICS_MALLOC_FREE);
    ACL_LOG_DEBUG("start to execute aclrtFree");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devPtr);

    const rtError_t rtErr = rtFree(devPtr);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("free device memory failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_ADD_RELEASE_SUCCESS_COUNT(acl::ACL_STATISTICS_MALLOC_FREE);
    return ACL_SUCCESS;
}

aclError aclrtMallocHostImpl(void **hostPtr, size_t size)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMallocHost);
    ACL_ADD_APPLY_TOTAL_COUNT(acl::ACL_STATISTICS_MALLOC_FREE_HOST);
    ACL_LOG_DEBUG("start to execute aclrtMallocHost, size = %zu", size);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(hostPtr);
    // size must be greater than zero
    ACL_REQUIRES_POSITIVE_REPORT(size);
    const rtError_t rtErr = rtMallocHost(hostPtr, size, acl::APP_MODE_ID_U16);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("alloc host memory failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_ADD_APPLY_SUCCESS_COUNT(acl::ACL_STATISTICS_MALLOC_FREE_HOST);
    return ACL_SUCCESS;
}

aclError aclrtMemAllocManagedImpl(void **ptr, uint64_t size, uint32_t flag)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemAllocManaged);
    ACL_LOG_DEBUG("start to execute aclrtMemAllocManaged");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptr);
    ACL_REQUIRES_PARAM_EQUAL_REPORT(flag, ACL_RT_MEM_ATTACH_GLOBAL);
    const rtError_t rtErr = rtMemAllocManaged(ptr, size, RT_MEMORY_ATTACH_GLOBAL, acl::APP_MODE_ID_U16);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("alloc uvm memory failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemManagedAdviseImpl(const void *const ptr, uint64_t size, aclrtMemManagedAdviseType advise, 
                                    aclrtMemManagedLocation location)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemManagedAdvise);
    ACL_LOG_DEBUG("start to execute aclrtMemManagedAdvise");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptr);
    ACL_REQUIRES_POSITIVE_REPORT(size);

    rtMemManagedLocation memLocation;
    memLocation.id = location.id;
    memLocation.type = static_cast<rtMemManagedLocationType>(location.type);
    
    ACL_REQUIRES_CALL_RTS_OK(rtMemManagedAdvise(ptr, size, advise, memLocation), rtMemManagedAdvise);
    return ACL_SUCCESS;
}

aclError aclrtFreeHostImpl(void *hostPtr)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtFreeHost);
    ACL_ADD_RELEASE_TOTAL_COUNT(acl::ACL_STATISTICS_MALLOC_FREE_HOST);
    ACL_LOG_DEBUG("start to execute aclrtFreeHost");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(hostPtr);
    const rtError_t rtErr = rtFreeHost(hostPtr);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("free host memory failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_ADD_RELEASE_SUCCESS_COUNT(acl::ACL_STATISTICS_MALLOC_FREE_HOST);
    return ACL_SUCCESS;
}

aclError aclrtFreeWithDevSyncImpl(void *devPtr)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtFreeWithDevSync);
    ACL_ADD_RELEASE_TOTAL_COUNT(acl::ACL_STATISTICS_MALLOC_FREE);
    ACL_LOG_DEBUG("start to execute aclrtFreeWithDevSync");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devPtr);

    const rtError_t rtErr = rtFreeWithDevSync(devPtr);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("free device memory with device synchronize failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_ADD_RELEASE_SUCCESS_COUNT(acl::ACL_STATISTICS_MALLOC_FREE);
    return ACL_SUCCESS;
}

aclError aclrtFreeHostWithDevSyncImpl(void *hostPtr)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtFreeHostWithDevSync);
    ACL_ADD_RELEASE_TOTAL_COUNT(acl::ACL_STATISTICS_MALLOC_FREE_HOST);
    ACL_LOG_DEBUG("start to execute aclrtFreeHostWithDevSync");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(hostPtr);
    const rtError_t rtErr = rtFreeHostWithDevSync(hostPtr);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("free host memory with device synchronize failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_ADD_RELEASE_SUCCESS_COUNT(acl::ACL_STATISTICS_MALLOC_FREE_HOST);
    return ACL_SUCCESS;
}

aclError aclrtMemcpyImpl(void *dst,
                         size_t destMax,
                         const void *src,
                         size_t count,
                         aclrtMemcpyKind kind)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemcpy);
    rtMemcpyKind_t rtKind = RT_MEMCPY_RESERVED;
    const aclError ret = MemcpyKindTranslate(kind, rtKind);
    if (ret != ACL_SUCCESS) {
        ACL_LOG_ERROR("invalid kind of memcpy, kind = %d", static_cast<int32_t>(kind));
        return ret;
    }
    if (IsZeroSizeMemcpy(count)) {
        ACL_LOG_INFO("count is zero, no memory copy will be performed");
        return ACL_SUCCESS;
    }
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(dst);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(src);

    const rtError_t rtErr = rtMemcpy(dst, destMax, src, count, rtKind);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("synchronized memcpy failed, kind = %d, runtime result = %d",
            static_cast<int32_t>(kind), static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemsetImpl(void *devPtr, size_t maxCount, int32_t value, size_t count)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemset);
    ACL_LOG_DEBUG("start to execute aclrtMemset, maxSize = %zu, size = %zu, value = %d",
        maxCount, count, value);
    if (IsZeroSizeMemcpy(count)) {
        ACL_LOG_INFO("zero-size memset, no memory set will be performed");
        return ACL_SUCCESS;
    }
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devPtr);

    const rtError_t rtErr = rtMemset(devPtr, maxCount, static_cast<uint32_t>(value), count);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("set memory failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemcpyAsyncImpl(void *dst,
                              size_t destMax,
                              const void *src,
                              size_t count,
                              aclrtMemcpyKind kind,
                              aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemcpyAsync);
    rtMemcpyKind_t rtKindVal = RT_MEMCPY_RESERVED;
    const aclError ret = MemcpyKindTranslate(kind, rtKindVal);
    if (ret != ACL_SUCCESS) {
        ACL_LOG_ERROR("invalid kind of memcpy, kind = %d", static_cast<int32_t>(kind));
        return ret;
    }
    if (IsZeroSizeMemcpy(count)) {
        ACL_LOG_INFO("count is zero, no memory copy async will be performed");
        return ACL_SUCCESS;
    }
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(dst);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(src);

    ACL_REQUIRES_CALL_RTS_OK(rtMemcpyAsync(dst, destMax, src, count, rtKindVal, static_cast<rtStream_t>(stream)),
                             rtMemcpyAsync);
    return ACL_SUCCESS;
}

aclError aclrtMemcpyAsyncWithConditionImpl(void *dst,
                                           size_t destMax,
                                           const void *src,
                                           size_t count,
                                           aclrtMemcpyKind kind,
                                           aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemcpyAsyncWithCondition);
    ACL_LOG_DEBUG("start to execute aclrtMemcpyAsyncWithCondition, destMaxSize = %zu, srcSize = %zu, kind = %d",
                  destMax, count, static_cast<int32_t>(kind));
    rtMemcpyKind_t rtKindValue = RT_MEMCPY_RESERVED;
    const aclError ret = MemcpyKindTranslate(kind, rtKindValue);
    if (ret != ACL_SUCCESS) {
        ACL_LOG_ERROR("invalid kind of memcpy, kind = %d", static_cast<int32_t>(kind));
        return ret;
    }
    if (IsZeroSizeMemcpy(count)) {
        ACL_LOG_INFO("zero-size memcpy, no memory copy async will be performed");
        return ACL_SUCCESS;
    }
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(dst);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(src);

    rtMemcpyAttributeValue_t memcpyAttrValue;
    // bit0 standing for not checking matching between address and kind, bit1 standing for checking page-locked addr
    memcpyAttrValue.checkBitmap = 0x00000002U;
    rtMemcpyAttribute_t memcpyAttr = {
        .id = RT_MEMCPY_ATTRIBUTE_CHECK,
        .value = memcpyAttrValue
    };
    rtMemcpyConfig_t memcpyConfig = {
        .attrs = &memcpyAttr,
        .numAttrs = 1U
    };

    ACL_REQUIRES_CALL_RTS_OK(rtMemcpyAsyncEx(dst, destMax, src, count, rtKindValue, static_cast<rtStream_t>(stream),
                             &memcpyConfig), rtMemcpyAsyncEx);
    return ACL_SUCCESS;
}

aclError aclrtMemsetAsyncImpl(void *devPtr, size_t maxCount, int32_t value, size_t count, aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemsetAsync);
    ACL_LOG_DEBUG("start to execute aclrtMemsetAsync, maxCount = %zu, value = %d, count = %zu",
        maxCount, value, count);
    if (IsZeroSizeMemcpy(count)) {
        ACL_LOG_INFO("zero-size memset, no memory set async will be performed");
        return ACL_SUCCESS;
    }
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devPtr);

    ACL_REQUIRES_CALL_RTS_OK(rtMemsetAsync(devPtr, maxCount, static_cast<uint32_t>(value), count, stream),
                             rtMemsetAsync);
    return ACL_SUCCESS;
}

// Determine whether it is ACL-allocated pinned memory (Host pinned memory or Device memory)
static aclError IsAclPinnedMemory(const void* ptr, bool& isAclMem)
{
    if (ptr == nullptr) {
        isAclMem = false;
        return ACL_SUCCESS;
    }
    aclrtPtrAttributes attr;
    aclError ret = aclrtPointerGetAttributesImpl(ptr, &attr);
    if (ret != ACL_SUCCESS) {
        isAclMem = false;
        return ret;
    }
    isAclMem = (attr.location.type == ACL_MEM_LOCATION_TYPE_HOST ||
                attr.location.type == ACL_MEM_LOCATION_TYPE_DEVICE ||
                attr.location.type == ACL_MEM_LOCATION_TYPE_HOST_NUMA);
    return ACL_SUCCESS;
}

aclError aclrtMemsetD32Impl(void* ptr, size_t memSize, uint32_t value, size_t N)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemsetD32);

    ACL_LOG_DEBUG("start to execute aclrtMemsetD32, memSize = %zu, N = %zu, value = 0x%x",
                  memSize, N, value);

    if (IsZeroSizeMemcpy(N)) {
        ACL_LOG_INFO("zero-size memsetD32, no memory set will be performed");
        return ACL_SUCCESS;
    }
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptr);
    ACL_REQUIRES_POSITIVE(N);
    
    // Check byte alignment
    if ((reinterpret_cast<uintptr_t>(ptr) & ALIGNMENT_4BYTE_MASK) != 0) {
        ACL_LOG_ERROR("Pointer ptr=%p is not 4-byte aligned", ptr);
        return ACL_ERROR_INVALID_PARAM;
    }

    const size_t requiredBytes = N * sizeof(uint32_t);
    if (memSize < requiredBytes) {
        ACL_LOG_INNER_ERROR("memory size is not enough, required %zu bytes but only %zu bytes",
                            requiredBytes, memSize);
        const std::vector<const char*> paramNames = {"param", "value", "reason"};
        const std::vector<const char*> paramValues = {"N", std::to_string(N).c_str(),
                                                      "N * 4 must be less than or equal to memSize"};
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_MSG, paramNames, paramValues);
        return ACL_ERROR_INVALID_PARAM;
    }

    bool isAclMem = false;
    aclError ret = IsAclPinnedMemory(ptr, isAclMem);
    if (ret != ACL_SUCCESS) {
        ACL_LOG_INNER_ERROR("Failed to check memory type, ret=%d", ret);
        return ret; 
    }
    if (!isAclMem) {
        ACL_LOG_INNER_ERROR("Only memory allocated by aclrtMalloc or aclrtMallocHost is supported.");
        return ACL_ERROR_INVALID_PARAM;
    }

    const rtError_t rtErr = rtMemsetD32(ptr, static_cast<uint64_t>(memSize), value, N);
    if (rtErr != RT_ERROR_NONE) {
        if (rtErr == ACL_ERROR_RT_FEATURE_NOT_SUPPORT) {
            ACL_LOG_WARN("rtMemsetD32 not support this feature, runtime result = %d", rtErr);
        } else {
            ACL_LOG_CALL_ERROR("call rtMemsetD32 failed, runtime result = %d", rtErr);
            return ACL_GET_ERRCODE_RTS(rtErr);
        }
    }

    return ACL_SUCCESS;
}

aclError aclrtMemsetD32AsyncImpl(void* ptr, size_t memSize, uint32_t value,
                             size_t N, aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemsetD32Async);

    ACL_LOG_DEBUG("start to execute aclrtMemsetD32Async, memSize = %zu, N = %zu, value = 0x%x",
                  memSize, N, value);

    if (IsZeroSizeMemcpy(N)) {
        ACL_LOG_INFO("zero-size memsetD32 async, no memory set will be performed");
        return ACL_SUCCESS;
    }
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptr);
    ACL_REQUIRES_POSITIVE(N);

    // Check byte alignment
    if ((reinterpret_cast<uintptr_t>(ptr) & ALIGNMENT_4BYTE_MASK) != 0) {
        ACL_LOG_ERROR("Pointer ptr=%p is not 4-byte aligned", ptr);
        return ACL_ERROR_INVALID_PARAM;
    }

    const size_t requiredBytes = N * sizeof(uint32_t);
    if (memSize < requiredBytes) {
        ACL_LOG_INNER_ERROR("memory size is not enough, required %zu bytes but only %zu bytes",
                            requiredBytes, memSize);
        const std::vector<const char*> paramNames = {"param", "value", "reason"};
        const std::vector<const char*> paramValues = {"N", std::to_string(N).c_str(),
                                                      "N * 4 must be less than or equal to memSize"};
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_MSG, paramNames, paramValues);
        return ACL_ERROR_INVALID_PARAM;
    }

    bool isAclMem = false;
    aclError ret = IsAclPinnedMemory(ptr, isAclMem);
    if (ret != ACL_SUCCESS) {
        ACL_LOG_INNER_ERROR("Failed to check memory type, ret=%d", ret);
        return ret; 
    }
    if (!isAclMem) {
        ACL_LOG_INNER_ERROR("Only memory allocated by aclrtMalloc or aclrtMallocHost is supported.");
        return ACL_ERROR_INVALID_PARAM;
    }

    const rtError_t rtErr = rtMemsetD32Async(ptr, static_cast<uint64_t>(memSize),
                                             value, N, static_cast<rtStream_t>(stream));
    if (rtErr != RT_ERROR_NONE) {
        if (rtErr == ACL_ERROR_RT_FEATURE_NOT_SUPPORT) {
            ACL_LOG_WARN("rtMemsetD32Async not support this feature, runtime result = %d", rtErr);
        } else {
            ACL_LOG_CALL_ERROR("call rtMemsetD32Async failed, runtime result = %d", rtErr);
            return ACL_GET_ERRCODE_RTS(rtErr);
        }
    }

    return ACL_SUCCESS;
}

aclError aclrtDeviceCanAccessPeerImpl(int32_t *canAccessPeer, int32_t deviceId, int32_t peerDeviceId)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtDeviceCanAccessPeer);
    ACL_LOG_INFO("start to execute aclrtDeviceCanAccessPeer");

    if (deviceId == peerDeviceId) {
        ACL_LOG_ERROR("deviceId %d cannot be equal to peerDeviceId %d", deviceId, peerDeviceId);
        const std::string deviceIdVal = std::to_string(deviceId);
        std::string errMsg = acl::AclErrorLogManager::FormatStr("deviceId %d cannot be equal to peerDeviceId %d",
            deviceId, peerDeviceId);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_REASON_MSG,
            std::vector<const char *>({"func", "value", "param", "reason"}),
            std::vector<const char *>({__func__, deviceIdVal.c_str(), "deviceId", errMsg.c_str()}));
        return ACL_ERROR_INVALID_PARAM;
    }

    uint32_t peerPhyId = 0U;
    rtError_t rtErr = rtGetDevicePhyIdByIndex(static_cast<uint32_t>(peerDeviceId), &peerPhyId);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtGetDevicePhyIdByIndex failed, deviceId = %d, peerDeviceId = %d, "
            "runtime result = %d", deviceId, peerDeviceId, static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }

    rtErr = rtDeviceCanAccessPeer(canAccessPeer, static_cast<uint32_t>(deviceId), peerPhyId);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtDeviceCanAccessPeer failed, deviceId = %d, peerPhyId = %u, "
            "runtime result = %d", deviceId, peerPhyId, static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }

    return ACL_SUCCESS;
}

aclError aclrtDeviceEnablePeerAccessImpl(int32_t peerDeviceId, uint32_t flags)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtDeviceEnablePeerAccess);
    ACL_LOG_INFO("start to execute aclrtDeviceEnablePeerAccess");
    ACL_CHECK_RESERVED_PARAM_REPORT_RET(flags, 0U, ACL_ERROR_FEATURE_UNSUPPORTED);

    int32_t deviceId = 0;
    rtError_t rtErr = rtGetDevice(&deviceId);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtGetDevice failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }

    if (deviceId == peerDeviceId) {
        ACL_LOG_ERROR("deviceId %d cannot be equal to peerDeviceId %d", deviceId, peerDeviceId);
        const std::string deviceIdVal = std::to_string(deviceId);
        std::string errMsg = acl::AclErrorLogManager::FormatStr("deviceId %d cannot be equal to peerDeviceId %d",
            deviceId, peerDeviceId);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_REASON_MSG,
            std::vector<const char *>({"func", "value", "param", "reason"}),
            std::vector<const char *>({__func__, deviceIdVal.c_str(), "deviceId", errMsg.c_str()}));
        return ACL_ERROR_INVALID_PARAM;
    }

    uint32_t peerPhyId = 0U;
    rtErr = rtGetDevicePhyIdByIndex(static_cast<uint32_t>(peerDeviceId), &peerPhyId);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtGetDevicePhyIdByIndex failed, peerDeviceId = %d, runtime result = %d",
            peerDeviceId, static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }

    rtErr = rtEnableP2P(static_cast<uint32_t>(deviceId), peerPhyId, flags);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtEnableP2P failed, deviceId = %d, peerPhyId = %u, runtime result = %d",
            deviceId, peerPhyId, static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }

    return ACL_SUCCESS;
}

aclError aclrtDeviceDisablePeerAccessImpl(int32_t peerDeviceId)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtDeviceDisablePeerAccess);
    ACL_LOG_INFO("start to execute aclrtDeviceDisablePeerAccess");

    int32_t deviceId = 0;
    rtError_t rtErr = rtGetDevice(&deviceId);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtGetDevice failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }

    if (deviceId == peerDeviceId) {
        ACL_LOG_ERROR("deviceId %d cannot be equal to peerDeviceId %d", deviceId, peerDeviceId);
        const std::string deviceIdVal = std::to_string(deviceId);
        std::string errMsg = acl::AclErrorLogManager::FormatStr("deviceId %d cannot be equal to peerDeviceId %d",
            deviceId, peerDeviceId);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_REASON_MSG,
            std::vector<const char *>({"func", "value", "param", "reason"}),
            std::vector<const char *>({__func__, deviceIdVal.c_str(), "deviceId", errMsg.c_str()}));
        return ACL_ERROR_INVALID_PARAM;
    }

    uint32_t peerPhyId = 0U;
    rtErr = rtGetDevicePhyIdByIndex(static_cast<uint32_t>(peerDeviceId), &peerPhyId);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtGetDevicePhyIdByIndex failed, peerDeviceId = %u, runtime result = %d",
            peerDeviceId, static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }

    rtErr = rtDisableP2P(static_cast<uint32_t>(deviceId), peerPhyId);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtDisableP2P failed, deviceId = %d, peerPhyId = %u, runtime result = %d",
            deviceId, peerPhyId, static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }

    return ACL_SUCCESS;
}

aclError aclrtGetMemInfoImpl(aclrtMemAttr attr, size_t *free, size_t *total)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtGetMemInfo);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(free);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(total);
    ACL_LOG_DEBUG("start to execute aclrtGetMemInfo, memory attribute = %d", static_cast<int32_t>(attr));

    const rtError_t rtErr = rtMemGetInfoEx(static_cast<rtMemInfoType_t>(attr), free, total);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("get memory information failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }

    ACL_LOG_DEBUG("successfully execute aclrtGetMemInfo, memory attribute = %d, free memory = %zu bytes, "
        "total memory = %zu bytes", static_cast<int32_t>(attr), *free, *total);
    return ACL_SUCCESS;
}

aclError aclrtGetMemUsageInfoImpl(int32_t deviceId, aclrtMemUsageInfo *memUsageInfo, size_t inputNum, size_t *outputNum)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtGetMemUsageInfo);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(memUsageInfo);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(outputNum);
    ACL_LOG_DEBUG("start to execute aclrtGetMemUsageInfo, deviceId = %d, inputNum = %zu", static_cast<int32_t>(deviceId), inputNum);

    const rtError_t rtErr = rtGetMemUsageInfo(static_cast<uint32_t>(deviceId), reinterpret_cast<rtMemUsageInfo_t*>(memUsageInfo), inputNum, outputNum);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("get memory usage information failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }

    ACL_LOG_DEBUG("successfully execute aclrtGetMemUsageInfo, deviceId = %d, inputNum = %zu", deviceId, inputNum);
    return ACL_SUCCESS;
}

aclError aclrtMemcpy2dImpl(void *dst,
                           size_t dpitch,
                           const void *src,
                           size_t spitch,
                           size_t width,
                           size_t height,
                           aclrtMemcpyKind kind)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemcpy2d);
    ACL_LOG_DEBUG("start to execute aclrtMemcpy2d, dpitch = %zu, spitch = %zu, width = %zu, height = %zu, kind = %d",
        dpitch, spitch, width, height, static_cast<int32_t>(kind));

    rtMemcpyKind_t rtKind = RT_MEMCPY_RESERVED;
    const aclError ret = CheckMemcpy2dParam(dst, dpitch, src, spitch, width, height, kind, rtKind);
    if (ret != ACL_SUCCESS) {
        return ret;
    }

    const rtError_t rtErr = rtMemcpy2d(dst, dpitch, src, spitch, width, height, rtKind);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("[Synchronized][Memcpy]synchronized memcpy failed, kind = %d, runtime result = %d",
            static_cast<int32_t>(kind), static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }

    ACL_LOG_DEBUG("Successfuly execute aclrtMemcpy2d, dpitch = %zu, spitch = %zu, width = %zu, height = %zu, "
        "kind = %d", dpitch, spitch, width, height, static_cast<int32_t>(kind));
    return ACL_SUCCESS;
}

aclError aclrtMemcpy2dAsyncImpl(void *dst,
                                size_t dpitch,
                                const void *src,
                                size_t spitch,
                                size_t width,
                                size_t height,
                                aclrtMemcpyKind kind,
                                aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemcpy2dAsync);
    ACL_LOG_DEBUG("start to execute aclrtMemcpy2dAsync, dpitch = %zu, spitch = %zu, width = %zu, height = %zu,"
        " kind = %d", dpitch, spitch, width, height, static_cast<int32_t>(kind));

    rtMemcpyKind_t rtKindVal = RT_MEMCPY_RESERVED;
    const aclError ret = CheckMemcpy2dParam(dst, dpitch, src, spitch, width, height, kind, rtKindVal);
    if (ret != ACL_SUCCESS) {
        return ret;
    }

    ACL_REQUIRES_CALL_RTS_OK(rtMemcpy2dAsync(dst, dpitch, src, spitch, width, height, rtKindVal, stream),
                             rtMemcpy2dAsync);

    ACL_LOG_DEBUG("Successfuly execute aclrtMemcpy2dAsync, dpitch = %zu, spitch = %zu, width = %zu, height = %zu, "
        "kind = %d", dpitch, spitch, width, height, static_cast<int32_t>(kind));
    return ACL_SUCCESS;
}

aclError aclrtReserveMemAddressImpl(void **virPtr,
                                    size_t size,
                                    size_t alignment,
                                    void *expectPtr,
                                    uint64_t flags)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtReserveMemAddress);
    ACL_ADD_APPLY_TOTAL_COUNT(acl::ACL_STATISTICS_RESERVE_RELEASE_MEMORY_ADDRESS);
    ACL_LOG_DEBUG("start to execute aclrtReserveMemAddress, size = %zu", size);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(virPtr);

    ACL_REQUIRES_POSITIVE_REPORT(size);
    // flags参数取1,为了早期接口兼容性保留
    ACL_CHECK_INVALID_VALUE_WITH_EXPECT((flags == 0ULL) || (flags == 1ULL), flags, "0");

    const rtError_t rtErr = rtReserveMemAddress(virPtr, size, alignment, expectPtr, flags);
    if (rtErr != RT_ERROR_NONE) {
        if (rtErr == ACL_ERROR_RT_FEATURE_NOT_SUPPORT) {
            ACL_LOG_WARN("reserve memory address unsupport, runtime result = %d", static_cast<int32_t>(rtErr));
        } else {
            ACL_LOG_CALL_ERROR("reserve memory address failed, runtime result = %d", static_cast<int32_t>(rtErr));
        }   
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_ADD_APPLY_SUCCESS_COUNT(acl::ACL_STATISTICS_RESERVE_RELEASE_MEMORY_ADDRESS);
    return ACL_SUCCESS;
}

aclError aclrtReleaseMemAddressImpl(void *virPtr)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtReleaseMemAddress);
    ACL_ADD_RELEASE_TOTAL_COUNT(acl::ACL_STATISTICS_RESERVE_RELEASE_MEMORY_ADDRESS);
    ACL_LOG_DEBUG("start to execute aclrtReleaseMemAddress");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(virPtr);

    const rtError_t rtErr = rtReleaseMemAddress(virPtr);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("release memory address failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_ADD_RELEASE_SUCCESS_COUNT(acl::ACL_STATISTICS_RESERVE_RELEASE_MEMORY_ADDRESS);
    return ACL_SUCCESS;
}

aclError aclrtMallocPhysicalImpl(aclrtDrvMemHandle *handle,
                                 size_t size,
                                 const aclrtPhysicalMemProp *prop,
                                 uint64_t flags)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMallocPhysical);
    ACL_ADD_APPLY_TOTAL_COUNT(acl::ACL_STATISTICS_MALLOC_FREE_PHYSICAL_MEMORY);
    ACL_LOG_DEBUG("start to execute aclrtMallocPhysical, size = %zu", size);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(handle);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(prop);

    ACL_REQUIRES_POSITIVE_REPORT(size);
    ACL_CHECK_RESERVED_PARAM_REPORT_RET(flags, 0, ACL_ERROR_INVALID_PARAM);
    ACL_REQUIRES_PARAM_EQUAL_REPORT(prop->handleType, ACL_MEM_HANDLE_TYPE_NONE);
    ACL_REQUIRES_PARAM_EQUAL_REPORT(prop->allocationType, ACL_MEM_ALLOCATION_TYPE_PINNED);
    ACL_CHECK_INVALID_PARAM_WITH_REASON_DESC_RET(
        prop->location.type == ACL_MEM_LOCATION_TYPE_UNREGISTERED,
        acl::GetMemLocationTypeDesc(prop->location.type),
        "prop->location.type",
        "location type does not support ACL_MEM_LOCATION_TYPE_UNREGISTERED",
        ACL_ERROR_INVALID_PARAM);

    rtDrvMemProp_t rtProp = {};
    rtProp.side = prop->location.type;
    rtProp.devid = prop->location.id;
    rtProp.module_id = acl::APP_MODE_ID_U16;
    rtProp.reserve = prop->reserve;
    // host alloc
    bool isHostAlloc = (prop->location.type == ACL_MEM_LOCATION_TYPE_HOST) || (prop->location.type == ACL_MEM_LOCATION_TYPE_HOST_NUMA);
    // device alloc
    bool isDeviceAlloc = (prop->location.type == ACL_MEM_LOCATION_TYPE_DEVICE);
    if (isDeviceAlloc && ((prop->memAttr == ACL_DDR_MEM_HUGE) || (prop->memAttr == ACL_DDR_MEM_NORMAL) || (prop->memAttr == ACL_DDR_MEM_P2P_HUGE) 
        || (prop->memAttr == ACL_DDR_MEM_P2P_NORMAL))) {
        ACL_LOG_ERROR("memAttr [%d] only support ACL_MEM_LOCATION_TYPE_HOST or ACL_MEM_LOCATION_TYPE_HOST_NUMA.", static_cast<int32_t>(prop->memAttr));
        const std::string memAttrVal = std::to_string(prop->memAttr);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_VALUE_MSG,
            std::vector<const char *>({"func", "value", "param", "expect"}),
            std::vector<const char *>({__func__, memAttrVal.c_str(), "memAttr",
                "ACL_MEM_LOCATION_TYPE_HOST or ACL_MEM_LOCATION_TYPE_HOST_NUMA"}));
        return ACL_ERROR_INVALID_PARAM;
    }
    auto it = memAttrHandlers.find(static_cast<int32_t>(prop->memAttr));
    if (it != memAttrHandlers.end()) {
        it->second(rtProp, isHostAlloc, isDeviceAlloc);
    } else {
        ACL_LOG_ERROR("memAttr [%d] not support. "
                      "For details, please refer to the manual.",
                      static_cast<int32_t>(prop->memAttr));
        const std::string memAttrVal = std::to_string(prop->memAttr);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_REASON_MSG,
            std::vector<const char *>({"func", "value", "param", "reason"}),
            std::vector<const char *>({__func__, memAttrVal.c_str(), "memAttr",
                "The current physical memory attribute is not supported"}));
        return ACL_ERROR_INVALID_PARAM;
    }

    const rtError_t rtErr = rtMallocPhysical(reinterpret_cast<rtDrvMemHandle*>(handle), size, &rtProp, flags);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("malloc physical memory failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_ADD_APPLY_SUCCESS_COUNT(acl::ACL_STATISTICS_MALLOC_FREE_PHYSICAL_MEMORY);
    return ACL_SUCCESS;
}

aclError aclrtFreePhysicalImpl(aclrtDrvMemHandle handle)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtFreePhysical);
    ACL_ADD_RELEASE_TOTAL_COUNT(acl::ACL_STATISTICS_MALLOC_FREE_PHYSICAL_MEMORY);
    ACL_LOG_DEBUG("start to execute aclrtFreePhysical");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(handle);

    const rtError_t rtErr = rtFreePhysical(reinterpret_cast<rtDrvMemHandle>(handle));
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("free physical memory failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_ADD_RELEASE_SUCCESS_COUNT(acl::ACL_STATISTICS_MALLOC_FREE_PHYSICAL_MEMORY);
    return ACL_SUCCESS;
}

aclError aclrtMapMemImpl(void *virPtr,
                         size_t size,
                         size_t offset,
                         aclrtDrvMemHandle handle,
                         uint64_t flags)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMapMem);
    ACL_ADD_APPLY_TOTAL_COUNT(acl::ACL_STATISTICS_MAP_UNMAP_MEMORY);
    ACL_LOG_DEBUG("start to execute aclrtMapMem, size = %zu, offset = %zu", size, offset);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(virPtr);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(handle);

    ACL_REQUIRES_POSITIVE_REPORT(size);
    ACL_REQUIRES_PARAM_EQUAL_REPORT(flags, 0);
    const rtError_t rtErr = rtMapMem(virPtr, size, offset, reinterpret_cast<rtDrvMemHandle>(handle), flags);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("map memory failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_ADD_APPLY_SUCCESS_COUNT(acl::ACL_STATISTICS_MAP_UNMAP_MEMORY);
    return ACL_SUCCESS;
}

aclError aclrtUnmapMemImpl(void *virPtr)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtUnmapMem);
    ACL_ADD_RELEASE_TOTAL_COUNT(acl::ACL_STATISTICS_MAP_UNMAP_MEMORY);
    ACL_LOG_DEBUG("start to execute aclrtUnmapMem");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(virPtr);

    const rtError_t rtErr = rtUnmapMem(virPtr);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("unmap memory failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_ADD_RELEASE_SUCCESS_COUNT(acl::ACL_STATISTICS_MAP_UNMAP_MEMORY);
    return ACL_SUCCESS;
}

aclError aclrtMemGetAccessImpl(void *virPtr, aclrtMemLocation *location, uint64_t *flag) 
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemGetAccess);
    ACL_LOG_DEBUG("start to execute aclrtMemGetAccess");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(virPtr);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(location);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(flag);

    const rtError_t rtErr = rtMemGetAccess(virPtr, reinterpret_cast<rtMemLocation*>(location), flag);
    if (rtErr != ACL_RT_SUCCESS) {
        ACL_LOG_CALL_ERROR("get access failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemExportToShareableHandleImpl(aclrtDrvMemHandle handle, aclrtMemHandleType handleType,
                                             uint64_t flags, uint64_t *shareableHandle)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemExportToShareableHandle);
    ACL_LOG_DEBUG("start to execute aclrtMemExportToShareableHandle");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(handle);
    ACL_REQUIRES_PARAM_EQUAL_REPORT(handleType, ACL_MEM_HANDLE_TYPE_NONE);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(shareableHandle);

    const rtError_t rtErr = rtsMemExportToShareableHandle(reinterpret_cast<rtDrvMemHandle>(handle),
                                                        RT_MEM_HANDLE_TYPE_NONE, flags, shareableHandle);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("export shareable handle failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemExportToShareableHandleV2Impl(aclrtDrvMemHandle handle, uint64_t flags, 
    aclrtMemSharedHandleType shareType, void *shareableHandle) 
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemExportToShareableHandleV2);
    ACL_LOG_DEBUG("start to execute aclrtMemExportToShareableHandleV2");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(handle);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(shareableHandle);

    const rtError_t rtErr = rtMemExportToShareableHandleV2(reinterpret_cast<rtDrvMemHandle>(handle),
        static_cast<rtMemSharedHandleType>(shareType), flags, shareableHandle);
    if (rtErr != ACL_RT_SUCCESS) {
        ACL_LOG_CALL_ERROR("export shareable handle failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}  

aclError aclrtMemImportFromShareableHandleImpl(uint64_t shareableHandle,
                                               int32_t deviceId, aclrtDrvMemHandle *handle)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemImportFromShareableHandle);
    ACL_LOG_DEBUG("start to execute aclrtMemImportFromShareableHandle");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(handle);

    const rtError_t rtErr = rtMemImportFromShareableHandle(shareableHandle, deviceId,
                                                            reinterpret_cast<rtDrvMemHandle*>(handle));
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("import from shareable handle failed, shareableHandle[%lu], deviceId[%d], runtime result = %d",
                        shareableHandle, deviceId, static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemImportFromShareableHandleV2Impl(void *shareableHandle, aclrtMemSharedHandleType shareType,
    uint64_t flags, aclrtDrvMemHandle *handle)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemImportFromShareableHandleV2);
    ACL_LOG_DEBUG("start to execute aclrtMemImportFromShareableHandleV2");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(shareableHandle);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(handle);
    ACL_CHECK_RESERVED_PARAM_REPORT_RET(flags, 0, ACL_ERROR_INVALID_PARAM);

    int32_t deviceId = 0;
    const rtError_t rtRet = rtsGetDevice(&deviceId);
    if (rtRet != ACL_RT_SUCCESS) {
        return rtRet;
    }

    const rtError_t rtErr = rtMemImportFromShareableHandleV2(shareableHandle, 
        static_cast<rtMemSharedHandleType>(shareType), flags, deviceId, reinterpret_cast<rtDrvMemHandle*>(handle));
    if (rtErr != ACL_RT_SUCCESS) {
        ACL_LOG_CALL_ERROR("import from shareable handle failed, shareableHandle[%lu], deviceId[%d], runtime result = %d",
            *(static_cast<uint64_t*>(shareableHandle)), deviceId, static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemSetPidToShareableHandleImpl(uint64_t shareableHandle, int32_t *pid, size_t pidNum)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemSetPidToShareableHandle);
    ACL_LOG_DEBUG("start to execute aclrtMemSetPidToShareableHandle");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(pid);
    ACL_REQUIRES_POSITIVE_REPORT(pidNum);
    const rtError_t rtErr = rtMemSetPidToShareableHandle(shareableHandle, pid, static_cast<uint32_t>(pidNum));
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("set pid to shareable handle failed, shareableHandle[%lu], pidNum[%zu], runtime result = %d",
                        shareableHandle, pidNum, static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemSetPidToShareableHandleV2Impl(void *shareableHandle, aclrtMemSharedHandleType shareType,
    int32_t *pid, size_t pidNum)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemSetPidToShareableHandleV2);
    ACL_LOG_DEBUG("start to execute AclrtMemSetPidToShareableHandleV2");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(shareableHandle);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(pid);
    ACL_REQUIRES_POSITIVE_REPORT(pidNum);

    const rtError_t rtErr = rtMemSetPidToShareableHandleV2(shareableHandle,
        static_cast<rtMemSharedHandleType>(shareType), pid, static_cast<uint32_t>(pidNum));
    if (rtErr != ACL_RT_SUCCESS) {
        ACL_LOG_CALL_ERROR("set pid to shareable handle failed, shareableHandle[%lu], pidNum[%zu], runtime result = %d",
            *(static_cast<uint64_t*>(shareableHandle)), pidNum, static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemGetAllocationGranularityImpl(aclrtPhysicalMemProp *prop, aclrtMemGranularityOptions option,
                                              size_t *granularity)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemGetAllocationGranularity);
    ACL_LOG_DEBUG("start to execute aclrtMemGetAllocationGranularity");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(prop);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(granularity);

    rtDrvMemProp_t rtProp1 = {};
    rtProp1.side = prop->location.type;
    rtProp1.devid = prop->location.id;
    rtProp1.module_id = acl::APP_MODE_ID_U16;
    rtProp1.reserve = prop->reserve;
    // host alloc
    bool isHostAlloc = (prop->location.type == ACL_MEM_LOCATION_TYPE_HOST) || (prop->location.type == ACL_MEM_LOCATION_TYPE_HOST_NUMA);
    // device alloc
    bool isDeviceAlloc = (prop->location.type == ACL_MEM_LOCATION_TYPE_DEVICE);
    if (isDeviceAlloc && ((prop->memAttr == ACL_DDR_MEM_HUGE) || (prop->memAttr == ACL_DDR_MEM_NORMAL) || (prop->memAttr == ACL_DDR_MEM_P2P_HUGE) 
        || (prop->memAttr == ACL_DDR_MEM_P2P_NORMAL))) {
        ACL_LOG_ERROR("memAttr [%d] only support ACL_MEM_LOCATION_TYPE_HOST or ACL_MEM_LOCATION_TYPE_HOST_NUMA.", static_cast<int32_t>(prop->memAttr));
        const std::string memAttrVal = std::to_string(prop->memAttr);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_VALUE_MSG,
            std::vector<const char *>({"func", "value", "param", "expect"}),
            std::vector<const char *>({__func__, memAttrVal.c_str(), "memAttr",
                "ACL_MEM_LOCATION_TYPE_HOST or ACL_MEM_LOCATION_TYPE_HOST_NUMA"}));
        return ACL_ERROR_INVALID_PARAM;
    }
    auto it = memAttrHandlers.find(static_cast<int32_t>(prop->memAttr));
    if (it != memAttrHandlers.end()) {
        it->second(rtProp1, isHostAlloc, isDeviceAlloc);
    } else {
        ACL_LOG_ERROR("memAttr [%d] not support. "
                      "For details, please refer to the manual.",
                      static_cast<int32_t>(prop->memAttr));
        const std::string memAttrVal2 = std::to_string(prop->memAttr);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_REASON_MSG,
            std::vector<const char *>({"func", "value", "param", "reason"}),
            std::vector<const char *>({__func__, memAttrVal2.c_str(), "memAttr",
                "The current physical memory attribute is not supported"}));
        return ACL_ERROR_INVALID_PARAM;
    }

    const rtError_t rtErr = rtMemGetAllocationGranularity(&rtProp1,
        static_cast<rtDrvMemGranularityOptions>(option), granularity);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("Get Allocation Granularity failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtDeviceGetBareTgidImpl(int32_t *pid)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtDeviceGetBareTgid);
    ACL_LOG_DEBUG("start to execute aclrtDeviceGetBareTgid");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(pid);

    const rtError_t rtErr = rtDeviceGetBareTgid(reinterpret_cast<uint32_t *>(pid));
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("Get Bare Tgid Falied, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtCmoAsyncImpl(void *src, size_t size, aclrtCmoType cmoType, aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtCmoAsync);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(src);
    ACL_REQUIRES_POSITIVE_REPORT(size);
    const rtCmoOpCode_t type = static_cast<rtCmoOpCode_t>(static_cast<uint32_t>(cmoType) +
        (static_cast<uint32_t>(RT_CMO_PREFETCH) - static_cast<uint32_t>(ACL_RT_CMO_TYPE_PREFETCH)));
    ACL_REQUIRES_CALL_RTS_OK(rtCmoAsync(src, size, type, stream), rtCmoAsync);
    return ACL_SUCCESS;
}

aclError aclrtGetMemcpyDescSizeImpl(aclrtMemcpyKind kind, size_t *descSize)
{
    ACL_LOG_INFO("start to execute aclrtGetMemcpyDescSize");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(descSize);
    ACL_CHECK_INVALID_VALUE_WITH_DESC(
        static_cast<uint32_t>(kind) < static_cast<uint32_t>(RT_MEMCPY_KIND_MAX),
        acl::GetMemcpyKindDesc(kind), "kind",
        "[RT_MEMCPY_KIND_HOST_TO_HOST, RT_MEMCPY_KIND_MAX)",
        ACL_ERROR_INVALID_PARAM);
    const auto rt_mem_kind = static_cast<rtMemcpyKind>(static_cast<uint32_t>(kind));
    const auto rtErr = rtsGetMemcpyDescSize(rt_mem_kind, descSize);
    if (rtErr != RT_ERROR_NONE) {
       ACL_LOG_CALL_ERROR("Get memcpy desc size Failed, runtime result = %d", rtErr);
       return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtSetMemcpyDescImpl(void *desc, aclrtMemcpyKind kind, void *srcAddr, void *dstAddr, size_t count,
                                void *config)
{
    ACL_LOG_INFO("start to execute aclrtSetMemcpyDesc");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(desc);
    ACL_CHECK_INVALID_VALUE_WITH_DESC(
        static_cast<uint32_t>(kind) < static_cast<uint32_t>(RT_MEMCPY_KIND_MAX),
        acl::GetMemcpyKindDesc(kind), "kind",
        "[RT_MEMCPY_KIND_HOST_TO_HOST, RT_MEMCPY_KIND_MAX)",
        ACL_ERROR_INVALID_PARAM);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(srcAddr);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(dstAddr);
    ACL_REQUIRES_POSITIVE_REPORT(count);
    ACL_CHECK_INVALID_PARAM_NO_VALUE(config == nullptr, "reserve", "config is a reserved parameter and must be nullptr");

    const auto rt_mem_kind = static_cast<rtMemcpyKind>(static_cast<uint32_t>(kind));
    const auto rtErr = rtsSetMemcpyDesc(static_cast<rtMemcpyDesc_t>(desc), rt_mem_kind, srcAddr, dstAddr,
                                         count, nullptr);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("Set memcpy desc Failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemcpyAsyncWithDescImpl(void *desc, aclrtMemcpyKind kind, aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemcpyAsyncWithDesc);
    ACL_LOG_INFO("start to execute aclrtMemcpyAsyncWithDesc");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(desc);
    ACL_CHECK_INVALID_VALUE_WITH_DESC(
        static_cast<uint32_t>(kind) < static_cast<uint32_t>(RT_MEMCPY_KIND_MAX),
        acl::GetMemcpyKindDesc(kind), "kind",
        "[RT_MEMCPY_KIND_HOST_TO_HOST, RT_MEMCPY_KIND_MAX)",
        ACL_ERROR_INVALID_PARAM);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(stream);

    const auto rt_mem_kind = static_cast<rtMemcpyKind>(static_cast<int32_t>(kind));
    const auto rtErr = rtsMemcpyAsyncWithDesc(static_cast<rtMemcpyDesc_t>(desc), rt_mem_kind, nullptr, stream);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("Async memcpy with desc Failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemcpyAsyncWithOffsetImpl(void **dst, size_t destMax, size_t dstDataOffset, const void **src,
    size_t count, size_t srcDataOffset, aclrtMemcpyKind kind, aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemcpyAsyncWithOffset);
    ACL_LOG_INFO("start to execute aclrtMemcpyAsyncWithOffset");
    if (kind != ACL_MEMCPY_INNER_DEVICE_TO_DEVICE) {
        ACL_LOG_ERROR("[Check][Kind]invalid kind of memcpy with offset, kind = %d", static_cast<int32_t>(kind));
        return ACL_ERROR_INVALID_PARAM;
    }
    if (IsZeroSizeMemcpy(count)) {
        ACL_LOG_INFO("zero-size memcpy, no memory copy async with offsetwill be performed");
        return ACL_SUCCESS;
    }
    const auto memKind = static_cast<rtMemcpyKind>(static_cast<int32_t>(kind));
    const auto rtErr = rtMemcpyAsyncWithOffset(dst, destMax, dstDataOffset, src, count, srcDataOffset, memKind, stream);
    if (rtErr != RT_ERROR_NONE) {
        if (rtErr == ACL_ERROR_RT_FEATURE_NOT_SUPPORT) {
            ACL_LOG_WARN("rtMemcpyAsyncWithOffset unsupport, runtime result = %d", static_cast<int32_t>(rtErr));
        } else {
            ACL_LOG_CALL_ERROR("call rtMemcpyAsyncWithOffset Failed, runtime result = %d", rtErr);
        }
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_LOG_INFO("successfully execute aclrtMemcpyAsyncWithOffset");
    return ACL_SUCCESS;
}

aclError aclrtValueWriteImpl(void* devAddr, uint64_t value, uint32_t flag, aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtValueWrite);
    ACL_LOG_INFO("start to execute aclrtValueWrite");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devAddr);

    const auto rtErr = rtsValueWrite(devAddr, value, flag, static_cast<rtStream_t>(stream));
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsValueWrite Failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtValueWaitImpl(void* devAddr, uint64_t value, uint32_t flag, aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtValueWait);
    ACL_LOG_INFO("start to execute aclrtValueWait");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devAddr);

    const auto rtErr = rtsValueWait(devAddr, value, flag, static_cast<rtStream_t>(stream));
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsValueWait Failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtReduceAsyncImpl(void *dst, const void *src, uint64_t count, aclrtReduceKind kind,
    aclDataType type, aclrtStream stream, void *reserve)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtReduceAsync);
    ACL_LOG_DEBUG("start to execute aclrtReduceAsync, count = [%lu], kind = [%u], type = [%u]", count,
        static_cast<uint32_t>(kind), static_cast<uint32_t>(type));
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(dst);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(src);
    ACL_CHECK_INVALID_PARAM_NO_VALUE(reserve == nullptr, "reserve", "reserve is a reserved parameter and must be nullptr");

    rtDataType dataType;
    if (kMapDataType.count(type) > 0) {
        dataType = kMapDataType.at(type);
    } else {
        ACL_LOG_ERROR("[Check][param]param type [%d] is invalid.", static_cast<int32_t>(type));
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_REASON_MSG,
            std::vector<const char *>({"func", "value", "param", "reason"}),
            std::vector<const char *>({__func__, acl::GetDataTypeDesc(type), "type", "The data type is currently not supported"}));
        return ACL_ERROR_INVALID_PARAM;
    }

    rtReduceInfo_t reduceInfo;
    reduceInfo.dst = dst;
    reduceInfo.src = const_cast<void*>(src);
    reduceInfo.count = static_cast<size_t>(count);
    reduceInfo.kind = static_cast<rtReduceKind>(kind);
    reduceInfo.type = dataType;
    const rtError_t rtErr = rtsLaunchReduceAsyncTask(&reduceInfo, static_cast<rtStream_t>(stream), reserve);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsLaunchReduceAsyncTask failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtGetBufFromChainImpl(aclrtMbuf headBuf, uint32_t index, aclrtMbuf *buf)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtGetBufFromChain);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(headBuf);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(buf);
    ACL_REQUIRES_CALL_RTS_OK(rtMbufChainGetMbuf(headBuf, index, buf), rtMbufChainGetMbuf);
    return ACL_SUCCESS;
}

aclError aclrtGetBufChainNumImpl(aclrtMbuf headBuf, uint32_t *num)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtGetBufChainNum);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(headBuf);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(num);
    ACL_REQUIRES_CALL_RTS_OK(rtMbufChainGetMbufNum(headBuf, num), rtMbufChainGetMbufNum);
    return ACL_SUCCESS;
}

aclError aclrtAppendBufChainImpl(aclrtMbuf headBuf, aclrtMbuf buf)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtAppendBufChain);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(headBuf);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(buf);
    ACL_REQUIRES_CALL_RTS_OK(rtMbufChainAppend(headBuf, buf), rtMbufChainAppend);
    return ACL_SUCCESS;
}

aclError aclrtCopyBufRefImpl(const aclrtMbuf buf, aclrtMbuf *newBuf)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtCopyBufRef);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(buf);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(newBuf);
    ACL_REQUIRES_CALL_RTS_OK(rtMbufCopyBufRef(buf, newBuf), rtMbufCopyBufRef);
    return ACL_SUCCESS;
}

aclError aclrtGetBufUserDataImpl(const aclrtMbuf buf, void *dataPtr, size_t size, size_t offset)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtGetBufUserData);
    // The current default private data area size is 96B, if offset+size exceeds 96, an error is reported
    if (size + offset > MEM_SIZE_MAX) {
        ACL_LOG_ERROR("%s failed because the sum of size and offset greater than %u, size=%zu, offset=%zu.", __func__,
            MEM_SIZE_MAX, size, offset);
        const std::string sizeVal = std::to_string(size);
        std::string errMsg = acl::AclErrorLogManager::FormatStr("the sum of size and offset greater than %u, size=%zu, offset=%zu.",
            MEM_SIZE_MAX, size, offset);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_REASON_MSG,
            std::vector<const char *>({"func", "value", "param", "reason"}),
            std::vector<const char *>({__func__, sizeVal.c_str(), "size", errMsg.c_str()}));
        return ACL_ERROR_INVALID_PARAM;
    }

    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(buf);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(dataPtr);
    uint64_t bufSize = 0U;
    void *tmpDataPtr = nullptr;
    ACL_REQUIRES_CALL_RTS_OK(rtMbufGetPrivInfo(buf, &tmpDataPtr, &bufSize), rtMbufGetPrivInfo);
    ACL_CHECK_LESS_UINT(size + offset, static_cast<size_t>(bufSize));
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(tmpDataPtr);
    const auto ret = memcpy_s(dataPtr, size, (static_cast<uint8_t *>(tmpDataPtr) + offset), size);
    if (ret != EOK) {
        const std::string retVal = std::to_string(ret);
        const std::string extendInfo = "src=" + std::to_string(reinterpret_cast<uintptr_t>(tmpDataPtr)) + 
            ",dst=" + std::to_string(reinterpret_cast<uintptr_t>(dataPtr)) +
            ",dstLen=" + std::to_string(size) + ",srcLen=" + std::to_string(size);
        acl::AclErrorLogManager::ReportInputError(acl::STANDARD_FUNC_FAILED_MSG,
            std::vector<const char *>({"func1", "func2", "ret_code", "reason", "extend_info"}),
            std::vector<const char *>({__func__, "memcpy_s", retVal.c_str(),
                strerror(ret), extendInfo.c_str()}));
        ACL_LOG_ERROR("call memcpy_s failed, result = %d, size = %zu, bufSize = %lu, offset = %zu",
                            ret, size, bufSize, offset);
        return ACL_ERROR_FAILURE;
    }
    return ACL_SUCCESS;
}

aclError aclrtSetBufUserDataImpl(aclrtMbuf buf, const void *dataPtr, size_t size, size_t offset)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtSetBufUserData);
    // The current default private data area size is 96B, if offset+size exceeds 96, an error is reported
    if (size + offset > MEM_SIZE_MAX) {
        ACL_LOG_ERROR("%s failed because the sum of size and offset greater than %u, size=%zu, offset=%zu.", __func__,
            MEM_SIZE_MAX, size, offset);
        const std::string sizeVal = std::to_string(size);
        std::string errMsg = acl::AclErrorLogManager::FormatStr("the sum of size and offset greater than %u, size=%zu, offset=%zu",
            MEM_SIZE_MAX, size, offset);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_REASON_MSG,
            std::vector<const char *>({"func", "value", "param", "reason"}),
            std::vector<const char *>({__func__, sizeVal.c_str(), "size", errMsg.c_str()}));
        return ACL_ERROR_INVALID_PARAM;
    }
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(buf);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(dataPtr);
    uint64_t bufSize = 0U;
    void *tmpDataPtr = nullptr;
    ACL_REQUIRES_CALL_RTS_OK(rtMbufGetPrivInfo(buf, &tmpDataPtr, &bufSize), rtMbufGetPrivInfo);
    ACL_CHECK_LESS_UINT(size + offset, static_cast<size_t>(bufSize));
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(tmpDataPtr);
    const auto ret = memcpy_s((static_cast<uint8_t *>(tmpDataPtr) + offset),
                              (static_cast<size_t>(bufSize) - offset),
                              dataPtr, size);
    if (ret != EOK) {
        const std::string retVal = std::to_string(ret);
        const std::string extendInfo = "src=" + std::to_string(reinterpret_cast<uintptr_t>(dataPtr)) + 
            ", dst=" + std::to_string(reinterpret_cast<uintptr_t>(tmpDataPtr)) +
            ", dstLen=" + std::to_string(bufSize - offset) + ", srcLen=" + std::to_string(size);
        acl::AclErrorLogManager::ReportInputError(acl::STANDARD_FUNC_FAILED_MSG,
            std::vector<const char *>({"func1", "func2", "ret_code", "reason", "extend_info"}),
            std::vector<const char *>({__func__, "memcpy_s", retVal.c_str(),
                strerror(ret), extendInfo.c_str()}));
        ACL_LOG_ERROR("call memcpy_s failed, result = %d, size = %zu, bufSize = %lu, offset = %zu",
                            ret, size, bufSize, offset);
        return ACL_ERROR_FAILURE;
    }
    return ACL_SUCCESS;
}

aclError aclrtGetBufDataImpl(const aclrtMbuf buf, void **dataPtr, size_t *size)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtGetBufData);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(buf);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(dataPtr);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(size);
    ACL_REQUIRES_CALL_RTS_OK(rtMbufGetBuffAddr(buf, dataPtr), rtMbufGetBuffAddr);
    uint64_t bufSize = 0U;
    ACL_REQUIRES_CALL_RTS_OK(rtMbufGetBuffSize(buf, &bufSize), rtMbufGetBuffSize);
    *size = static_cast<size_t>(bufSize);
    return ACL_SUCCESS;
}

aclError aclrtGetBufDataLenImpl(aclrtMbuf buf, size_t *len)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtGetBufDataLen);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(buf);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(len);
    uint64_t dataLen = 0U;
    ACL_REQUIRES_CALL_RTS_OK(rtMbufGetDataLen(buf, &dataLen), rtMbufGetDataLen);
    *len = static_cast<size_t>(dataLen);
    return ACL_SUCCESS;
}

aclError aclrtSetBufDataLenImpl(aclrtMbuf buf, size_t len)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtSetBufDataLen);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(buf);
    ACL_REQUIRES_CALL_RTS_OK(rtMbufSetDataLen(buf, len), rtMbufSetDataLen);
    return ACL_SUCCESS;
}

aclError aclrtFreeBufImpl(aclrtMbuf buf)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtFreeBuf);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(buf);
    const rtError_t rtRet = rtMbufFree(buf);
    if (rtRet != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("[Free][buf]Failed to call rtMbufFree, result is [%d].", rtRet);
        return rtRet;
    }
    buf = nullptr;
    return ACL_SUCCESS;
}

aclError aclrtAllocBufImpl(aclrtMbuf *buf, size_t size)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtAllocBuf);
    ACL_LOG_INFO("start to execute aclrtAllocBuf, size is [%zu]", size);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(buf);
    // size must be greater than zero
    ACL_REQUIRES_POSITIVE_REPORT(size);
    const rtError_t rtRet = rtMbufAlloc(buf, size);
    if (rtRet != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("[Alloc][buf]Failed to call rtMbufAlloc, result is [%d].", rtRet);
        return rtRet;
    }
    return ACL_SUCCESS;
}

aclError aclrtCmoAsyncWithBarrierImpl(void *src, size_t size, aclrtCmoType cmoType, uint32_t barrierId,
    aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtCmoAsyncWithBarrier);
    ACL_LOG_INFO("start to execute aclrtCmoAsyncWithBarrier, size is [%zu], cmoType is [%u], barrierId is [%u]",
        size, static_cast<uint32_t>(cmoType), barrierId);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(src);
    const rtCmoOpCode rtCmoType = static_cast<rtCmoOpCode>(static_cast<uint32_t>(cmoType) +
        (static_cast<uint32_t>(RT_CMO_PREFETCH) - static_cast<uint32_t>(ACL_RT_CMO_TYPE_PREFETCH)));
    const auto rtErr = rtsCmoAsyncWithBarrier(src, size, rtCmoType, barrierId, static_cast<rtStream_t>(stream));
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsCmoAsyncWithBarrier Failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

static aclError ValidateMemcpyBatchParams(void **dsts, size_t *destMaxs, void **srcs, size_t *sizes,
    size_t numBatches, aclrtMemcpyBatchAttr *attrs, size_t *attrsIndexes, size_t numAttrs)
{
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(dsts);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(destMaxs);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(srcs);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(sizes);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(attrs);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(attrsIndexes);

    ACL_REQUIRES_POSITIVE_REPORT(numBatches);

    for (size_t i = 0UL; i < numBatches; i++) {
        if (destMaxs[i] < sizes[i]) {
            ACL_LOG_ERROR("element of destMaxs must be equal to or greater than corresponding element of sizes");
            const std::string destMaxsVal = std::to_string(destMaxs[i]);
            std::string errMsg = acl::AclErrorLogManager::FormatStr("The memory copy size %zu at index %zu exceeds the size %zu of the destination buffer",
                sizes[i], i, destMaxs[i]);
            acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_REASON_MSG,
                std::vector<const char *>({"func", "value", "param", "reason"}),
                std::vector<const char *>({__func__, destMaxsVal.c_str(), "destMaxs",
                    errMsg.c_str()}));
            return ACL_ERROR_INVALID_PARAM;
        }
    }

    constexpr uint32_t rsvMaxSize = sizeof(aclrtMemcpyBatchAttr::rsv) / sizeof(uint8_t);
    for (size_t idx = 0UL; idx < numAttrs; idx++) {
        for (uint32_t i = 0U; i < rsvMaxSize; i++) {
            if (attrs[idx].rsv[i] != 0U) {
                ACL_LOG_ERROR("rsv field of attrs[%zu] must be zero", idx);
                const std::string rsvVal = std::to_string(attrs[idx].rsv[i]);
                acl::AclErrorLogManager::ReportInputError(acl::INVALID_VALUE_MSG,
                    std::vector<const char *>({"func", "value", "param", "expect"}),
                    std::vector<const char *>({__func__, rsvVal.c_str(), "attrs.rsv", "0"}));
                return ACL_ERROR_INVALID_PARAM;
            }
        }
    }

    return ACL_SUCCESS;
}

static aclError MemcpyBatchImpl(void **dsts, size_t *destMaxs, void **srcs, size_t *sizes, size_t numBatches,
    aclrtMemcpyBatchAttr *attrs, size_t *attrsIndexes, size_t numAttrs, size_t *failIndex,
    aclrtStream stream, bool async, const char *apiName)
{
    const aclError ret = ValidateMemcpyBatchParams(dsts, destMaxs, srcs, sizes, numBatches, attrs, attrsIndexes,
        numAttrs);
    if (ret != ACL_SUCCESS) {
        return ret;
    }
    if (IsAllZeroSizeBatch(sizes, numBatches)) {
        if (failIndex != nullptr) {
            *failIndex = SIZE_MAX;
        }
        ACL_LOG_INFO("successfully execute %s", apiName);
        return ACL_SUCCESS;
    }

    if (async) {
        const auto rtErr = rtsMemcpyBatchAsync(dsts, destMaxs, srcs, sizes, numBatches, reinterpret_cast<rtMemcpyBatchAttr*>(attrs),
            attrsIndexes, numAttrs, failIndex, stream);
        if (rtErr != RT_ERROR_NONE) {
            if (rtErr == ACL_ERROR_RT_FEATURE_NOT_SUPPORT) {
                ACL_LOG_WARN("rtsMemcpyBatchAsync unsupport, runtime result = %d.", static_cast<int32_t>(rtErr));
            } else {
                ACL_LOG_CALL_ERROR("call rtsMemcpyBatchAsync failed, runtime result = %d.", static_cast<int32_t>(rtErr));
            }   
            return ACL_GET_ERRCODE_RTS(rtErr);
        }
        ACL_LOG_INFO("successfully execute %s", apiName);
    } else {
        const auto rtErr = rtsMemcpyBatch(dsts, srcs, sizes, numBatches, reinterpret_cast<rtMemcpyBatchAttr*>(attrs),
            attrsIndexes, numAttrs, failIndex);
        if (rtErr != RT_ERROR_NONE) {
            if (rtErr == ACL_ERROR_RT_FEATURE_NOT_SUPPORT) {
                ACL_LOG_WARN("rtsMemcpyBatch unsupport, runtime result = %d.", static_cast<int32_t>(rtErr));
            } else {
                ACL_LOG_CALL_ERROR("call rtsMemcpyBatch failed, runtime result = %d.", static_cast<int32_t>(rtErr));
            }   
            return ACL_GET_ERRCODE_RTS(rtErr);
        }
        ACL_LOG_INFO("successfully execute %s", apiName);
    }

    return ACL_SUCCESS;
}

aclError aclrtIpcMemGetExportKeyImpl(void *devPtr, size_t size, char *key, size_t len, uint64_t flags)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtIpcMemGetExportKey);
    ACL_LOG_INFO("start to execute aclrtIpcMemGetExportKey, size is [%zu], len is [%zu], flags is [%lu]",
        size, len, flags);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devPtr);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(key);
    const auto rtErr = rtsIpcMemGetExportKey(devPtr, size, key, static_cast<uint32_t>(len), flags);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsIpcMemGetExportKey failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtIpcMemCloseImpl(const char *key)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtIpcMemClose);
    ACL_LOG_INFO("start to execute aclrtIpcMemClose");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(key);
    const auto rtErr = rtsIpcMemClose(key);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsIpcMemClose failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtIpcMemImportByKeyImpl(void **devPtr, const char *key, uint64_t flags)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtIpcMemImportByKey);
    ACL_LOG_INFO("start to execute aclrtIpcMemImportByKey, flags is [%lu]", flags);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devPtr);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(key);
    const auto rtErr = rtsIpcMemImportByKey(devPtr, key, flags);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsIpcMemImportByKey failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemcpyBatchImpl(void **dsts, size_t *destMaxs, void **srcs, size_t *sizes, size_t numBatches,
    aclrtMemcpyBatchAttr *attrs, size_t *attrsIndexes, size_t numAttrs, size_t *failIndex)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemcpyBatch);
    ACL_LOG_INFO("start to execute aclrtMemcpyBatch");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(failIndex);
    return MemcpyBatchImpl(dsts, destMaxs, srcs, sizes, numBatches, attrs, attrsIndexes, numAttrs, failIndex,
        nullptr, false, "aclrtMemcpyBatch");
}

aclError aclrtMemcpyBatchV2Impl(void **dsts, size_t *destMaxs, void **srcs, size_t *sizes, size_t numBatches,
    aclrtMemcpyBatchAttr *attrs, size_t *attrsIndexes, size_t numAttrs)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemcpyBatchV2);
    ACL_LOG_INFO("start to execute aclrtMemcpyBatchV2");
    return MemcpyBatchImpl(dsts, destMaxs, srcs, sizes, numBatches, attrs, attrsIndexes, numAttrs, nullptr,
        nullptr, false, "aclrtMemcpyBatchV2");
}

aclError aclrtMemcpyBatchAsyncImpl(void **dsts, size_t *destMaxs, void **srcs, size_t *sizes,
    size_t numBatches, aclrtMemcpyBatchAttr *attrs, size_t *attrsIndexes, size_t numAttrs, size_t *failIndex,
    aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemcpyBatchAsync);
    ACL_LOG_INFO("start to execute aclrtMemcpyBatchAsync");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(failIndex);
    return MemcpyBatchImpl(dsts, destMaxs, srcs, sizes, numBatches, attrs, attrsIndexes, numAttrs, failIndex,
        stream, true, "aclrtMemcpyBatchAsync");
}

aclError aclrtMemcpyBatchAsyncV2Impl(void **dsts, size_t *destMaxs, void **srcs, size_t *sizes,
    size_t numBatches, aclrtMemcpyBatchAttr *attrs, size_t *attrsIndexes, size_t numAttrs, aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemcpyBatchAsyncV2);
    ACL_LOG_INFO("start to execute aclrtMemcpyBatchAsyncV2");
    return MemcpyBatchImpl(dsts, destMaxs, srcs, sizes, numBatches, attrs, attrsIndexes, numAttrs, nullptr,
        stream, true, "aclrtMemcpyBatchAsyncV2");
}

aclError aclrtIpcMemSetImportPidImpl(const char *key, int32_t *pid, size_t num)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtIpcMemSetImportPid);
    ACL_LOG_INFO("start to execute aclrtIpcMemSetImportPid, num is [%zu]", num);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(key);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(pid);
    const auto rtErr = rtsIpcMemSetImportPid(key, pid, static_cast<int32_t>(num));
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsIpcMemSetImportPid failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtIpcMemSetAttrImpl(const char *key, aclrtIpcMemAttrType type, uint64_t attr)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtIpcMemSetAttr);
    ACL_LOG_INFO("start to execute aclrtIpcMemSetAttr, type is [%d], attr is [%lu]", type, attr);
    const auto rtErr = rtIpcSetMemoryAttr(key, type, attr);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtIpcSetMemoryAttr failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_LOG_INFO("successfully execute aclrtIpcMemSetAttr");
    return ACL_SUCCESS;
}

aclError aclrtIpcMemImportPidInterServerImpl(const char *key, aclrtServerPid *serverPids, size_t num)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtIpcMemImportPidInterServer);
    ACL_LOG_INFO("start to execute aclrtIpcMemImportPidInterServer, num is [%zu]", num);
    const auto rtErr = rtIpcMemImportPidInterServer(key, reinterpret_cast<const rtServerPid *>(serverPids), num);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call aclrtIpcMemImportPidInterServer failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_LOG_INFO("successfully execute aclrtIpcMemImportPidInterServer");
    return ACL_SUCCESS;
}

aclError aclrtCheckMemTypeImpl(void** addrList, uint32_t size, uint32_t memType, uint32_t *checkResult, uint32_t reserve)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtCheckMemType);
    ACL_LOG_INFO("start to execute AclrtCheckMemType, size is [%u], memType is [%u], reserve is [%u]", size, memType, reserve);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(addrList);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(checkResult);
    const auto rtErr = rtsCheckMemType(addrList, size, memType, checkResult, reserve);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsCheckMemType failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtDevicePeerAccessStatusImpl(int32_t deviceId, int32_t peerDeviceId, int32_t *status)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtDevicePeerAccessStatus);
    ACL_LOG_INFO("start to execute aclrtDevicePeerAccessStatus");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(status);
    const rtError_t rtErr = rtsGetP2PStatus(
        static_cast<uint32_t>(deviceId), static_cast<uint32_t>(peerDeviceId), reinterpret_cast<uint32_t *>(status));
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsGetP2PStatus failed, runtime result = %d.", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_LOG_INFO("successfully execute aclrtDevicePeerAccessStatus");
    return ACL_SUCCESS;
}

aclError aclrtCmoGetDescSizeImpl(size_t *size)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtCmoGetDescSize);
    ACL_LOG_DEBUG("start to execute aclrtCmoGetDescSize");
    const rtError_t rtErr = rtsGetCmoDescSize(size);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsGetCmoDescSize failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_LOG_INFO("successfully execute aclrtCmoGetDescSize");
    return ACL_SUCCESS;
}

aclError aclrtCmoSetDescImpl(void *cmoDesc, void *src, size_t size)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtCmoSetDesc);
    ACL_LOG_DEBUG("start to execute aclrtCmoSetDesc, memLen =%zu", size);
    const rtError_t rtErr = rtsSetCmoDesc(cmoDesc, src, size);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsSetCmoDesc failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_LOG_INFO("successfully execute aclrtCmoSetDesc");
    return ACL_SUCCESS;
}

static rtCmoOpCode ConvertCmoType(aclrtCmoType cmoType)
{
    constexpr uint32_t offset =
        static_cast<uint32_t>(RT_CMO_PREFETCH) - static_cast<uint32_t>(ACL_RT_CMO_TYPE_PREFETCH);
    return static_cast<rtCmoOpCode>(static_cast<uint32_t>(cmoType) + offset);
}

aclError aclrtCmoAsyncWithDescImpl(void *cmoDesc, aclrtCmoType cmoType, aclrtStream stream, const void *reserve)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtCmoAsyncWithDesc);
    ACL_LOG_DEBUG("start to execute aclrtCmoAsyncWithDesc");
    const rtCmoOpCode rtCmoType = ConvertCmoType(cmoType);
    const rtError_t rtErr = rtsLaunchCmoAddrTask(cmoDesc, stream, rtCmoType, reserve);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtsLaunchCmoAddrTask failed, runtime result = %d", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_LOG_INFO("successfully execute aclrtCmoAsyncWithDesc");
    return ACL_SUCCESS;
}

aclError aclrtMemSetAccessImpl(void *virPtr, size_t size, aclrtMemAccessDesc *desc, size_t count)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemSetAccess);
    ACL_LOG_INFO("start to execute aclrtMemSetAccess");
    
    const rtError_t rtErr = rtMemSetAccess(virPtr, size, reinterpret_cast<rtMemAccessDesc*>(desc), count);
    if (rtErr != RT_ERROR_NONE) {
        if (rtErr == ACL_ERROR_RT_FEATURE_NOT_SUPPORT) {
            ACL_LOG_WARN("call aclrtMemSetAccess failed, runtime result = %d.", static_cast<int32_t>(rtErr));
        } else {
            ACL_LOG_CALL_ERROR("call aclrtMemSetAccess failed, runtime result = %d.", static_cast<int32_t>(rtErr));
        }   
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_LOG_INFO("successfully execute aclrtMemSetAccess");
    return ACL_SUCCESS;
}

aclError aclrtMemRetainAllocationHandleImpl(void* virPtr, aclrtDrvMemHandle *handle) 
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemRetainAllocationHandle);
    ACL_LOG_DEBUG("start to execute aclrtMemRetainAllocationHandle");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(virPtr);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(handle);

    const rtError_t rtErr = rtMemRetainAllocationHandle(virPtr, reinterpret_cast<rtDrvMemHandle*>(handle));
    if (rtErr != ACL_RT_SUCCESS) {
        ACL_LOG_CALL_ERROR("get handle failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

//initialize the mapping table
static const MemAttrMapping mapping[] {
    {HUGE_PAGE_TYPE, HBM_TYPE, true, ACL_HBM_MEM_HUGE},
    {NORMAL_PAGE_TYPE, HBM_TYPE, true, ACL_HBM_MEM_NORMAL},
    {HUGE1G_PAGE_TYPE, HBM_TYPE, true, ACL_HBM_MEM_HUGE1G},
    {NORMAL_PAGE_TYPE, P2P_DDR_TYPE, true, ACL_DDR_MEM_P2P_NORMAL},
    {NORMAL_PAGE_TYPE, DDR_TYPE, true, ACL_MEM_NORMAL},
    {HUGE_PAGE_TYPE, DDR_TYPE, true, ACL_MEM_HUGE},
    {HUGE1G_PAGE_TYPE, DDR_TYPE, true, ACL_MEM_HUGE1G},
    {HUGE_PAGE_TYPE, P2P_DDR_TYPE, true, ACL_MEM_P2P_HUGE},
    {HUGE1G_PAGE_TYPE, P2P_DDR_TYPE, true, ACL_MEM_P2P_HUGE1G},
    {NORMAL_PAGE_TYPE, HBM_TYPE, false, ACL_HBM_MEM_NORMAL},
    {HUGE_PAGE_TYPE, HBM_TYPE, false, ACL_HBM_MEM_HUGE},
    {HUGE1G_PAGE_TYPE, HBM_TYPE, false, ACL_HBM_MEM_HUGE1G},
    {NORMAL_PAGE_TYPE, DDR_TYPE, false, ACL_MEM_NORMAL},
    {HUGE_PAGE_TYPE, DDR_TYPE, false, ACL_MEM_HUGE},
    {HUGE1G_PAGE_TYPE, DDR_TYPE, false, ACL_MEM_HUGE1G},
    {NORMAL_PAGE_TYPE, P2P_HBM_TYPE, false, ACL_MEM_P2P_NORMAL},
    {HUGE_PAGE_TYPE, P2P_HBM_TYPE, false, ACL_MEM_P2P_HUGE},
    {HUGE1G_PAGE_TYPE, P2P_HBM_TYPE, false, ACL_MEM_P2P_HUGE1G}
};

aclError aclrtMemGetAllocationPropertiesFromHandleImpl(aclrtDrvMemHandle handle, aclrtPhysicalMemProp* prop)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemGetAllocationPropertiesFromHandle);
    ACL_LOG_DEBUG("start to execute AclrtMemGetAllocationPropertiesFromHandle");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(handle);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(prop);

    rtDrvMemProp_t rtProp = {};
    const rtError_t rtErr = rtMemGetAllocationPropertiesFromHandle(reinterpret_cast<rtDrvMemHandle>(handle), &rtProp);
    if (rtErr != ACL_RT_SUCCESS) {
        ACL_LOG_CALL_ERROR("get handle failed, runtime result = %d", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }

    prop->handleType = ACL_MEM_HANDLE_TYPE_NONE;
    prop->allocationType = ACL_MEM_ALLOCATION_TYPE_PINNED;
    if (rtProp.side == DRV_MEM_HOST_NUMA_SIDE) {
        // convert drv side to acl locationtype
        prop->location.type = ACL_MEM_LOCATION_TYPE_HOST_NUMA;
    } else {
        prop->location.type = static_cast<aclrtMemLocationType>(rtProp.side);
    }
    prop->location.id = rtProp.devid;
    prop->reserve = rtProp.reserve;

    //host alloc
    bool isHostAlloc = (prop->location.type == ACL_MEM_LOCATION_TYPE_HOST) || (prop->location.type == ACL_MEM_LOCATION_TYPE_HOST_NUMA);

    const auto& it = std::find_if(std::begin(mapping), std::end(mapping),
        [rtProp, isHostAlloc](const MemAttrMapping& entry) {
            return (entry.pgType == rtProp.pg_type) &&
                (entry.memType == rtProp.mem_type) &&
                (entry.isHostAlloc == isHostAlloc);
        });
    if (it != std::end(mapping)) {
        prop->memAttr = it->memAttr;
    } else {
        ACL_LOG_ERROR("memAttr not found for pg_type=%u, mem_type=%u, isHostAlloc=%u",
                      rtProp.pg_type, rtProp.mem_type, isHostAlloc);
        return ACL_ERROR_INVALID_PARAM;
    }
    return ACL_SUCCESS;
}

aclError aclrtReserveMemAddressNoUCMemoryImpl(void **virPtr, size_t size, size_t alignment, void *expectPtr, uint64_t flags)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtReserveMemAddressNoUCMemory);
    ACL_LOG_DEBUG("start to execute aclrtReserveMemAddressNoUCMemory, size = %zu", size);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(virPtr);

    ACL_REQUIRES_POSITIVE_REPORT(size);
    // flags参数取1,为了早期接口兼容性保留
    ACL_CHECK_INVALID_VALUE_WITH_EXPECT((flags == 0ULL) || (flags == 1ULL), flags, "0");

    flags = flags | FLAG_START_DYNAMIC_ALLOC_MEM; // bit 9置1
    const rtError_t rtErr = rtReserveMemAddress(virPtr, size, alignment, expectPtr, flags);
    if (rtErr != RT_ERROR_NONE) {
        if (rtErr == ACL_ERROR_RT_FEATURE_NOT_SUPPORT) {
            ACL_LOG_WARN("reserve memory address without UCMemeory unsupport, runtime result = %d", static_cast<int32_t>(rtErr));
        } else {
            ACL_LOG_CALL_ERROR("reserve memory address without UCMemeory failed, runtime result = %d", static_cast<int32_t>(rtErr));
        }   
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemGetAddressRangeImpl(void *ptr, void **pbase, size_t *psize)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemGetAddressRange);
    ACL_LOG_DEBUG("start to execute aclrtMemGetAddressRange");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptr);
    const rtError_t rtErr = rtMemGetAddressRange(ptr, pbase, psize);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call  aclrtMemGetAddressRange failed, runtime result = %d.", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_LOG_INFO("successfully execute aclrtMemGetAddressRange");
    return ACL_SUCCESS;
}

aclError aclrtMemP2PMapImpl(void *devPtr, size_t size, int32_t dstDevId, uint64_t flags)
{
    ACL_PROFILING_REG(acl::AclProfType::aclrtMemP2PMap);
    ACL_LOG_INFO("start to execute aclrtMemP2PMap");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devPtr);
    ACL_REQUIRES_POSITIVE_REPORT(size);
    ACL_CHECK_RESERVED_PARAM_REPORT_RET(flags, 0, ACL_ERROR_INVALID_PARAM);
    uint32_t phyId = 0U;
    rtError_t rtErr = rtGetDevicePhyIdByIndex(static_cast<uint32_t>(dstDevId), &phyId);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtGetDevicePhyIdByIndex failed, dstDevId = %u, runtime result = %d",
            dstDevId, static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    rtErr = rtMemPrefetchToDevice(devPtr, size, phyId);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call  aclrtMemP2PMap failed, runtime result = %d.", static_cast<int32_t>(rtErr));
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_LOG_INFO("successfully execute aclrtMemP2PMap");
    return ACL_SUCCESS;
}

aclError aclrtMemPoolCreateImpl(aclrtMemPool *memPool, const aclrtMemPoolProps *poolProps)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemPoolCreate);
    ACL_LOG_INFO("start to execute aclrtMemPoolCreate.");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(memPool);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(poolProps);

    ACL_CHECK_INVALID_VALUE_WITH_DESC(poolProps->allocType == aclrtMemAllocationType::ACL_MEM_ALLOCATION_TYPE_PINNED,
        acl::GetMemAllocationTypeDesc(poolProps->allocType), "poolProps->allocType",
        "ACL_MEM_ALLOCATION_TYPE_PINNED", ACL_ERROR_INVALID_PARAM);

    ACL_CHECK_INVALID_VALUE_WITH_DESC(poolProps->location.type == aclrtMemLocationType::ACL_MEM_LOCATION_TYPE_DEVICE,
        acl::GetMemLocationTypeDesc(poolProps->location.type), "poolProps->location.type",
        "ACL_MEM_LOCATION_TYPE_DEVICE", ACL_ERROR_INVALID_PARAM);

    rtMemPoolProps rtPoolProps;
    rtPoolProps.side = ACL_MEM_LOCATION_TYPE_DEVICE;
    rtPoolProps.devId = poolProps->location.id;
    rtPoolProps.handleType = static_cast<rtDrvMemHandleType>(poolProps->handleType);
    rtPoolProps.maxSize = poolProps->maxSize;
    rtPoolProps.reserve = 0;

    uint8_t zeros[sizeof(poolProps->reserved)] = {0};
    ACL_CHECK_INVALID_PARAM_NO_VALUE(memcmp(poolProps->reserved, zeros, sizeof(poolProps->reserved)) == 0,
        "poolProps->reserved", "poolProps->reserved is a reserved parameter and must be nullptr");

    const auto rtErr = rtMemPoolCreate(reinterpret_cast<rtMemPool_t*>(memPool), &rtPoolProps);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtMemPoolCreate failed, runtime result = %d.", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemPoolDestroyImpl(const aclrtMemPool memPool)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemPoolDestroy);
    ACL_LOG_INFO("start to execute aclrtMemPoolDestroy.");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(memPool);

    const auto rtErr = rtMemPoolDestroy(static_cast<rtMemPool_t>(memPool));
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtMemPoolDestroy failed, runtime result = %d.", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemPoolSetAttrImpl(aclrtMemPool memPool, aclrtMemPoolAttr attr, void *value)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemPoolSetAttr);
    ACL_LOG_INFO("start to execute aclrtMemPoolSetAttr.");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(memPool);

    const auto rtErr = rtMemPoolSetAttr(static_cast<rtMemPool_t>(memPool), static_cast<rtMemPoolAttr>(attr), value);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtMemPoolSetAttr failed, runtime result = %d.", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemPoolGetAttrImpl(aclrtMemPool memPool, aclrtMemPoolAttr attr, void *value)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemPoolGetAttr);
    ACL_LOG_INFO("start to execute aclrtMemPoolGetAttr.");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(memPool);

    const auto rtErr = rtMemPoolGetAttr(static_cast<rtMemPool_t>(memPool), static_cast<rtMemPoolAttr>(attr), value);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("call rtMemPoolGetAttr failed, runtime result = %d.", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}


aclError aclrtMemPoolMallocAsyncImpl(void ** ptr, size_t size, aclrtMemPool memPool, aclrtStream stream)
{   
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemPoolMallocAsync);
    ACL_LOG_INFO("Start to execute aclrtMemPoolMallocAsync.");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptr);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(memPool);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(stream);
    if (size == 0) {
        return ACL_SUCCESS;
    }
    
    const auto rtErr = rtMemPoolMallocAsync(ptr, size, memPool, stream);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("Call rtMemPoolMallocAsync failed, runtime result = %d, ptr = %p, size = %zu", rtErr, ptr, size);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
 
    return ACL_SUCCESS;
}
 
aclError aclrtMemPoolFreeAsyncImpl(void * ptr, aclrtStream stream) 
{   
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemPoolFreeAsync);
    ACL_LOG_INFO("Start to execute aclrtMemPoolFreeAsync.");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptr);
 
    const auto rtErr = rtMemPoolFreeAsync(ptr, stream);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("Free memory pool failed, runtime result = %d.", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemPoolTrimToImpl(aclrtMemPool memPool, size_t minBytesToKeep)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemPoolTrimTo);
    ACL_LOG_INFO("Start to execute aclrtMemPoolTrimTo.");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(memPool);

    const auto rtErr = rtMemPoolTrimTo(static_cast<rtMemPool_t>(memPool), minBytesToKeep);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("Call rtMemPoolTrimTo failed, runtime result = %d.", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

static rtMemManagedLocationType ConvertMemManagedLocationType(aclrtMemManagedLocationType const locationType)
{
    switch (locationType) {
        case ACL_MEM_LOCATIONTYPE_DEVICE:
            return rtMemLocationTypeDevice;
        case ACL_MEM_LOCATIONTYPE_HOST:
            return rtMemLocationTypeHost;
        case ACL_MEM_LOCATIONTYPE_HOST_NUMA:
            return rtMemLocationTypeHostNuma;
        case ACL_MEM_LOCATIONTYPE_HOST_NUMA_CURRENT:
            return rtMemLocationTypeHostNumaCurrent;
        default:
            return rtMemLocationTypeInvalid;
    }
}

aclError aclrtMemManagedPrefetchAsyncImpl(const void* ptr, size_t size, aclrtMemManagedLocation location, uint32_t flags,
    aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemManagedPrefetchAsync);
    ACL_LOG_DEBUG("start to execute aclrtMemManagedPrefetchAsync");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptr);
    ACL_REQUIRES_POSITIVE_REPORT(size);
    ACL_CHECK_RESERVED_PARAM_REPORT_RET(flags, 0, ACL_ERROR_INVALID_PARAM);
    rtMemManagedLocation uvmLocation = { ConvertMemManagedLocationType(location.type), location.id };
    ACL_REQUIRES_CALL_RTS_OK(rtMemManagedPrefetchAsync(ptr, size, uvmLocation, flags, static_cast<rtStream_t>(stream)),
        rtMemManagedPrefetchAsync);
    return ACL_SUCCESS;
}

aclError aclrtMemManagedPrefetchBatchAsyncImpl(const void** ptrs, size_t* sizes, size_t count,
    aclrtMemManagedLocation* prefetchLocs, size_t* prefetchLocIdxs, size_t numPrefetchLocs, uint64_t flags,
    aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemManagedPrefetchBatchAsync);
    ACL_LOG_DEBUG("start to execute aclrtMemManagedPrefetchBatchAsync");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(ptrs);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(sizes);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(prefetchLocs);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(prefetchLocIdxs);

    ACL_REQUIRES_POSITIVE_REPORT(count);
    ACL_REQUIRES_POSITIVE_REPORT(numPrefetchLocs);
    ACL_CHECK_RESERVED_PARAM_REPORT_RET(flags, 0, ACL_ERROR_INVALID_PARAM);
    if (count < numPrefetchLocs) {
        ACL_LOG_ERROR("[Check][PARAM]count must be greater than or equal to numPrefetchLocs");
        const std::string countVal = std::to_string(count);
        std::string errMsg = acl::AclErrorLogManager::FormatStr("must be greater than or equal to numPrefetchLocs %zu", numPrefetchLocs);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_REASON_MSG,
            std::vector<const char *>({"func", "value", "param", "reason"}),
            std::vector<const char *>({__func__, countVal.c_str(), "count", errMsg.c_str()}));
        return ACL_ERROR_INVALID_PARAM;
    }

    rtMemManagedLocation* uvmPrefetchLocs = new(std::nothrow) rtMemManagedLocation[numPrefetchLocs];
    ACL_CHECK_MALLOC_RESULT_REPORT_RET(uvmPrefetchLocs, sizeof(rtMemManagedLocation) * numPrefetchLocs, ACL_ERROR_BAD_ALLOC);

    for (size_t numPrefetchIdx = 0; numPrefetchIdx < numPrefetchLocs; numPrefetchIdx++) {
        uvmPrefetchLocs[numPrefetchIdx].id = prefetchLocs[numPrefetchIdx].id;
        uvmPrefetchLocs[numPrefetchIdx].type = ConvertMemManagedLocationType(prefetchLocs[numPrefetchIdx].type);
    }

    const rtError_t rtErr = rtMemManagedPrefetchBatchAsync(ptrs, sizes, count, uvmPrefetchLocs, prefetchLocIdxs, numPrefetchLocs, flags, static_cast<rtStream_t>(stream));
    ACL_DELETE_ARRAY_AND_SET_NULL(uvmPrefetchLocs);
    ACL_REQUIRES_CALL_RTS_OK(rtErr, rtMemManagedPrefetchBatchAsync);
    return ACL_SUCCESS;
}

aclError aclrtGetSymbolAddressImpl(const void *symbol, void **devPtr)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtGetSymbolAddress);
    ACL_LOG_DEBUG("start to execute aclrtGetSymbolAddress.");

    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(symbol);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(devPtr);

    size_t size = 0UL;
    const rtError_t rtErr = rtSymbolLookup(symbol, devPtr, &size);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("rtSymbolLookup failed, runtime result = %d.", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtGetSymbolSizeImpl(const void *symbol, size_t *size)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtGetSymbolSize);
    ACL_LOG_DEBUG("start to execute aclrtGetSymbolSize.");

    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(symbol);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(size);

    void *devPtr = nullptr;
    const rtError_t rtErr = rtSymbolLookup(symbol, &devPtr, size);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("rtSymbolLookup failed, runtime result = %d.", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    return ACL_SUCCESS;
}

static aclError GetSymbolInfo(const void *symbol, size_t count, size_t offset,
                                void **symbolAddr, size_t *symbolSize)
{
    *symbolAddr = nullptr;
    *symbolSize = 0UL;
    const rtError_t rtErr = rtSymbolLookup(symbol, symbolAddr, symbolSize);
    if (rtErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("rtSymbolLookup failed, runtime result = %d.", rtErr);
        return ACL_GET_ERRCODE_RTS(rtErr);
    }

    size_t totalSize = 0UL;
    ACL_CHECK_ASSIGN_SIZET_ADD(offset, count, totalSize);
    if (totalSize > *symbolSize) {
        ACL_LOG_ERROR("[Check][Offset]offset[%zu] + count[%zu] must be <= symbolSize[%zu].",
            offset, count, *symbolSize);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_MSG,
            std::vector<const char *>({"param", "value", "reason"}),
            std::vector<const char *>({"offset+count", std::to_string(totalSize).c_str(),
            "must be <= symbolSize"}));
        return ACL_ERROR_INVALID_PARAM;
    }

    return ACL_SUCCESS;
}

static aclError CheckMemcpyFromSymbol(void *dst, const void *symbol, size_t count, size_t dstMax,
                                      aclrtMemcpyKind kind)
{
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(symbol);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(dst);

    if (count > dstMax) {
        ACL_LOG_ERROR("[Check][Count]count[%zu] must not be greater than dstMax[%zu].", count, dstMax);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_MSG,
            std::vector<const char *>({"param", "value", "reason"}),
            std::vector<const char *>({"count", std::to_string(count).c_str(), "must not be greater than dstMax"}));
        return ACL_ERROR_INVALID_PARAM;
    }

    if ((kind != ACL_MEMCPY_DEVICE_TO_HOST) && (kind != ACL_MEMCPY_DEFAULT)) {
        ACL_LOG_ERROR("[Check][Kind]kind[%d] only support ACL_MEMCPY_DEVICE_TO_HOST or ACL_MEMCPY_DEFAULT",
            static_cast<int32_t>(kind));
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_VALUE_MSG,
            std::vector<const char *>({"func", "value", "param", "expect"}),
            std::vector<const char *>({__func__, acl::GetMemcpyKindDesc(kind), "kind",
            "ACL_MEMCPY_DEVICE_TO_HOST or ACL_MEMCPY_DEFAULT"}));
        return ACL_ERROR_INVALID_PARAM;
    }

    return ACL_SUCCESS;
}

aclError aclrtMemcpyFromSymbolImpl(void *dst, size_t dstMax, const void *symbol,
                                    size_t count, size_t offset, aclrtMemcpyKind kind)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemcpyFromSymbol);
    ACL_LOG_DEBUG("start to execute aclrtMemcpyFromSymbol, count = %zu, offset = %zu.", count, offset);

    aclError ret = CheckMemcpyFromSymbol(dst, symbol, count, dstMax, kind);
    if (ret != ACL_SUCCESS) {
        return ret;
    }
    
    void *symbolAddr = nullptr;
    size_t symbolSize = 0UL;
    ret = GetSymbolInfo(symbol, count, offset, &symbolAddr, &symbolSize);
    if (ret != ACL_SUCCESS) {
        return ret;
    }

    void *srcAddr = static_cast<void *>(static_cast<uint8_t *>(symbolAddr) + offset);
    const rtError_t copyErr = rtMemcpy(dst, dstMax, srcAddr, count, RT_MEMCPY_DEVICE_TO_HOST);
    if (copyErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("rtMemcpy failed, runtime result = %d.", copyErr);
        return ACL_GET_ERRCODE_RTS(copyErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemcpyFromSymbolAsyncImpl(void *dst, size_t dstMax, const void *symbol,
                                         size_t count, size_t offset, aclrtMemcpyKind kind,
                                         aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemcpyFromSymbolAsync);
    ACL_LOG_DEBUG("start to execute aclrtMemcpyFromSymbolAsync, count = %zu, offset = %zu.", count, offset);

    aclError aclErr = CheckMemcpyFromSymbol(dst, symbol, count, dstMax, kind);
    if (aclErr != ACL_SUCCESS) {
        return aclErr;
    }

    void *symbolAddr = nullptr;
    size_t symbolSize = 0UL;
    aclErr = GetSymbolInfo(symbol, count, offset, &symbolAddr, &symbolSize);
    if (aclErr != ACL_SUCCESS) {
        return aclErr;
    }

    void *srcAddr = static_cast<void *>(static_cast<uint8_t *>(symbolAddr) + offset);
    const rtError_t copyErr = rtMemcpyAsync(dst, dstMax, srcAddr, count, RT_MEMCPY_DEVICE_TO_HOST, stream);
    if (copyErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("rtMemcpyAsync failed, runtime result = %d.", copyErr);
        return ACL_GET_ERRCODE_RTS(copyErr);
    }
    return ACL_SUCCESS;
}

static aclError CheckMemcpyToSymbol(const void *symbol, const void *src,
                                    aclrtMemcpyKind kind)
{
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(symbol);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(src);

    if ((kind != ACL_MEMCPY_HOST_TO_DEVICE) && (kind != ACL_MEMCPY_DEFAULT)) {
        ACL_LOG_ERROR("[Check][Kind]kind[%d] only support ACL_MEMCPY_HOST_TO_DEVICE or ACL_MEMCPY_DEFAULT",
            static_cast<int32_t>(kind));
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_VALUE_MSG,
            std::vector<const char *>({"func", "value", "param", "expect"}),
            std::vector<const char *>({__func__, acl::GetMemcpyKindDesc(kind), "kind",
            "ACL_MEMCPY_HOST_TO_DEVICE or ACL_MEMCPY_DEFAULT"}));
        return ACL_ERROR_INVALID_PARAM;
    }
    return ACL_SUCCESS;
}

aclError aclrtMemcpyToSymbolImpl(const void *symbol, const void *src, size_t count,
                                 size_t offset, aclrtMemcpyKind kind)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemcpyToSymbol);
    ACL_LOG_DEBUG("start to execute aclrtMemcpyToSymbol, count = %zu, offset = %zu.", count, offset);

    aclError ret = CheckMemcpyToSymbol(symbol, src, kind);
    if (ret != ACL_SUCCESS) {
        return ret;
    }

    void *symbolAddr = nullptr;
    size_t symbolSize = 0UL;
    ret = GetSymbolInfo(symbol, count, offset, &symbolAddr, &symbolSize);
    if (ret != ACL_SUCCESS) {
        return ret;
    }

    void *dstAddr = static_cast<void *>(static_cast<uint8_t *>(symbolAddr) + offset);
    const rtError_t copyErr = rtMemcpy(dstAddr, symbolSize - offset, src, count, RT_MEMCPY_HOST_TO_DEVICE);
    if (copyErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("rtMemcpy failed, runtime result = %d.", copyErr);
        return ACL_GET_ERRCODE_RTS(copyErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemcpyToSymbolAsyncImpl(const void *symbol, const void *src, size_t count,
                                      size_t offset, aclrtMemcpyKind kind, aclrtStream stream)
{
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemcpyToSymbolAsync);
    ACL_LOG_DEBUG("start to execute aclrtMemcpyToSymbolAsync, count = %zu, offset = %zu.", count, offset);

    aclError aclErr = CheckMemcpyToSymbol(symbol, src, kind);
    if (aclErr != ACL_SUCCESS) {
        return aclErr;
    }

    void *symbolAddr = nullptr;
    size_t symbolSize = 0UL;
    aclErr = GetSymbolInfo(symbol, count, offset, &symbolAddr, &symbolSize);
    if (aclErr != ACL_SUCCESS) {
        return aclErr;
    }

    void *dstAddr = static_cast<void *>(static_cast<uint8_t *>(symbolAddr) + offset);
    const rtError_t copyErr = rtMemcpyAsync(dstAddr, symbolSize - offset, src, count, RT_MEMCPY_HOST_TO_DEVICE, stream);
    if (copyErr != RT_ERROR_NONE) {
        ACL_LOG_CALL_ERROR("rtMemcpyAsync failed, runtime result = %d.", copyErr);
        return ACL_GET_ERRCODE_RTS(copyErr);
    }
    return ACL_SUCCESS;
}

aclError aclrtMemMapSelectedLinkImpl(void *virPtrDst, size_t size, void *virPtrSrc, uint32_t linkIdx)
{   
    ACL_PROFILING_REG(acl::AclProfType::AclrtMemMapSelectedLink);
    ACL_LOG_INFO("start to execute aclrtMemMapSelectedLink.");
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(virPtrDst);
    ACL_REQUIRES_NOT_NULL_WITH_INPUT_REPORT(virPtrSrc);
    if (size == 0UL) {
        ACL_LOG_ERROR("size is [%zu], size must be greater than zero", size);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_MSG,
            std::vector<const char *>({"param", "value", "reason"}),
            std::vector<const char *>({"size", std::to_string(size).c_str(), "size must be greater than zero"}));
        return ACL_ERROR_INVALID_PARAM;
    }
    if (linkIdx > ACL_RT_MEM_LINK_IDX_1) {
        ACL_LOG_ERROR("linkIdx is [%u], linkIdx in aclrtMemMapSelectedLink must be 0 or 1", linkIdx);
        acl::AclErrorLogManager::ReportInputError(acl::INVALID_PARAM_MSG,
            std::vector<const char *>({"param", "value", "reason"}),
            std::vector<const char *>({"linkIdx", std::to_string(linkIdx).c_str(), "linkIdx in aclrtMemMapSelectedLink must be 0 or 1"}));
        return ACL_ERROR_INVALID_PARAM;
    }

    const auto rtErr = rtMemMapSelectedLink(virPtrDst, size, virPtrSrc, linkIdx);
    if (rtErr != RT_ERROR_NONE) {
        if (rtErr == ACL_ERROR_RT_FEATURE_NOT_SUPPORT) {
            ACL_LOG_WARN("call aclrtMemMapSelectedLink failed, runtime result = %d.", static_cast<int32_t>(rtErr));
        } else {
            ACL_LOG_CALL_ERROR("call aclrtMemMapSelectedLink failed, runtime result = %d.", static_cast<int32_t>(rtErr));
        }   
        return ACL_GET_ERRCODE_RTS(rtErr);
    }
    ACL_LOG_INFO("successfully execute aclrtMemMapSelectedLink");
    return ACL_SUCCESS;
}
#ifdef __cplusplus	 
}	 
#endif // __cplusplus