#include "bev_pool.h"
using namespace AscendC;
namespace BEVPool {
/**
 * @brief Sum `length_` feature rows into a single row and write it out.
 *
 * Starting at `featOffset_` in `fGm_`, every iteration copies one row into
 * local memory (using `cpFeatParams_`), adds it element-wise into a
 * zero-initialised accumulator and then advances `featOffset_` by `stride0_`.
 * After the loop the accumulator is stored to `oGm_` at `outOffset_`:
 * with `DataCopy`/`cpFeatParams_` when `Align32B` is true, otherwise with
 * `DataCopyPad`/`cpPadParams_`.
 *
 * Side effect: `featOffset_` is modified (advanced once per processed row).
 *
 * NOTE(review): `alignUpCCount_` is used as the element count for both the
 * zero fill and the add; presumably the row length rounded up to a whole
 * 32-byte block - confirm against the class definition.
 */
template<typename T, bool Align32B>
__aicore__ inline void BEVPoolKernel<T, Align32B>::DoProcess()
{
    // The running sum is built directly in a buffer taken from the output queue.
    LocalTensor<T> sumRow = outQue_.AllocTensor<T>();
    Duplicate(sumRow, T(0.f), this->alignUpCCount_);

    int32_t row = 0;
    while (row < this->length_) {
        // Bring the next row from global memory into a freshly allocated buffer.
        LocalTensor<T> staged = featQue_.AllocTensor<T>();
        DataCopy(staged, this->fGm_[this->featOffset_], this->cpFeatParams_);

        // Round trip through the queue before the data is consumed; presumably
        // this is what orders the copy-in ahead of the Add below.
        featQue_.EnQue(staged);
        LocalTensor<T> ready = featQue_.DeQue<T>();

        // sumRow += ready
        Add(sumRow, ready, sumRow, this->alignUpCCount_);
        featQue_.FreeTensor(ready);

        this->featOffset_ += this->stride0_;
        ++row;
    }

    // Same queue round trip for the finished row before it is stored.
    outQue_.EnQue(sumRow);
    LocalTensor<T> result = outQue_.DeQue<T>();
    if (!Align32B) {
        DataCopyPad(this->oGm_[this->outOffset_], result, this->cpPadParams_);
    } else {
        DataCopy(this->oGm_[this->outOffset_], result, this->cpFeatParams_);
    }
    outQue_.FreeTensor(result);
}
}
/**
 * @brief Kernel entry point: builds a float BEVPoolKernel and runs it.
 *
 * Reads the tiling data, derives the per-row byte sizes from `stride0`, and
 * dispatches on the tiling key:
 *   - key 3 -> BEVPoolKernel<float, true>
 *   - key 2 -> BEVPoolKernel<float, false>
 * Any other key runs no kernel object. A full pipe barrier is issued before
 * returning in every case.
 *
 * @param feat            forwarded to the kernel object
 * @param geomFeat        forwarded to the kernel object
 * @param intervalLengths forwarded to the kernel object
 * @param intervalStarts  forwarded to the kernel object
 * @param out             forwarded to the kernel object
 * @param workspace       not referenced in this function
 * @param tiling          source for GET_TILING_DATA
 */
extern "C" __global__ __aicore__ void bev_pool(GM_ADDR feat, GM_ADDR geomFeat, GM_ADDR intervalLengths,
    GM_ADDR intervalStarts, GM_ADDR out, GM_ADDR workspace, GM_ADDR tiling)
{
    GET_TILING_DATA(bevPoolTiling, tiling);
    int32_t coreIdx = GetBlockIdx();
    const int32_t stride0 = bevPoolTiling.stride0;
#if __CCE_AICORE__ == 220
    KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_AIV_ONLY);
#endif

    // Both variants work on float data and need the same three sizes:
    // raw row bytes, number of ONE_BLK_SIZE blocks covering a row, and the
    // row bytes rounded up to a whole number of blocks.
    const int32_t rowBytes = stride0 * sizeof(float);
    const int32_t rowBlocks = DivCeil(rowBytes, ONE_BLK_SIZE);
    const int32_t paddedRowBytes = rowBlocks * ONE_BLK_SIZE;

    if (TILING_KEY_IS(3)) {
        BEVPool::BEVPoolKernel<float, true> kernel(coreIdx, rowBytes, rowBlocks, paddedRowBytes, feat, geomFeat,
            intervalLengths, intervalStarts, out, bevPoolTiling);
        kernel.Process();
    } else if (TILING_KEY_IS(2)) {
        BEVPool::BEVPoolKernel<float, false> kernel(coreIdx, rowBytes, rowBlocks, paddedRowBytes, feat, geomFeat,
            intervalLengths, intervalStarts, out, bevPoolTiling);
        kernel.Process();
    }

    PipeBarrier<PIPE_ALL>();
}