* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#include <array>
#include <vector>
#include <iostream>
#include <string>
#include <cstdint>
#include "gtest/gtest.h"
#include "tikicpulib.h"
#include "data_utils.h"
#include "../../../op_host/sinkhorn_tiling.h"
// File-scope using-directive: makes std names (e.g. cout, endl) usable unqualified in this file.
using namespace std;
// Kernel entry point exercised by the tests in this file. It is only declared here, so the
// definition must be supplied by another translation unit at link time.
extern "C" __global__ __aicore__ void sinkhorn(GM_ADDR cost, GM_ADDR p, GM_ADDR workspace, GM_ADDR tiling);
/// Fixture shared by the sinkhorn kernel tests. It holds no state; the two static
/// gtest per-suite hooks only print a banner (followed by a blank line and a flush).
class SinkhornTest : public testing::Test {
protected:
    /// Per-suite set-up hook: announces the start of the suite on stdout.
    static void SetUpTestCase()
    {
        std::cout << "SinkhornTest SetUp\n" << std::endl;
    }

    /// Per-suite tear-down hook: announces the end of the suite on stdout.
    static void TearDownTestCase()
    {
        std::cout << "SinkhornTest TearDown\n" << std::endl;
    }
};
/// Sums the first `shapeSize` elements of `pp` in float precision and expects the
/// total to be within 0.01 of 1.0 (non-fatal gtest expectation).
///
/// @tparam T         Element type; each element is converted to float before being added.
/// @param  pp        Pointer to the first element to accumulate.
/// @param  shapeSize Number of elements to read; nothing is read when it is <= 0.
template<typename T>
void checkTotalP(T *pp, int shapeSize) {
    float sum = 0;
    int idx = 0;
    while (idx < shapeSize) {
        // Accumulate in index order so the rounding behaviour is deterministic.
        sum += static_cast<float>(pp[idx]);
        ++idx;
    }
    EXPECT_NEAR(sum, 1.0f, 0.01f);
}
// Runs the kernel with tiling key 0 on a 48 x 2 float cost matrix filled with ones,
// using one block and a single tile, then expects the output P to sum to ~1.
TEST_F(SinkhornTest, sinkhorn_float_48_2) {
    const size_t elemCount = 48 * 2;
    const size_t costBytes = elemCount * sizeof(float);
    const size_t pBytes = elemCount * sizeof(float);
    const size_t tilingBytes = sizeof(SinkhornTilingDataUT);

    // Simulated global-memory buffers handed to the kernel.
    auto *costGm = static_cast<uint8_t*>(AscendC::GmAlloc(costBytes));
    auto *pGm = static_cast<uint8_t*>(AscendC::GmAlloc(pBytes));
    auto *workspaceGm = static_cast<uint8_t*>(AscendC::GmAlloc(2*16*1024*1024));
    auto *tilingGm = static_cast<uint8_t*>(AscendC::GmAlloc(tilingBytes));
    uint32_t numBlocks = 1;

    // Uniform cost: every entry is 1.0.
    auto *costValues = reinterpret_cast<float*>(costGm);
    for (size_t idx = 0; idx < elemCount; ++idx) {
        costValues[idx] = 1.0f;
    }

    // The tiling struct lives inside the GM tiling buffer and is filled in by hand.
    auto *tilingData = reinterpret_cast<SinkhornTilingDataUT*>(tilingGm);
    // "former" group: all 48 rows (96 elements) handled as one tile.
    tilingData->formerNum = 1;
    tilingData->formerRow = 48;
    tilingData->formerLength = 96;
    tilingData->formerTileNum = 1;
    tilingData->formerLastTileRow = 48;
    tilingData->formerLastTileLength = 96;
    // "tail" group: unused in this case, every field is zero.
    tilingData->tailNum = 0;
    tilingData->tailRow = 0;
    tilingData->tailLength = 0;
    tilingData->tailTileNum = 0;
    tilingData->tailLastTileRow = 0;
    tilingData->tailLastTileLength = 0;
    // NOTE(review): tileRow/tileLength exceed the matrix size; presumably a capacity-derived
    // upper bound copied from the host tiling -- confirm.
    tilingData->tileRow = 1959;
    tilingData->tileLength = 3918;
    tilingData->totalRow = 48;
    tilingData->totalCol = 2;
    tilingData->totalColAligned = 8;
    tilingData->tol = 0.0001;

    ICPU_SET_TILING_KEY(0);
    AscendC::SetKernelMode(KernelMode::AIV_MODE);
    // tilingGm and tilingData refer to the same address, so the raw buffer is passed directly.
    ICPU_RUN_KF(sinkhorn, numBlocks, costGm, pGm, workspaceGm, tilingGm);

    checkTotalP(reinterpret_cast<float*>(pGm), elemCount);

    AscendC::GmFree(costGm);
    AscendC::GmFree(pGm);
    AscendC::GmFree(workspaceGm);
    AscendC::GmFree(tilingGm);
}
// Runs the kernel with tiling key 0 on an 8 x 2 float cost matrix with fixed, non-uniform
// values, using one block and a three-tile split, then expects the output P to sum to ~1.
TEST_F(SinkhornTest, sinkhorn_float_8_2) {
    constexpr size_t shapeSize = 8 * 2;
    size_t inputCostByteSize = shapeSize * sizeof(float);
    size_t outputPByteSize = shapeSize * sizeof(float);
    size_t tilingDataSize = sizeof(SinkhornTilingDataUT);

    // Simulated global-memory buffers handed to the kernel.
    uint8_t* cost = static_cast<uint8_t*>(AscendC::GmAlloc(inputCostByteSize));
    uint8_t* p = static_cast<uint8_t*>(AscendC::GmAlloc(outputPByteSize));
    uint8_t* workspace = static_cast<uint8_t*>(AscendC::GmAlloc(2*16*1024*1024));
    uint8_t* tiling = static_cast<uint8_t*>(AscendC::GmAlloc(tilingDataSize));
    uint32_t numBlocks = 1;

    // Fixed cost fixture; the array length must match the matrix size copied below.
    float *fp = reinterpret_cast<float *>(cost);
    const float testCost[] = {45.0f, 48.0f, 65.0f, 68.0f, 68.0f, 10.0f, 84.0f, 22.0f, 37.0f, 71.0f, 13.0f, 59.0f, 66.0f, 40.0f, 47.0f, 82.0f};
    static_assert(sizeof(testCost) / sizeof(testCost[0]) == shapeSize, "testCost must hold exactly shapeSize values");
    for (size_t i = 0; i < shapeSize; i++) {
        fp[i] = testCost[i];
    }

    // The tiling struct lives inside the GM tiling buffer and is filled in by hand.
    SinkhornTilingDataUT* tilingData = reinterpret_cast<SinkhornTilingDataUT*>(tiling);
    // "former" group: 8 rows (16 elements) in 3 tiles. The values are consistent with a
    // 3 + 3 + 2 row split (tileRow = 3, last tile = 2 rows / 4 elements).
    tilingData->formerNum = 1;
    tilingData->formerRow = 8;
    tilingData->formerLength = 16;
    tilingData->formerTileNum = 3;
    tilingData->formerLastTileRow = 2;
    tilingData->formerLastTileLength = 4;
    // "tail" group: unused in this case, every field is zero.
    tilingData->tailNum = 0;
    tilingData->tailRow = 0;
    tilingData->tailLength = 0;
    tilingData->tailTileNum = 0;
    tilingData->tailLastTileRow = 0;
    tilingData->tailLastTileLength = 0;
    tilingData->tileRow = 3;
    tilingData->tileLength = 6;
    tilingData->totalRow = 8;
    tilingData->totalCol = 2;
    tilingData->totalColAligned = 8;
    tilingData->tol = 0.0001;

    ICPU_SET_TILING_KEY(0);
    // Set the kernel mode explicitly so this test does not depend on another test having run
    // first (e.g. under --gtest_filter or --gtest_shuffle).
    AscendC::SetKernelMode(KernelMode::AIV_MODE);
    ICPU_RUN_KF(sinkhorn, numBlocks, cost, p, workspace, (uint8_t*)(tilingData));

    checkTotalP(reinterpret_cast<float *>(p), static_cast<int>(shapeSize));

    AscendC::GmFree(cost);
    AscendC::GmFree(p);
    AscendC::GmFree(workspace);
    AscendC::GmFree(tiling);
}
// Runs the kernel with tiling key 1 on a 48 x 2 cost matrix of half values filled with ones,
// using one block and a single tile. Only successful execution is exercised.
TEST_F(SinkhornTest, sinkhorn_float16_48_2) {
    size_t shapeSize = 48 * 2;
    // NOTE(review): the buffers are sized with sizeof(float) although the data is written as
    // half; this looks like a harmless over-allocation -- confirm before shrinking it.
    size_t inputCostByteSize = shapeSize * sizeof(float);
    size_t outputPByteSize = shapeSize * sizeof(float);
    size_t tilingDataSize = sizeof(SinkhornTilingDataUT);

    // Simulated global-memory buffers handed to the kernel.
    uint8_t* cost = static_cast<uint8_t*>(AscendC::GmAlloc(inputCostByteSize));
    uint8_t* p = static_cast<uint8_t*>(AscendC::GmAlloc(outputPByteSize));
    uint8_t* workspace = static_cast<uint8_t*>(AscendC::GmAlloc(2*16*1024*1024));
    uint8_t* tiling = static_cast<uint8_t*>(AscendC::GmAlloc(tilingDataSize));
    uint32_t numBlocks = 1;

    // Uniform cost: every entry is 1.0 converted to half.
    half *fp = reinterpret_cast<half *>(cost);
    for (size_t i = 0; i < shapeSize; i++) {
        fp[i] = static_cast<half>(1.0f);
    }

    // The tiling struct lives inside the GM tiling buffer and is filled in by hand.
    SinkhornTilingDataUT* tilingData = reinterpret_cast<SinkhornTilingDataUT*>(tiling);
    // "former" group: all 48 rows (96 elements) handled as one tile.
    tilingData->formerNum = 1;
    tilingData->formerRow = 48;
    tilingData->formerLength = 96;
    tilingData->formerTileNum = 1;
    tilingData->formerLastTileRow = 48;
    tilingData->formerLastTileLength = 96;
    // "tail" group: unused in this case, every field is zero.
    tilingData->tailNum = 0;
    tilingData->tailRow = 0;
    tilingData->tailLength = 0;
    tilingData->tailTileNum = 0;
    tilingData->tailLastTileRow = 0;
    tilingData->tailLastTileLength = 0;
    // NOTE(review): tileRow/tileLength exceed the matrix size; presumably a capacity-derived
    // upper bound copied from the host tiling -- confirm.
    tilingData->tileRow = 1959;
    tilingData->tileLength = 3918;
    tilingData->totalRow = 48;
    tilingData->totalCol = 2;
    tilingData->totalColAligned = 16;
    tilingData->tol = 0.0001;

    ICPU_SET_TILING_KEY(1);
    // Set the kernel mode explicitly so this test does not depend on another test having run
    // first (e.g. under --gtest_filter or --gtest_shuffle).
    AscendC::SetKernelMode(KernelMode::AIV_MODE);
    ICPU_RUN_KF(sinkhorn, numBlocks, cost, p, workspace, (uint8_t*)(tilingData));

    // NOTE(review): unlike the float tests, the output is not validated here. Consider
    // checkTotalP on the half output once the expected half-precision result is confirmed.

    AscendC::GmFree(cost);
    AscendC::GmFree(p);
    AscendC::GmFree(workspace);
    AscendC::GmFree(tiling);
}