/**

 * Copyright (c) 2025 Huawei Technologies Co., Ltd.

 * This program is free software, you can redistribute it and/or modify it under the terms and conditions of

 * CANN Open Software License Agreement Version 2.0 (the "License").

 * Please refer to the License for details. You may not use this file except in compliance with the License.

 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,

 * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.

 * See LICENSE in the root of the software repository for the full text of the License.

 */



/*!

 * \file concat_v2.cpp

 * \brief

 */



#include "../concat/arch35/one_axis_concat_all_align.h"

#include "../concat/arch35/one_axis_concat_no_align_same_shape_copy.h"

#include "../concat/arch35/one_axis_concat_no_align_same_shape_gather.h"

#include "../concat/arch35/one_axis_concat_no_align_diff_shape.h"

#include "../concat/arch35/one_axis_concat_pure_copy.h"

#include "../concat/arch35/one_axis_concat_simt.h"



#define NOTFIRST_ALIGN_SAME_BITWIDTH_1 2111

#define NOTFIRST_ALIGN_SAME_BITWIDTH_2 2112

#define NOTFIRST_ALIGN_SAME_BITWIDTH_4 2114

#define NOTFIRST_ALIGN_SAME_BITWIDTH_8 2118



#define NOTFIRST_ALIGN_DIFF_BITWIDTH_1 2121

#define NOTFIRST_ALIGN_DIFF_BITWIDTH_2 2122

#define NOTFIRST_ALIGN_DIFF_BITWIDTH_4 2124

#define NOTFIRST_ALIGN_DIFF_BITWIDTH_8 2128



#define NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_1 2211

#define NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_2 2212

#define NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_4 2214



#define NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_1 2311

#define NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_2 2312

#define NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_4 2314



#define NOTFIRST_NOALIGN_DIFF_BITWIDTH_1 2221

#define NOTFIRST_NOALIGN_DIFF_BITWIDTH_2 2222

#define NOTFIRST_NOALIGN_DIFF_BITWIDTH_4 2224

#define NOTFIRST_NOALIGN_DIFF_BITWIDTH_8 2228



#define SPLIT_CORE_DIM0_NOTFIRST_ALIGN_SAME_BITWIDTH_1 12111

#define SPLIT_CORE_DIM0_NOTFIRST_ALIGN_SAME_BITWIDTH_2 12112

#define SPLIT_CORE_DIM0_NOTFIRST_ALIGN_SAME_BITWIDTH_4 12114

#define SPLIT_CORE_DIM0_NOTFIRST_ALIGN_SAME_BITWIDTH_8 12118



#define SPLIT_CORE_DIM0_NOTFIRST_ALIGN_DIFF_BITWIDTH_1 12121

#define SPLIT_CORE_DIM0_NOTFIRST_ALIGN_DIFF_BITWIDTH_2 12122

#define SPLIT_CORE_DIM0_NOTFIRST_ALIGN_DIFF_BITWIDTH_4 12124

#define SPLIT_CORE_DIM0_NOTFIRST_ALIGN_DIFF_BITWIDTH_8 12128



#define SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_1 12211

#define SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_2 12212

#define SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_4 12214



#define SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_1 12311

#define SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_2 12312

#define SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_4 12314



#define SPLIT_CORE_DIM0_NOTFIRST_NOALIGN_DIFF_BITWIDTH_1 12221

#define SPLIT_CORE_DIM0_NOTFIRST_NOALIGN_DIFF_BITWIDTH_2 12222

#define SPLIT_CORE_DIM0_NOTFIRST_NOALIGN_DIFF_BITWIDTH_4 12224

#define SPLIT_CORE_DIM0_NOTFIRST_NOALIGN_DIFF_BITWIDTH_8 12228

#define PURE_COPY_NO_SPLIT_DIM1_TILINGKEY 20001

#define PURE_COPY_SPLIT_DIM1_TILINGKEY 20002

#define SIMT_TILINGKEY_1 30001

#define SIMT_TILINGKEY_2 30002

#define SIMT_TILINGKEY_4 30004

#define SIMT_TILINGKEY_8 30008



using namespace Concat;



extern "C" __global__ __aicore__ void concat_v2(GM_ADDR x, GM_ADDR dim, GM_ADDR y, GM_ADDR workspace, GM_ADDR tiling)

{

    KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_AIV_ONLY);

    TPipe pipe;

    #if ORIG_DTYPE_X == DT_UINT8 || ORIG_DTYPE_X == DT_INT8 || ORIG_DTYPE_X == DT_BOOL || ORIG_DTYPE_X == DT_FLOAT8_E4M3FN || ORIG_DTYPE_X == DT_FLOAT8_E5M2 || ORIG_DTYPE_X == DT_HIFLOAT8 || ORIG_DTYPE_X == DT_FLOAT8_E8M0 || ORIG_DTYPE_X == DT_BF16 || ORIG_DTYPE_X == DT_FLOAT16 || ORIG_DTYPE_X == DT_INT16 || ORIG_DTYPE_X == DT_UINT16

        TILING_KEY_IS(NOTFIRST_ALIGN_SAME_BITWIDTH_1);

        TILING_KEY_IS(NOTFIRST_ALIGN_DIFF_BITWIDTH_1);

        TILING_KEY_IS(NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_1);

        TILING_KEY_IS(NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_1);

        TILING_KEY_IS(NOTFIRST_NOALIGN_DIFF_BITWIDTH_1);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_ALIGN_SAME_BITWIDTH_1);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_ALIGN_DIFF_BITWIDTH_1);

        TILING_KEY_IS(SIMT_TILINGKEY_1);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_1);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_1);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_NOALIGN_DIFF_BITWIDTH_1);

        TILING_KEY_IS(NOTFIRST_ALIGN_SAME_BITWIDTH_2);

        TILING_KEY_IS(NOTFIRST_ALIGN_DIFF_BITWIDTH_2);

        TILING_KEY_IS(NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_2);

        TILING_KEY_IS(NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_2);

        TILING_KEY_IS(NOTFIRST_NOALIGN_DIFF_BITWIDTH_2);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_ALIGN_SAME_BITWIDTH_2);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_ALIGN_DIFF_BITWIDTH_2);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_2);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_2);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_NOALIGN_DIFF_BITWIDTH_2);

        TILING_KEY_IS(SIMT_TILINGKEY_2);

        TILING_KEY_IS(PURE_COPY_NO_SPLIT_DIM1_TILINGKEY);

        TILING_KEY_IS(PURE_COPY_SPLIT_DIM1_TILINGKEY);

        #if TILING_KEY_VAR == SIMT_TILINGKEY_1

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataForSimt, tilingData, tiling);

            Concat::OneAxisConcatSimt<uint8_t> op(tilingData);

            op.ProcessForSimt(x, y);

            return;

        #elif TILING_KEY_VAR == NOTFIRST_ALIGN_SAME_BITWIDTH_1

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int8_t, true> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == NOTFIRST_ALIGN_DIFF_BITWIDTH_1

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int8_t, false> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_1

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatNoAlignCopy<uint8_t> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_1

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatNoAlignGather<uint8_t, uint16_t> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == NOTFIRST_NOALIGN_DIFF_BITWIDTH_1

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatNoAlignDiffShape<uint8_t, uint16_t> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_ALIGN_SAME_BITWIDTH_1

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int8_t, true, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_ALIGN_DIFF_BITWIDTH_1

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int8_t, false, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_1

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatNoAlignCopy<uint8_t, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_1

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatNoAlignGather<uint8_t, uint16_t, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_NOALIGN_DIFF_BITWIDTH_1

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatNoAlignDiffShape<uint8_t, uint16_t, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SIMT_TILINGKEY_2

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataForSimt, tilingData, tiling);

            Concat::OneAxisConcatSimt<uint16_t> op(tilingData);

            op.ProcessForSimt(x, y);

            return;

        #elif TILING_KEY_VAR == NOTFIRST_ALIGN_SAME_BITWIDTH_2

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int16_t, true> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == NOTFIRST_ALIGN_DIFF_BITWIDTH_2

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int16_t, false> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_2

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatNoAlignCopy<uint16_t> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == NOTFIRST_NOALIGN_DIFF_BITWIDTH_2

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatNoAlignDiffShape<uint16_t, uint16_t> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_2

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatNoAlignGather<uint16_t, uint16_t> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_ALIGN_SAME_BITWIDTH_2

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int16_t, true, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_ALIGN_DIFF_BITWIDTH_2

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int16_t, false, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_2

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatNoAlignCopy<uint16_t, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_2

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatNoAlignGather<uint16_t, uint16_t, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_NOALIGN_DIFF_BITWIDTH_2

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatNoAlignDiffShape<uint16_t, uint16_t, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == PURE_COPY_NO_SPLIT_DIM1_TILINGKEY

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatPureCopy<ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == PURE_COPY_SPLIT_DIM1_TILINGKEY

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatPureCopy<ConcatTilingData> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #endif

    #endif



    #if ORIG_DTYPE_X == DT_UINT32 || ORIG_DTYPE_X == DT_INT32 || ORIG_DTYPE_X == DT_FLOAT || ORIG_DTYPE_X == DT_UINT64 || ORIG_DTYPE_X == DT_INT64 || ORIG_DTYPE_X == DT_DOUBLE || ORIG_DTYPE_X == DT_COMPLEX64

        TILING_KEY_IS(NOTFIRST_ALIGN_SAME_BITWIDTH_4);

        TILING_KEY_IS(NOTFIRST_ALIGN_DIFF_BITWIDTH_4);

        TILING_KEY_IS(NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_4);

        TILING_KEY_IS(NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_4);

        TILING_KEY_IS(NOTFIRST_NOALIGN_DIFF_BITWIDTH_4);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_ALIGN_SAME_BITWIDTH_4);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_ALIGN_DIFF_BITWIDTH_4);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_4);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_4);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_NOALIGN_DIFF_BITWIDTH_4);

        TILING_KEY_IS(SIMT_TILINGKEY_4);

        TILING_KEY_IS(NOTFIRST_ALIGN_SAME_BITWIDTH_8);

        TILING_KEY_IS(NOTFIRST_ALIGN_DIFF_BITWIDTH_8);

        TILING_KEY_IS(NOTFIRST_NOALIGN_DIFF_BITWIDTH_8);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_ALIGN_SAME_BITWIDTH_8);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_ALIGN_DIFF_BITWIDTH_8);

        TILING_KEY_IS(SPLIT_CORE_DIM0_NOTFIRST_NOALIGN_DIFF_BITWIDTH_8);

        TILING_KEY_IS(SIMT_TILINGKEY_8);

        TILING_KEY_IS(PURE_COPY_NO_SPLIT_DIM1_TILINGKEY);

        TILING_KEY_IS(PURE_COPY_SPLIT_DIM1_TILINGKEY);

        #if TILING_KEY_VAR == SIMT_TILINGKEY_4

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataForSimt, tilingData, tiling);

            Concat::OneAxisConcatSimt<uint32_t> op(tilingData);

            op.ProcessForSimt(x, y);

            return;

        #elif TILING_KEY_VAR == NOTFIRST_ALIGN_SAME_BITWIDTH_4

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int32_t, true> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == NOTFIRST_ALIGN_DIFF_BITWIDTH_4

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int32_t, false> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_4

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatNoAlignCopy<uint32_t> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_4

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatNoAlignGather<uint32_t, uint32_t> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == NOTFIRST_NOALIGN_DIFF_BITWIDTH_4

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatNoAlignDiffShape<uint32_t, uint32_t> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_ALIGN_SAME_BITWIDTH_4

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int32_t, true, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_ALIGN_DIFF_BITWIDTH_4

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int32_t, false, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_COPY_BITWIDTH_4

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatNoAlignCopy<uint32_t, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_NALIGN_SAME_GATHER_BITWIDTH_4

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatNoAlignGather<uint32_t, uint32_t, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_NOALIGN_DIFF_BITWIDTH_4

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatNoAlignDiffShape<uint32_t, uint32_t, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SIMT_TILINGKEY_8

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataForSimt, tilingData, tiling);

            Concat::OneAxisConcatSimt<uint64_t> op(tilingData);

            op.ProcessForSimt(x, y);

            return;

        #elif TILING_KEY_VAR == NOTFIRST_ALIGN_SAME_BITWIDTH_8

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int64_t, true> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == NOTFIRST_ALIGN_DIFF_BITWIDTH_8

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int64_t, false> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == NOTFIRST_NOALIGN_DIFF_BITWIDTH_8

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatNoAlignDiffShape<uint32_t, uint32_t> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_ALIGN_SAME_BITWIDTH_8

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int64_t, true, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_ALIGN_DIFF_BITWIDTH_8

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatAllAlign<int64_t, false, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == SPLIT_CORE_DIM0_NOTFIRST_NOALIGN_DIFF_BITWIDTH_8

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatNoAlignDiffShape<uint32_t, uint32_t, ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == PURE_COPY_NO_SPLIT_DIM1_TILINGKEY

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingDataNoArray, tilingData, tiling);

            Concat::OneAxisConcatPureCopy<ConcatTilingDataNoArray> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #elif TILING_KEY_VAR == PURE_COPY_SPLIT_DIM1_TILINGKEY

            GET_TILING_DATA_WITH_STRUCT(ConcatTilingData, tilingData, tiling);

            Concat::OneAxisConcatPureCopy<ConcatTilingData> op(tilingData, pipe);

            op.Init(x, y);

            op.Process();

        #endif

    #endif

}