DefaultBlockConfig

功能说明

默认的block层schedule配置参数的数据结构。

所属头文件链接

/include/elewise/block/schedule.h

数据结构

// Default block-level schedule configuration parameters.
// Every member is zero unless explicitly set.
struct DefaultBlockConfig {
    // Number of tile-block loop iterations on the current core.
    uint32_t wholeLoop{0};
    // Element count of the tile block handled in the current iteration;
    // 0 when that tile block is a full tile block.
    uint32_t tileCnt{0};
    // Element count of one full tile block.
    uint32_t basicNum{0};
    // Total number of elements processed on the current core.
    uint32_t totalElemCnt{0};
};

数据结构成员说明

成员名称 成员类型 成员说明 默认值
wholeLoop uint32_t 当前核上tile块的轮询次数 0
tileCnt uint32_t 当前轮询处理的tile块中包含的元素个数;当处理的是整块tile块时,该值为0 0
basicNum uint32_t 整块tile块包含的元素个数 0
totalElemCnt uint32_t 当前核上处理的总元素个数 0

约束说明

NA

使用示例

// Usage example: operator configuration that wires a compute description
// (out = in1 + in2 - in3) into the Block / Kernel / Device builder chain,
// using Atvoss::Ele::DefaultBlockConfig as the block configuration type.
//
// InputDtype  - element type of the two tensor inputs and of the scalar input.
// OutputDtype - element type of the output tensor.
template <typename InputDtype, typename OutputDtype>
struct AddSubConfig {
    // Compute description: declares four placeholders (indices 1..4) and
    // returns the assignment expression built from them.
    struct AddSubCompute {
        template <template <typename> class Tensor>
        __host_aicore__ constexpr auto Compute() const
        {
            using InTensor = Tensor<InputDtype>;
            using OutTensor = Tensor<OutputDtype>;

            // Placeholders 1 and 2 are tensor inputs; placeholder 3 is a scalar input.
            auto lhs = Atvoss::PlaceHolder<1, InTensor, Atvoss::ParamUsage::IN>();
            auto rhs = Atvoss::PlaceHolder<2, InTensor, Atvoss::ParamUsage::IN>();
            auto scalar = Atvoss::PlaceHolder<3, InputDtype, Atvoss::ParamUsage::IN>();
            // Placeholder 4 is the tensor output.
            auto result = Atvoss::PlaceHolder<4, OutTensor, Atvoss::ParamUsage::OUT>();

            return (result = lhs + rhs - scalar);
        }
    };

    using ArchTag = Atvoss::Arch::DAV_3510;

    // NOTE(review): TileShape is not declared anywhere in this snippet; it must
    // already be visible at this point - confirm where it is defined.
    static constexpr Atvoss::Ele::DefaultBlockPolicy<TileShape> blockPolicy{TileShape{}};

    using BlockOp = Atvoss::Ele::BlockBuilder<
        AddSubCompute,
        ArchTag,
        blockPolicy,
        // >>> Usage example: DefaultBlockConfig is supplied as the fourth
        // >>> BlockBuilder template argument.
        Atvoss::Ele::DefaultBlockConfig>;

    using KernelOp = Atvoss::Ele::KernelBuilder<BlockOp>;
    using DeviceOp = Atvoss::DeviceAdapter<KernelOp>;
};

// Example driver: wraps device buffers in tensors, packs them together with a
// scalar into an argument set, and runs the DeviceOp from AddSubConfig.
//
// InputDtype / OutputDtype are forwarded unchanged to AddSubConfig.
template <typename InputDtype, typename OutputDtype>
static void Run() {
    /* ACL init and stream create */
    // Elided setup: deviceIn1, deviceIn2, deviceOut and stream used below are
    // not defined in this snippet and are expected to be prepared here.
    ...

    // NOTE(review): presumably the braced list is a fixed 8-slot shape and the
    // trailing 2 is the number of valid dimensions (a 3 x 4 tensor) - confirm
    // against the Atvoss::Tensor constructor.
    Atvoss::Tensor<InputDtype> in1(deviceIn1, {{3, 4, 0, 0, 0, 0, 0, 0}}, 2);
    Atvoss::Tensor<InputDtype> in2(deviceIn2, {{3, 4, 0, 0, 0, 0, 0, 0}}, 2);
    // Scalar operand, passed as the third input.
    InputDtype in3 = 5.0;
    Atvoss::Tensor<OutputDtype> out(deviceOut, {{3, 4, 0, 0, 0, 0, 0, 0}}, 2);

    // Arguments are given in the order in1, in2, in3, out, which matches the
    // placeholder indices 1..4 declared in AddSubCompute. An attribute named
    // "dim" with value 5 is attached as well.
    auto arguments = Atvoss::ArgumentsBuilder{}.inputOutput(in1, in2, in3, out).attr("dim", 5).build();

    // Instantiate the device-level operator for these element types and run it
    // on the stream created in the elided setup.
    using DeviceOp = typename AddSubConfig<InputDtype, OutputDtype>::DeviceOp;
    DeviceOp deviceOp;
    deviceOp.Run(arguments, stream);
}

// Program entry point: runs the example with float input and float output.
// Command-line arguments are not used, so the parameters are left unnamed.
int main(int, const char*[])
{
    Run<float, float>();
    return 0;
}