* Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#ifndef __ASCENDC_API_AXPY_H__
#define __ASCENDC_API_AXPY_H__
/**
 * @brief Computes dst = src_0 + alpha * src_1 over the first `count` elements.
 *
 * When T is half, both inputs are first cast to float inside `tmp_buf`, the
 * multiply-add is done on the float copies, and the result is cast back to
 * half with RoundMode::CAST_RINT. For every other T the multiply-add is issued
 * directly on the input tensors and `tmp_buf` is not touched.
 *
 * @tparam T       Element type of dst / src_0 / src_1.
 * @param dst      Output tensor.
 * @param src_0    Addend tensor.
 * @param src_1    Tensor that is scaled by `alpha` before the addition.
 * @param alpha    Scale factor applied to src_1.
 * @param count    Number of elements to process.
 * @param tmp_buf  Scratch bytes, used only when T is half. It is split into two
 *                 regions; the second one starts at half of the buffer size
 *                 rounded down to a multiple of 32.
 *                 NOTE(review): presumably each region must hold at least
 *                 `count` floats; this is not checked here - confirm at caller.
 *
 * @note In the non-half path dst is written by Muls before src_0 is read by
 *       Add, so if dst overlaps src_0 the addend is overwritten before use.
 */
template <typename T>
inline __aicore__ void AxpyExtend(const AscendC::LocalTensor<T> &dst, const AscendC::LocalTensor<T> &src_0,
                                  const AscendC::LocalTensor<T> &src_1, const float alpha,
                                  const uint32_t count, const AscendC::LocalTensor<uint8_t> &tmp_buf) {
  if constexpr (AscendC::IsSameType<T, half>::value) {
    constexpr uint32_t tmp_buf_splits = 2U;
    constexpr uint32_t cast_dst_align = 32U;
    // Start of the second scratch region: half of the buffer, rounded down to a multiple of 32.
    const auto offset = tmp_buf.GetSize() / tmp_buf_splits / cast_dst_align * cast_dst_align;
    AscendC::LocalTensor<float> cast_float_src_0 = tmp_buf[0].template ReinterpretCast<float>();
    AscendC::LocalTensor<float> cast_float_src_1 = tmp_buf[offset].template ReinterpretCast<float>();

    // Widen both inputs to float; the two casts write disjoint scratch regions.
    AscendC::Cast(cast_float_src_0, src_0, AscendC::RoundMode::CAST_NONE, count);
    AscendC::Cast(cast_float_src_1, src_1, AscendC::RoundMode::CAST_NONE, count);
    AscendC::PipeBarrier<PIPE_V>();

    AscendC::Muls(cast_float_src_1, cast_float_src_1, alpha, count);
    // Add consumes the tensor Muls has just written, so fence it like the other dependent steps.
    AscendC::PipeBarrier<PIPE_V>();
    AscendC::Add(cast_float_src_0, cast_float_src_0, cast_float_src_1, count);
    AscendC::PipeBarrier<PIPE_V>();

    // Narrow the float result back into the half output.
    AscendC::Cast(dst, cast_float_src_0, AscendC::RoundMode::CAST_RINT, count);
  } else {
    // dst temporarily holds alpha * src_1 and is then used as the second Add operand.
    AscendC::Muls(dst, src_1, alpha, count);
    // Add reads the dst values produced by Muls; keep the two instructions ordered.
    AscendC::PipeBarrier<PIPE_V>();
    AscendC::Add(dst, src_0, dst, count);
  }
}
#endif