/**
 * Copyright (c) 2026 Huawei Technologies Co., Ltd.
 * This program is free software, you can redistribute it and/or modify it under the terms and conditions of
 * CANN Open Software License Agreement Version 2.0 (the "License").
 * Please refer to the License for details. You may not use this file except in compliance with the License.
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
 * See LICENSE in the root of the software repository for the full text of the License.
 */
#ifndef ATVOSS_TILE_ASCENDC_MATH_H
#define ATVOSS_TILE_ASCENDC_MATH_H
#include "lib/math/power.h"
#include "common/arch.h"
#include "tile_shape.h"
namespace Atvoss::Tile {
* \brief dst[i] = src0[i] + src1[i]
* \param[in] src0, Input LocalTensor
* \param[in] src1, Input LocalTensor
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, typename T>
__aicore__ inline void AddAssign(
AscendC::LocalTensor<T>& dst, const AscendC::LocalTensor<T>& src0, const AscendC::LocalTensor<T>& src1,
OperationShape& operationShape)
{
AscendC::Add(dst, src0, src1, operationShape.axis0);
}
* \brief dst[i] = src[i] + scalar
* \param[in] src, Input LocalTensor
* \param[in] scalar, Input scalar
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, typename T>
__aicore__ inline void AddsAssign(
AscendC::LocalTensor<T>& dst, const AscendC::LocalTensor<T>& src, T scalar, OperationShape& operationShape)
{
AscendC::Adds(dst, src, scalar, operationShape.axis0);
}
* \brief dst[i] = src0[i] - src1[i]
* \param[in] src0, Input LocalTensor
* \param[in] src1, Input LocalTensor
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, typename T>
__aicore__ inline void SubAssign(
AscendC::LocalTensor<T>& dst, const AscendC::LocalTensor<T>& src0, const AscendC::LocalTensor<T>& src1,
OperationShape& operationShape)
{
AscendC::Sub(dst, src0, src1, operationShape.axis0);
}
#if _ATVOSS_ARCH35_
* \brief dst[i] = src[i] - scalar
* \param[in] src, Input LocalTensor
* \param[in] scalar, Input scalar
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, typename T>
__aicore__ inline void SubsAssign(
AscendC::LocalTensor<T>& dst, const AscendC::LocalTensor<T>& src, T scalar, OperationShape& operationShape)
{
AscendC::Subs(dst, src, scalar, operationShape.axis0);
}
* \brief dst[i] = scalar - src[i]
* \param[in] src, Input LocalTensor
* \param[in] scalar, Input scalar
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, typename T>
__aicore__ inline void SubsAssign(
AscendC::LocalTensor<T>& dst, T scalar, const AscendC::LocalTensor<T>& src, OperationShape& operationShape)
{
AscendC::Subs(dst, scalar, src, operationShape.axis0);
}
#endif
* \brief dst[i] = src0[i] * src1[i]
* \param[in] src0, Input LocalTensor
* \param[in] src1, Input LocalTensor
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, typename T>
__aicore__ inline void MulAssign(
AscendC::LocalTensor<T>& dst, const AscendC::LocalTensor<T>& src0, const AscendC::LocalTensor<T>& src1,
OperationShape& operationShape)
{
AscendC::Mul(dst, src0, src1, operationShape.axis0);
}
* \brief dst[i] = src[i] * src1
* \param[in] src, Input LocalTensor
* \param[in] src1, Input scalar
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, typename T>
__aicore__ inline void MulsAssign(
AscendC::LocalTensor<T>& dst, const AscendC::LocalTensor<T>& src, T src1, OperationShape& operationShape)
{
AscendC::Muls(dst, src, src1, operationShape.axis0);
}
* \brief dst[i] = src0[i] / src1[i]
* \param[in] src0, Input LocalTensor
* \param[in] src1, Input LocalTensor
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, typename T>
__aicore__ inline void DivAssign(
AscendC::LocalTensor<T>& dst, const AscendC::LocalTensor<T>& src0, const AscendC::LocalTensor<T>& src1,
OperationShape& operationShape)
{
AscendC::Div(dst, src0, src1, operationShape.axis0);
}
#if _ATVOSS_ARCH35_
* \brief dst[i] = src[i] / scalar
* \param[in] src, Input LocalTensor
* \param[in] scalar, Input scalar
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, typename T>
__aicore__ inline void DivsAssign(
AscendC::LocalTensor<T>& dst, const AscendC::LocalTensor<T>& src, T scalar, OperationShape& operationShape)
{
AscendC::Divs(dst, src, scalar, operationShape.axis0);
}
* \brief dst[i] = scalar / src[i]
* \param[in] src, Input LocalTensor
* \param[in] scalar, Input scalar
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, typename T>
__aicore__ inline void DivsAssign(
AscendC::LocalTensor<T>& dst, T scalar, const AscendC::LocalTensor<T>& src, OperationShape& operationShape)
{
AscendC::Divs(dst, scalar, src, operationShape.axis0);
}
#endif
* \brief dst[i] = src[i] / scalarValue
* \param[in] src, Input LocalTensor
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, auto scalarValue, typename T>
__aicore__ inline void DivsAssign(
AscendC::LocalTensor<T>& dst, const AscendC::LocalTensor<T>& src, OperationShape& operationShape)
{
T src1 = T{1} / scalarValue;
AscendC::Muls(dst, src, src1, operationShape.axis0);
}
* \brief dst[i] = exp(src[i])
* \param[in] src, Input LocalTensor
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, typename T>
__aicore__ inline void ExpAssign(
AscendC::LocalTensor<T>& dst, const AscendC::LocalTensor<T>& src, OperationShape& operationShape)
{
AscendC::Exp(dst, src, operationShape.axis0);
}
* \brief dst[i] = abs(src[i])
* \param[in] src, Input LocalTensor
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, typename T>
__aicore__ inline void AbsAssign(
AscendC::LocalTensor<T>& dst, const AscendC::LocalTensor<T>& src, OperationShape& operationShape)
{
AscendC::Abs(dst, src, operationShape.axis0);
}
* \brief dst[i] = sqrt(src[i])
* \param[in] src, Input LocalTensor
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, typename T>
__aicore__ inline void SqrtAssign(
AscendC::LocalTensor<T>& dst, const AscendC::LocalTensor<T>& src, OperationShape& operationShape)
{
AscendC::Sqrt(dst, src, operationShape.axis0);
}
* \brief dst[i] = power(src[i])
* \param[in] src, Input LocalTensor
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, auto scalarValue, typename T>
__aicore__ inline void PowerAssign(
AscendC::LocalTensor<T>& dst, const AscendC::LocalTensor<T>& src, OperationShape& operationShape)
{
AscendC::Power(dst, src, T{scalarValue}, operationShape.axis0);
}
* \brief dst[i] = cast(src[i])
* \param[in] src, Input LocalTensor
* \param[out] dst, Output LocalTensor
*/
template <typename OperationShape, CastMode castMode, typename T1, typename T2>
__aicore__ inline void CastAssign(
AscendC::LocalTensor<T1>& dst, const AscendC::LocalTensor<T2>& src, OperationShape& operationShape)
{
if constexpr (castMode == CastMode::CAST_NONE) {
AscendC::Cast(dst, src, AscendC::RoundMode::CAST_NONE, operationShape.axis0);
} else if constexpr (castMode == CastMode::CAST_RINT) {
AscendC::Cast(dst, src, AscendC::RoundMode::CAST_RINT, operationShape.axis0);
} else if constexpr (castMode == CastMode::CAST_FLOOR) {
AscendC::Cast(dst, src, AscendC::RoundMode::CAST_FLOOR, operationShape.axis0);
} else if constexpr (castMode == CastMode::CAST_CEIL) {
AscendC::Cast(dst, src, AscendC::RoundMode::CAST_CEIL, operationShape.axis0);
} else if constexpr (castMode == CastMode::CAST_ROUND) {
AscendC::Cast(dst, src, AscendC::RoundMode::CAST_ROUND, operationShape.axis0);
} else if constexpr (castMode == CastMode::CAST_TRUNC) {
AscendC::Cast(dst, src, AscendC::RoundMode::CAST_TRUNC, operationShape.axis0);
} else if constexpr (castMode == CastMode::CAST_ODD) {
AscendC::Cast(dst, src, AscendC::RoundMode::CAST_ODD, operationShape.axis0);
}
}
// Brings Atvoss::Layout::OperationShape into Atvoss::Tile. The Evaluator specializations below use it
// both as the type of their local shape variable and as the explicit OperationShape template argument
// of the *Assign tile functions.
using OperationShape = Atvoss::Layout::OperationShape;
* \brief Add calculation based on the expression
* \param[in] args, Input LocalTensor & output LocalTensor
* \param[in] localVars, Temp LocalTensor
* \param[in] tail, Length of calculation data
* \return Add expression
*/
template <typename T, typename U>
struct Evaluator<OpAdd<T, U>> {
using Type =
decltype(Add(std::declval<typename Evaluator<T>::Type>(), std::declval<typename Evaluator<U>::Type>()));
template <typename Context>
__aicore__ inline auto operator()(const OpAdd<T, U>& op, Context& context) const
{
return Add(Evaluator<T>{}(op.GetLhs(), context), Evaluator<U>{}(op.GetRhs(), context));
}
};
* \brief Add calculation based on the expression
* \param[in] args, Input LocalTensor & output LocalTensor
* \param[in] localVars, Temp LocalTensor
* \param[in] tail, Length of calculation data
* \return Add expression
*/
template <typename T, typename U, typename V>
struct Evaluator<OpAssign<T, OpAdd<U, V>>> {
using Type = void;
template <typename Context>
__aicore__ inline auto operator()(const OpAssign<T, OpAdd<U, V>>& op, Context& context) const
{
using Dtype = Dtype_t<T>;
OperationShape operationShape = GetShape<Operation::Unary>(context.argsTensors);
static_assert(
!std::is_scalar_v<typename U::Type> || !std::is_scalar_v<typename V::Type>,
"OpAdd's inputs not accepts all scalar types");
if constexpr (std::is_scalar_v<typename U::Type>) {
return Atvoss::Tile::AddsAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<V>{}(op.GetRhs().GetRhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetLhs(), context), operationShape);
} else if constexpr (std::is_scalar_v<typename V::Type>) {
return Atvoss::Tile::AddsAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetLhs(), context).GetUbTensor(),
Evaluator<V>{}(op.GetRhs().GetRhs(), context), operationShape);
} else {
return Atvoss::Tile::AddAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetLhs(), context).GetUbTensor(),
Evaluator<V>{}(op.GetRhs().GetRhs(), context).GetUbTensor(), operationShape);
}
}
};
* \brief Sub calculation based on the expression
* \param[in] args, Input LocalTensor & output LocalTensor
* \param[in] localVars, Temp LocalTensor
* \param[in] tail, Length of calculation data
* \return Sub expression
*/
template <typename T, typename U, typename V>
struct Evaluator<OpAssign<T, OpSub<U, V>>> {
using Type = void;
template <typename Context>
__aicore__ inline auto operator()(const OpAssign<T, OpSub<U, V>>& op, Context& context) const
{
using Dtype = Dtype_t<T>;
OperationShape operationShape = GetShape<Operation::Unary>(context.argsTensors);
#if _ATVOSS_ARCH35_
static_assert(
!std::is_scalar_v<typename U::Type> || !std::is_scalar_v<typename V::Type>,
"OpSub's inputs not accepts all scalar types");
if constexpr (std::is_scalar_v<typename U::Type>) {
return Atvoss::Tile::SubsAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(), Evaluator<U>{}(op.GetRhs().GetLhs(), context),
Evaluator<V>{}(op.GetRhs().GetRhs(), context).GetUbTensor(), operationShape);
} else if constexpr (std::is_scalar_v<typename V::Type>) {
return Atvoss::Tile::SubsAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetLhs(), context).GetUbTensor(),
Evaluator<V>{}(op.GetRhs().GetRhs(), context), operationShape);
} else {
return Atvoss::Tile::SubAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetLhs(), context).GetUbTensor(),
Evaluator<V>{}(op.GetRhs().GetRhs(), context).GetUbTensor(), operationShape);
}
#else
return Atvoss::Tile::SubAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetLhs(), context).GetUbTensor(),
Evaluator<V>{}(op.GetRhs().GetRhs(), context).GetUbTensor(), operationShape);
#endif
}
};
* \brief Mul calculation based on the expression
* \param[in] args, Input LocalTensor & output LocalTensor
* \param[in] localVars, Temp LocalTensor
* \param[in] tail, Length of calculation data
* \return Mul expression
*/
template <typename T, typename U, typename V>
struct Evaluator<OpAssign<T, OpMul<U, V>>> {
using Type = void;
template <typename Context>
__aicore__ inline auto operator()(const OpAssign<T, OpMul<U, V>>& op, Context& context) const
{
using Dtype = Dtype_t<T>;
OperationShape operationShape = GetShape<Operation::Unary>(context.argsTensors);
static_assert(
!std::is_scalar_v<typename U::Type> || !std::is_scalar_v<typename V::Type>,
"MulAssign's inputs not accepts all scalar types");
if constexpr (std::is_scalar_v<typename U::Type>) {
return Atvoss::Tile::MulsAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<V>{}(op.GetRhs().GetRhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetLhs(), context), operationShape);
} else if constexpr (std::is_scalar_v<typename V::Type>) {
return Atvoss::Tile::MulsAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetLhs(), context).GetUbTensor(),
Evaluator<V>{}(op.GetRhs().GetRhs(), context), operationShape);
} else {
return Atvoss::Tile::MulAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetLhs(), context).GetUbTensor(),
Evaluator<V>{}(op.GetRhs().GetRhs(), context).GetUbTensor(), operationShape);
}
}
};
* \brief Div calculation based on the expression
* \param[in] args, Input LocalTensor & output LocalTensor
* \param[in] localVars, Temp LocalTensor
* \param[in] tail, Length of calculation data
* \return Div expression
*/
template <typename T, typename U, typename V>
struct Evaluator<OpAssign<T, OpDiv<U, V>>> {
using Type = void;
template <typename Context>
__aicore__ inline auto operator()(const OpAssign<T, OpDiv<U, V>>& op, Context& context) const
{
using Dtype = Dtype_t<T>;
OperationShape operationShape = GetShape<Operation::Unary>(context.argsTensors);
#if _ATVOSS_ARCH35_
static_assert(
!std::is_scalar_v<typename U::Type> || !std::is_scalar_v<typename V::Type>,
"OpDiv's inputs not accepts all scalar types");
if constexpr (std::is_scalar_v<typename U::Type>) {
return Atvoss::Tile::DivsAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(), Evaluator<U>{}(op.GetRhs().GetLhs(), context),
Evaluator<V>{}(op.GetRhs().GetRhs(), context).GetUbTensor(), operationShape);
} else if constexpr (std::is_scalar_v<typename V::Type>) {
return Atvoss::Tile::DivsAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetLhs(), context).GetUbTensor(),
Evaluator<V>{}(op.GetRhs().GetRhs(), context), operationShape);
} else {
return Atvoss::Tile::DivAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetLhs(), context).GetUbTensor(),
Evaluator<V>{}(op.GetRhs().GetRhs(), context).GetUbTensor(), operationShape);
}
#else
return Atvoss::Tile::DivAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetLhs(), context).GetUbTensor(),
Evaluator<V>{}(op.GetRhs().GetRhs(), context).GetUbTensor(), operationShape);
#endif
}
};
* \brief Divs calculation based on the expression
* \param[in] args, Input LocalTensor & output LocalTensor
* \param[in] localVars, Temp LocalTensor
* \param[in] tail, Length of calculation data
* \return Divs expression
*/
template <typename T, typename U, int scalarValue>
struct Evaluator<OpAssign<T, OpDivs<scalarValue, U>>> {
using Type = void;
template <typename Context>
__aicore__ inline auto operator()(const OpAssign<T, OpDivs<scalarValue, U>>& op, Context& context) const
{
using Dtype = Dtype_t<T>;
OperationShape operationShape = GetShape<Operation::Unary>(context.argsTensors);
return Atvoss::Tile::DivsAssign<OperationShape, scalarValue, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetData(), context).GetUbTensor(), operationShape);
}
};
* \brief Exp calculation based on the expression
* \param[in] args, Input LocalTensor & output LocalTensor
* \param[in] localVars, Temp LocalTensor
* \param[in] tail, Length of calculation data
* \return Exp expression
*/
template <typename T, typename U>
struct Evaluator<OpAssign<T, OpExp<U>>> {
using Type = void;
template <typename Context>
__aicore__ inline auto operator()(const OpAssign<T, OpExp<U>>& op, Context& context) const
{
using Dtype = Dtype_t<T>;
OperationShape operationShape = GetShape<Operation::Unary>(context.argsTensors);
return Atvoss::Tile::ExpAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetData(), context).GetUbTensor(), operationShape);
}
};
* \brief Abs calculation based on the expression
* \param[in] args, Input LocalTensor & output LocalTensor
* \param[in] localVars, Temp LocalTensor
* \param[in] tail, Length of calculation data
* \return Abs expression
*/
template <typename T, typename U>
struct Evaluator<OpAssign<T, OpAbs<U>>> {
using Type = void;
template <typename Context>
__aicore__ inline auto operator()(const OpAssign<T, OpAbs<U>>& op, Context& context) const
{
using Dtype = Dtype_t<T>;
OperationShape operationShape = GetShape<Operation::Unary>(context.argsTensors);
return Atvoss::Tile::AbsAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetData(), context).GetUbTensor(), operationShape);
}
};
* \brief Sqrt calculation based on the expression
* \param[in] args, Input LocalTensor & output LocalTensor
* \param[in] localVars, Temp LocalTensor
* \param[in] tail, Length of calculation data
* \return Sqrt expression
*/
template <typename T, typename U>
struct Evaluator<OpAssign<T, OpSqrt<U>>> {
using Type = void;
template <typename Context>
__aicore__ inline auto operator()(const OpAssign<T, OpSqrt<U>>& op, Context& context) const
{
using Dtype = Dtype_t<T>;
OperationShape operationShape = GetShape<Operation::Unary>(context.argsTensors);
return Atvoss::Tile::SqrtAssign<OperationShape, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetData(), context).GetUbTensor(), operationShape);
}
};
* \brief Sqrt calculation based on the expression
* \param[in] args, Input LocalTensor & output LocalTensor
* \param[in] localVars, Temp LocalTensor
* \param[in] tail, Length of calculation data
* \return Sqrt expression
*/
template <typename T, typename U, int scalarValue>
struct Evaluator<OpAssign<T, OpPower<scalarValue, U>>> {
using Type = void;
template <typename Context>
__aicore__ inline auto operator()(const OpAssign<T, OpPower<scalarValue, U>>& op, Context& context) const
{
using Dtype = Dtype_t<T>;
OperationShape operationShape = GetShape<Operation::Unary>(context.argsTensors);
return Atvoss::Tile::PowerAssign<OperationShape, scalarValue, Dtype>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetData(), context).GetUbTensor(), operationShape);
}
};
* \brief Cast calculation based on the expression
* \param[in] args, Input LocalTensor & output LocalTensor
* \param[in] localVars, Temp LocalTensor
* \param[in] tail, Length of calculation data
* \return Cast expression
*/
template <typename T, typename U, typename R, CastMode castMode>
struct Evaluator<OpAssign<T, OpCast<castMode, R, U>>> {
using Type = void;
template <typename Context>
__aicore__ inline auto operator()(const OpAssign<T, OpCast<castMode, R, U>>& op, Context& context) const
{
using DstType = Dtype_t<T>;
using SrcType = Dtype_t<U>;
OperationShape operationShape = GetShape<Operation::Unary>(context.argsTensors);
return Atvoss::Tile::CastAssign<OperationShape, castMode, DstType, SrcType>(
Evaluator<T>{}(op.GetLhs(), context).GetUbTensor(),
Evaluator<U>{}(op.GetRhs().GetData(), context).GetUbTensor(), operationShape);
}
};
}
#endif