// Copyright (c) 2025 Huawei Technologies Co., Ltd.
// This program is free software, you can redistribute it and/or modify it under the terms and conditions of
// CANN Open Software License Agreement Version 2.0 (the "License").
// Please refer to the License for details. You may not use this file except in compliance with the License.
// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
// See LICENSE in the root of the software repository for the full text of the License.
// RUN: ascir-translate -mlir-to-ascendc %s | FileCheck %s
// CHECK-LABEL:void emit_softmax(AscendC::LocalTensor<half> v1, AscendC::LocalTensor<half> v2, AscendC::LocalTensor<half> v3, AscendC::LocalTensor<half> v4, SoftMaxTiling v5, AscendC::SoftMaxShapeInfo v6) {
// CHECK-NEXT: AscendC::SimpleSoftMax<half, 0, 0, 0>(v1, v2, v3, v4, v5, v6);
// CHECK-NEXT: AscendC::SoftMax<half, 1, 1, 0>(v1, v2, v3, v4, v5, v6);
// CHECK-NEXT: return;
// CHECK-NEXT: }
func.func @emit_softmax(%arg0: !ascendc.local_tensor<*xf16>, %arg1: !ascendc.local_tensor<*xf16>, %arg2: !ascendc.local_tensor<*xf16>, %arg3: !ascendc.local_tensor<*xf16>, %arg4: !ascendc.softmax_tiling, %arg5: !ascendc.softmax_shape_info) {
ascendc.simple_softmax %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 {operandSegmentSizes = array<i32: 1, 1, 1, 1, 0, 1, 1>} : !ascendc.local_tensor<*xf16>, !ascendc.local_tensor<*xf16>, !ascendc.local_tensor<*xf16>, !ascendc.local_tensor<*xf16>, !ascendc.softmax_tiling, !ascendc.softmax_shape_info
ascendc.softmax %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 {basicBlock, operandSegmentSizes = array<i32: 1, 1, 1, 1, 0, 1, 1>, reuseSource} : !ascendc.local_tensor<*xf16>, !ascendc.local_tensor<*xf16>, !ascendc.local_tensor<*xf16>, !ascendc.local_tensor<*xf16>, !ascendc.softmax_tiling, !ascendc.softmax_shape_info
return
}
// CHECK-LABEL:void emit_unary_math_ops(AscendC::LocalTensor<float> v1, AscendC::LocalTensor<float> v2, AscendC::LocalTensor<float> v3, int32_t v4, bool v5) {
// CHECK-NEXT: AscendC::Acosh<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Acos<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Asinh<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Asin<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Atanh<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Atan<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Ceil<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Cosh<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Cos<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Digamma<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Erfc<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Erf<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Floor<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Frac<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Lgamma<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Log<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Round<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Sign<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Sinh<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Sin<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Tanh<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Tan<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Trunc<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: return;
// CHECK-NEXT: }
func.func @emit_unary_math_ops(%dst: !ascendc.local_tensor<1024xf32>, %src: !ascendc.local_tensor<1024xf32>, %sharedTmpBuffer : !ascendc.local_tensor<1024xf32>, %calCount : i32, %isReuseSource : i1) {
ascendc.acosh %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.acos %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.asinh %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.asin %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.atanh %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.atan %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.ceil %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.cosh %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.cos %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.digamma %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.erfc %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.erf %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.floor %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.frac %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.lgamma %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.log %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.round %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.sign %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.sinh %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.sin %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.tanh %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.tan %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.trunc %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
return
}
// CHECK-LABEL:void emit_unary_math_ops_test_params(AscendC::LocalTensor<float> v1, AscendC::LocalTensor<float> v2, AscendC::LocalTensor<float> v3, int32_t v4, bool v5) {
// CHECK-NEXT: AscendC::Acosh<float, v5>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Acosh<float, v5>(v1, v2);
// CHECK-NEXT: AscendC::Acosh<float, v5>(v1, v2, v3);
// CHECK-NEXT: AscendC::Acosh<float, v5>(v1, v2, v4);
// CHECK-NEXT: return;
// CHECK-NEXT: }
func.func @emit_unary_math_ops_test_params(%dst: !ascendc.local_tensor<1024xf32>, %src: !ascendc.local_tensor<1024xf32>, %sharedTmpBuffer : !ascendc.local_tensor<1024xf32>, %calCount : i32, %isReuseSource : i1) {
ascendc.acosh %dst, %src, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.acosh %dst, %src, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 0, 0, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i1
ascendc.acosh %dst, %src, %sharedTmpBuffer, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 0, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i1
ascendc.acosh %dst, %src, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 0, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
return
}
// CHECK-LABEL:void emit_binary_math_ops(AscendC::LocalTensor<float> v1, AscendC::LocalTensor<float> v2, AscendC::LocalTensor<float> v3, AscendC::LocalTensor<float> v4, int32_t v5, bool v6) {
// CHECK-NEXT: AscendC::Power<float, v6>(v1, v2, v3, v4, v5);
// CHECK-NEXT: AscendC::Xor<float, v6>(v1, v2, v3, v4, v5);
// CHECK-NEXT: return;
// CHECK-NEXT: }
func.func @emit_binary_math_ops(%dst: !ascendc.local_tensor<1024xf32>, %src0: !ascendc.local_tensor<1024xf32>, %src1: !ascendc.local_tensor<1024xf32>, %sharedTmpBuffer : !ascendc.local_tensor<1024xf32>, %calCount : i32, %isReuseSource : i1) {
ascendc.power %dst, %src0, %src1, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.xor %dst, %src0, %src1, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
return
}
// CHECK-LABEL:void emit_binary_math_ops_test_params(AscendC::LocalTensor<float> v1, AscendC::LocalTensor<float> v2, AscendC::LocalTensor<float> v3, AscendC::LocalTensor<float> v4, int32_t v5, bool v6) {
// CHECK-NEXT: AscendC::Power<float, v6>(v1, v2, v3);
// CHECK-NEXT: AscendC::Power<float, v6>(v1, v2, v3, v5);
// CHECK-NEXT: AscendC::Power<float, v6>(v1, v2, v3, v4);
// CHECK-NEXT: AscendC::Power<float, v6>(v1, v2, v3, v4, v5);
// CHECK-NEXT: return;
// CHECK-NEXT: }
func.func @emit_binary_math_ops_test_params(%dst: !ascendc.local_tensor<1024xf32>, %src0: !ascendc.local_tensor<1024xf32>, %src1: !ascendc.local_tensor<1024xf32>, %sharedTmpBuffer : !ascendc.local_tensor<1024xf32>, %calCount : i32, %isReuseSource : i1) {
ascendc.power %dst, %src0, %src1, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 0, 0, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i1
ascendc.power %dst, %src0, %src1, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 0, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.power %dst, %src0, %src1, %sharedTmpBuffer, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 0, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i1
ascendc.power %dst, %src0, %src1, %sharedTmpBuffer, %calCount, %isReuseSource {operandSegmentSizes = array<i32: 1, 1, 1, 1, 1, 1>} : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i1
return
}
// CHECK-LABEL:void emit_clamp(AscendC::LocalTensor<float> v1, AscendC::LocalTensor<float> v2, AscendC::LocalTensor<float> v3, int32_t v4, int32_t v5, bool v6) {
// CHECK-NEXT: AscendC::ClampMax<float, v6>(v1, v2, v3, v4, v5);
// CHECK-NEXT: AscendC::ClampMax<float, v6>(v1, v2, v4, v5);
// CHECK-NEXT: AscendC::ClampMin<float, v6>(v1, v2, v3, v4, v5);
// CHECK-NEXT: AscendC::ClampMin<float, v6>(v1, v2, v4, v5);
// CHECK-NEXT: return;
// CHECK-NEXT: }
func.func @emit_clamp(%dst: !ascendc.local_tensor<1024xf32>, %src: !ascendc.local_tensor<1024xf32>, %sharedTmpBuffer : !ascendc.local_tensor<1024xf32>, %scalar : i32, %calCount : i32, %isReuseSource : i1) {
ascendc.clamp_max %dst, %src, %sharedTmpBuffer, %scalar, %calCount, %isReuseSource : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i32, i1
ascendc.clamp_max %dst, %src, %scalar, %calCount, %isReuseSource : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i32, i1
ascendc.clamp_min %dst, %src, %sharedTmpBuffer, %scalar, %calCount, %isReuseSource : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i32, i1
ascendc.clamp_min %dst, %src, %scalar, %calCount, %isReuseSource : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i32, i1
return
}
// CHECK-LABEL:void emit_cumsum(AscendC::LocalTensor<float> v1, AscendC::LocalTensor<float> v2, AscendC::LocalTensor<float> v3, AscendC::LocalTensor<float> v4, int32_t v5, int32_t v6, int32_t v7) {
// CHECK-NEXT: AscendC::CumSum(v1, v2, v3, v4, AscendC::CumSumInfo(v5, v6, v7));
// CHECK-NEXT: AscendC::CumSum(v1, v2, v3, AscendC::CumSumInfo(v5, v6, v7));
// CHECK-NEXT: return;
// CHECK-NEXT: }
func.func @emit_cumsum(%dst: !ascendc.local_tensor<1024xf32>, %lastRaw: !ascendc.local_tensor<1024xf32>, %src: !ascendc.local_tensor<1024xf32>, %sharedTmpBuffer : !ascendc.local_tensor<1024xf32>, %lastAxis : i32, %reuseSource : i32, %outputLastRow : i32) {
ascendc.cumsum %dst, %lastRaw, %src, %sharedTmpBuffer, %lastAxis, %reuseSource, %outputLastRow : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i32, i32
ascendc.cumsum %dst, %lastRaw, %src, %lastAxis, %reuseSource, %outputLastRow : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i32, i32
return
}
// CHECK-LABEL:void emit_axpy(AscendC::LocalTensor<float> v1, AscendC::LocalTensor<float> v2, int32_t v3, AscendC::LocalTensor<float> v4, int32_t v5, bool v6) {
// CHECK-NEXT: AscendC::Axpy<float, float, v6>(v1, v2, v3, v4, v5);
// CHECK-NEXT: AscendC::Axpy<float, float, v6>(v1, v2, v3, v5);
// CHECK-NEXT: return;
// CHECK-NEXT: }
func.func @emit_axpy(%dst: !ascendc.local_tensor<1024xf32>, %src: !ascendc.local_tensor<1024xf32>, %scalar : i32, %sharedTmpBuffer : !ascendc.local_tensor<1024xf32>, %calCount : i32, %isReuseSource : i1) {
ascendc.axpy %dst, %src, %scalar, %sharedTmpBuffer, %calCount, %isReuseSource : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, !ascendc.local_tensor<1024xf32>, i32, i1
ascendc.axpy %dst, %src, %scalar, %calCount, %isReuseSource : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, i32, i1
return
}
// CHECK-LABEL:void emit_exp(AscendC::LocalTensor<float> v1, AscendC::LocalTensor<float> v2, int32_t v3, uint8_t v4, AscendC::LocalTensor<float> v5, bool v6) {
// CHECK-NEXT: AscendC::Exp<float, v4, v6>(v1, v2, v5, v3);
// CHECK-NEXT: AscendC::Exp<float, v4, v6>(v1, v2, v3);
// CHECK-NEXT: return;
// CHECK-NEXT: }
func.func @emit_exp(%dst: !ascendc.local_tensor<1024xf32>, %src: !ascendc.local_tensor<1024xf32>, %calCount : i32, %taylorExpandLevel: ui8, %sharedTmpBuffer : !ascendc.local_tensor<1024xf32>, %isReuseSource : i1) {
ascendc.exp %dst, %src, %calCount, %taylorExpandLevel, %sharedTmpBuffer, %isReuseSource : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, ui8, !ascendc.local_tensor<1024xf32>, i1
ascendc.exp %dst, %src, %calCount, %taylorExpandLevel, %isReuseSource : !ascendc.local_tensor<1024xf32>, !ascendc.local_tensor<1024xf32>, i32, ui8, i1
return
}
// CHECK-LABEL:void emit_cube_tiling_operator_equal(TCubeTiling* v1, TCubeTiling* v2) {
// CHECK-NEXT: constexpr uint32_t c0_idx = 0;
// CHECK-NEXT: TCubeTiling v3 = v2[c0_idx];
// CHECK-NEXT: constexpr uint32_t c0_idx = 0;
// CHECK-NEXT: v1[c0_idx] = v3;
// CHECK-NEXT: return;
// CHECK-NEXT:}
func.func @emit_cube_tiling_operator_equal(%arg0: memref<?x!ascendc.cube_tiling>, %arg1: memref<?x!ascendc.cube_tiling>) {
%c0 = arith.constant 0 : index
%0 = memref.load %arg1[%c0] : memref<?x!ascendc.cube_tiling>
%c0_0 = arith.constant 0 : index
memref.store %0, %arg0[%c0_0] : memref<?x!ascendc.cube_tiling>
return
}
// CHECK-LABEL: void emit_concat(__gm__ uint64_t* v1) {
// CHECK-NEXT: set_ffts_base_addr(*v1);
// CHECK-NEXT: constexpr uint32_t v2 = 128;
// CHECK-NEXT: constexpr uint32_t v3 = 0;
// CHECK-NEXT: constexpr uint32_t v4 = 256;
// CHECK-NEXT: constexpr int32_t c4_i32 = 4;
// CHECK-NEXT: AscendC::LocalTensor<half> v5 = AscendC::LocalTensor<half>(AscendC::TPosition::VECOUT, v3, v2);
// CHECK-NEXT: AscendC::LocalTensor<half> v6 = AscendC::LocalTensor<half>(AscendC::TPosition::VECIN, v3, v2);
// CHECK-NEXT: AscendC::LocalTensor<half> v7 = AscendC::LocalTensor<half>(AscendC::TPosition::VECIN, v3, v4);
// CHECK-NEXT: AscendC::Concat(v5, v6, v7, c4_i32);
// CHECK-NEXT: return;
// CHECK-NEXT: }
func.func @emit_concat(%arg0: memref<?xui64, 22>){
ascendc.set_ffts_base_addr %arg0 : memref<?xui64, 22>
%0 = "emitc.constant"() <{value = 128 : ui32}> : () -> ui32
%1 = "emitc.constant"() <{value = 0 : ui32}> : () -> ui32
%2 = "emitc.constant"() <{value = 256 : ui32}> : () -> ui32
%c4_i32 = arith.constant 4 : i32
%3 = ascendc.local_tensor_v2 vecout, %1, %0 : !ascendc.local_tensor<*xf16>
%4 = ascendc.local_tensor_v2 vecin, %1, %0 : !ascendc.local_tensor<*xf16>
%5 = ascendc.local_tensor_v2 vecin, %1, %2 : !ascendc.local_tensor<*xf16>
ascendc.concat %3, %4, %5, %c4_i32 : !ascendc.local_tensor<*xf16>, !ascendc.local_tensor<*xf16>, !ascendc.local_tensor<*xf16>, i32
return
}
// CHECK-LABEL: void emit_extract(__gm__ uint64_t* v1) {
// CHECK-NEXT: set_ffts_base_addr(*v1);
// CHECK-NEXT: constexpr uint32_t v2 = 128;
// CHECK-NEXT: constexpr uint32_t v3 = 0;
// CHECK-NEXT: constexpr uint32_t v4 = 256;
// CHECK-NEXT: constexpr int32_t c8_i32 = 8;
// CHECK-NEXT: AscendC::LocalTensor<half> v5 = AscendC::LocalTensor<half>(AscendC::TPosition::VECOUT, v3, v2);
// CHECK-NEXT: AscendC::LocalTensor<uint32_t> v6 = AscendC::LocalTensor<uint32_t>(AscendC::TPosition::VECOUT, v3, v2);
// CHECK-NEXT: AscendC::LocalTensor<half> v7 = AscendC::LocalTensor<half>(AscendC::TPosition::VECIN, v3, v4);
// CHECK-NEXT: AscendC::Extract(v5, v6, v7, c8_i32);
// CHECK-NEXT: return;
// CHECK-NEXT: }
func.func @emit_extract(%arg0: memref<?xui64, 22>){
ascendc.set_ffts_base_addr %arg0 : memref<?xui64, 22>
%0 = "emitc.constant"() <{value = 128 : ui32}> : () -> ui32
%1 = "emitc.constant"() <{value = 0 : ui32}> : () -> ui32
%2 = "emitc.constant"() <{value = 256 : ui32}> : () -> ui32
%c8_i32 = arith.constant 8 : i32
%3 = ascendc.local_tensor_v2 vecout, %1, %0 : !ascendc.local_tensor<*xf16>
%4 = ascendc.local_tensor_v2 vecout, %1, %0 : !ascendc.local_tensor<*xui32>
%5 = ascendc.local_tensor_v2 vecin, %1, %2 : !ascendc.local_tensor<*xf16>
ascendc.extract %3, %4, %5, %c8_i32 : !ascendc.local_tensor<*xf16>, !ascendc.local_tensor<*xui32>, !ascendc.local_tensor<*xf16>, i32
return
}