#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
# ----------------------------------------------------------------------------
# Copyright (c) 2026 Huawei Technologies Co., Ltd.
# This program is free software, you can redistribute it and/or modify it under the terms and conditions of
# CANN Open Software License Agreement Version 2.0 (the "License").
# Please refer to the License for details. You may not use this file except in compliance with the License.
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
# See LICENSE in the root of the software repository for the full text of the License.
# ----------------------------------------------------------------------------
# Registry of input-preparation hooks: for each category ("kernel") it maps an
# operator name to the *name* of the function in this module that prepares
# that operator's inputs.
# NOTE(review): presumably looked up by name by an external harness - confirm.
__input__ = {
    "kernel": {
        "matmul_reduce_scatter_v2": "matmul_reduce_scatter_v2_inputs",
    },
}
import numpy as np
def matmul_reduce_scatter_v2_inputs(
    x1,
    x2,
    bias=None,
    x1_scale=None,
    x2_scale=None,
    quant_scale=None,
    group="",
    reduce_op="sum",
    is_trans_a=False,
    is_trans_b=False,
    comm_turn=0,
    rank_size=0,
    block_size=0,
    group_size=0,
    is_amax_out=False,
    y_dtype=0,
    comm_mode="aicpu",
    **kwargs
):
    """Normalize the inputs of ``matmul_reduce_scatter_v2`` and return them.

    Two adjustments are made; every other argument is passed through as-is:

    * When ``is_trans_b`` is true, ``x2`` is replaced by ``x2.transpose()``.
      If ``x2_scale`` is given and ``kwargs['per_block_flag']`` is truthy,
      ``x2_scale`` is transposed as well.
    * When ``rank_size`` is not positive, it is taken from
      ``kwargs['world_size']`` (default ``1``).

    ``x1`` is returned unchanged regardless of ``is_trans_a``.

    Args:
        x1: Object exposing ``.shape``; its first dimension must be divisible
            by the resolved ``rank_size``.
        x2: Object exposing ``.transpose()`` (e.g. a ``numpy.ndarray``).
        x2_scale: Optional object exposing ``.transpose()``.
        rank_size: Number of ranks; a value ``<= 0`` means "use
            ``kwargs['world_size']``".
        **kwargs: Only ``per_block_flag`` and ``world_size`` are read.
        (Remaining parameters are forwarded untouched.)

    Returns:
        A 17-tuple ``(x1, x2, bias, x1_scale, x2_scale, quant_scale, group,
        reduce_op, is_trans_a, is_trans_b, comm_turn, rank_size, block_size,
        group_size, is_amax_out, y_dtype, comm_mode)`` in signature order.

    Raises:
        ValueError: If the resolved ``rank_size`` is not positive, or if
            ``x1.shape[0]`` is not divisible by it.
    """
    if is_trans_b:
        x2 = x2.transpose()
        # NOTE(review): the original file's indentation was lost; the scale
        # transpose is assumed to apply only together with the x2 transpose.
        # Confirm against the upstream source.
        if x2_scale is not None:
            per_block_flag = kwargs.get('per_block_flag', False)
            if per_block_flag:
                x2_scale = x2_scale.transpose()

    if rank_size <= 0:
        rank_size = kwargs.get('world_size', 1)
    # The fallback value is caller-supplied too: reject 0 (which would raise
    # ZeroDivisionError below) and negatives (which could slip through the
    # modulo check and be returned as a negative rank count).
    if rank_size <= 0:
        raise ValueError(f"rank_size ({rank_size}) must be a positive integer")
    if x1.shape[0] % rank_size != 0:
        raise ValueError(f"x1.shape[0] ({x1.shape[0]}) must be divisible by rank_size ({rank_size})")

    return (
        x1,
        x2,
        bias,
        x1_scale,
        x2_scale,
        quant_scale,
        group,
        reduce_op,
        is_trans_a,
        is_trans_b,
        comm_turn,
        rank_size,
        block_size,
        group_size,
        is_amax_out,
        y_dtype,
        comm_mode,
    )