ce50bcc8创建于 2025年8月22日历史提交
#!/usr/bin/env python3
# coding: utf-8
# Copyright 2023 Huawei Technologies Co., Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
# ===========================================================================


class PrefillConfig:
    """
    Prefill配置类

    Attributes:
        maxSeqLen (int): 最大序列长度
        maxInputTokenLen (int): 最大输入token长度
        dp (int): 数据并行度
        cp (int): 上下文并行度
        tp (int): 张量并行度
        sp (int): 序列并行度
        pp (int): 流水线并行度
        moe_ep (int): MoE专家并行度
        moe_tp (int): MoE张量并行度
        ep_level (int): 专家并行级别
        MTP (bool): MTP配置状态(true或false)
        maxPrefillTokens (int): 最大预填充token数
        enable_init_routing_cutoff (bool): 是否启用初始路由截断(a910b)
        topk_scaling_factor (float): topk缩放因子(a910b)
    """

    def __init__(self, maxSeqLen, maxInputTokenLen, dp, cp, tp, sp, pp, moe_ep,
                 moe_tp, ep_level, MTP, maxPrefillTokens,
                 enable_init_routing_cutoff=None, topk_scaling_factor=None):
        # type: (int, int, int, int, int, int, int, int, int, int, bool, int, bool, float) -> None
        self.maxSeqLen = maxSeqLen              # type: int
        self.maxInputTokenLen = maxInputTokenLen  # type: int
        self.dp = dp                            # type: int
        self.cp = cp                            # type: int
        self.tp = tp                            # type: int
        self.sp = sp                            # type: int
        self.pp = pp                            # type: int
        self.moe_ep = moe_ep                    # type: int
        self.moe_tp = moe_tp                    # type: int
        self.ep_level = ep_level                # type: int
        self.MTP = MTP                          # type: bool
        self.maxPrefillTokens = maxPrefillTokens  # type: int
        self.enable_init_routing_cutoff = enable_init_routing_cutoff  # type: bool
        self.topk_scaling_factor = topk_scaling_factor  # type: float


class DecodeConfig:
    """
    Decode配置类

    Attributes:
        maxSeqLen (int): 最大序列长度
        maxInputTokenLen (int): 最大输入token长度
        dp (dict): 数据并行度配置,格式为{节点数: 值}
        tp (int): 张量并行度
        sp (int): 序列并行度
        cp (int): 上下文并行度
        pp (int): 流水线并行度
        moe_ep (dict): MoE专家并行度配置,格式为{节点数: 值}
        moe_tp (int): MoE张量并行度
        ep_level (int): 专家并行级别
        MTP (bool): MTP配置状态("开启"或"关闭")
        maxPrefillTokens (int): 最大预填充token数
        maxIterTimes (int): 最大迭代次数
    """

    def __init__(self, maxSeqLen, maxInputTokenLen, dp, tp, sp, cp, pp, moe_ep,
                 moe_tp, ep_level, MTP, maxPrefillTokens, maxIterTimes):
        # type: (int, int, dict, int, int, int, int, dict, int, int, bool, int, int) -> None
        self.maxSeqLen = maxSeqLen              # type: int
        self.maxInputTokenLen = maxInputTokenLen  # type: int
        self.dp = dp                            # type: dict
        self.tp = tp                            # type: int
        self.sp = sp                            # type: int
        self.cp = cp                            # type: int
        self.pp = pp                            # type: int
        self.moe_ep = moe_ep                    # type: dict
        self.moe_tp = moe_tp                    # type: int
        self.ep_level = ep_level                # type: int
        self.MTP = MTP                          # type: bool
        self.maxPrefillTokens = maxPrefillTokens  # type: int
        self.maxIterTimes = maxIterTimes        # type: int


MAX_SEQ_LEN_DICT = {
    18000: "16k",
    68000: "64k",
    134000: "128k",
}

# a910_93 经典配置
STATIC_CONFIG_DICT_A910_93 = {
    "16k": {
        "prefill": PrefillConfig(
            maxSeqLen=18000,           # 最大序列长度
            maxInputTokenLen=18000,    # 最大输入token长度
            dp=2,                      # 数据并行度
            cp=1,                      # 上下文并行度
            tp=8,                      # 张量并行度
            sp=1,                      # 序列并行度
            pp=1,                      # 流水线并行度
            moe_ep=16,                 # MoE专家并行度
            moe_tp=1,                  # MoE张量并行度
            ep_level=2,                # 专家并行级别
            MTP=True,                # MTP配置状态
            maxPrefillTokens=18000     # 最大预填充token数
        ),
        "decode": DecodeConfig(
            maxSeqLen=18000,           # 最大序列长度
            maxInputTokenLen=18000,    # 最大输入token长度
            dp={2: 32, 4: 64, 8: 128},        # 数据并行度配置 {节点数: 值}
            tp=1,                      # 张量并行度
            sp=1,                      # 序列并行度
            cp=1,                      # 上下文并行度
            pp=1,                      # 流水线并行度
            moe_ep={2: 32, 4: 64, 8: 128},    # MoE专家并行度配置 {节点数: 值}
            moe_tp=1,                  # MoE张量并行度
            ep_level=2,                # 专家并行级别
            MTP=True,                # MTP配置状态
            maxPrefillTokens=18000,    # 最大预填充token数
            maxIterTimes=18000         # 最大迭代次数
        )
    },
    "64k": {
        "prefill": PrefillConfig(
            maxSeqLen=68000,
            maxInputTokenLen=68000,
            dp=1,
            cp=2,
            tp=8,
            sp=8,
            pp=1,
            moe_ep=16,
            moe_tp=1,
            ep_level=2,
            MTP=True,
            maxPrefillTokens=68000
        ),
        "decode": DecodeConfig(
            maxSeqLen=68000,
            maxInputTokenLen=68000,
            dp={2: 32, 4: 64, 8: 128},
            tp=1,
            sp=1,
            cp=1,
            pp=1,
            moe_ep={2: 32, 4: 64, 8: 128},
            moe_tp=1,
            ep_level=2,
            MTP=True,
            maxPrefillTokens=68000,
            maxIterTimes=68000
        )
    },
    "128k": {
        "prefill": PrefillConfig(
            maxSeqLen=134000,
            maxInputTokenLen=134000,
            dp=1,
            cp=2,
            tp=8,
            sp=8,
            pp=1,
            moe_ep=16,
            moe_tp=1,
            ep_level=2,
            MTP=False,
            maxPrefillTokens=134000
        ),
        "decode": DecodeConfig(
            maxSeqLen=134000,
            maxInputTokenLen=134000,
            dp={2: 32, 4: 64, 8: 128},
            tp=1,
            sp=1,
            cp=1,
            pp=1,
            moe_ep={2: 32, 4: 64, 8: 128},
            moe_tp=1,
            ep_level=2,
            MTP=False,
            maxPrefillTokens=134000,
            maxIterTimes=134000
        )
    }
}

STATIC_CONFIG_DICT_A910B = {
    "16k": {
        "prefill": PrefillConfig(
            maxSeqLen=18000,           # 最大序列长度
            maxInputTokenLen=18000,    # 最大输入token长度
            dp=2,                      # 数据并行度
            cp=1,                      # 上下文并行度
            tp=8,                      # 张量并行度
            sp=1,                      # 序列并行度
            pp=1,                      # 流水线并行度
            moe_ep=4,                 # MoE专家并行度
            moe_tp=4,                  # MoE张量并行度
            ep_level=1,                # 专家并行级别
            MTP=True,                # MTP配置状态
            enable_init_routing_cutoff=False, # 初始路由剪裁使能
            maxPrefillTokens=18000     # 最大预填充token数
        ),
        "decode": DecodeConfig(
            maxSeqLen=18000,           # 最大序列长度
            maxInputTokenLen=18000,    # 最大输入token长度
            dp={4: 32, 8: 64},        # 数据并行度配置 {节点数: 值}
            tp=1,                      # 张量并行度
            sp=1,                      # 序列并行度
            cp=1,                      # 上下文并行度
            pp=1,                      # 流水线并行度
            moe_ep={4: 32, 8: 64},    # MoE专家并行度配置 {节点数: 值}
            moe_tp=1,                  # MoE张量并行度
            ep_level=2,                # 专家并行级别
            MTP=True,                # MTP配置状态
            maxPrefillTokens=18000,    # 最大预填充token数
            maxIterTimes=18000         # 最大迭代次数
        )
    },
    "64k": {
        "prefill": PrefillConfig(
            maxSeqLen=68000,
            maxInputTokenLen=68000,
            dp=1,
            cp=2,
            tp=8,
            sp=8,
            pp=1,
            moe_ep=16,
            moe_tp=1,
            ep_level=1,
            MTP=True,
            enable_init_routing_cutoff=True,
            topk_scaling_factor=0.25,  # topk缩放因子
            maxPrefillTokens=68000
        ),
        "decode": DecodeConfig(
            maxSeqLen=68000,
            maxInputTokenLen=68000,
            dp={4: 32, 8: 64},
            tp=1,
            sp=1,
            cp=1,
            pp=1,
            moe_ep={4: 32, 8: 64},
            moe_tp=1,
            ep_level=2,
            MTP=True,
            maxPrefillTokens=68000,
            maxIterTimes=68000
        )
    },
    "128k": {
        "prefill": PrefillConfig(
            maxSeqLen=134000,
            maxInputTokenLen=134000,
            dp=1,
            cp=2,
            tp=8,
            sp=8,
            pp=1,
            moe_ep=16,
            moe_tp=1,
            ep_level=1,
            MTP=False,
            enable_init_routing_cutoff=True,
            topk_scaling_factor=0.25,  # topk缩放因子
            maxPrefillTokens=134000
        ),
        "decode": DecodeConfig(
            maxSeqLen=134000,
            maxInputTokenLen=134000,
            dp={4: 32, 8: 64},
            tp=1,
            sp=1,
            cp=1,
            pp=1,
            moe_ep={4: 32, 8: 64},
            moe_tp=1,
            ep_level=2,
            MTP=False,
            maxPrefillTokens=134000,
            maxIterTimes=134000
        )
    }
}