ec2ff2f0创建于 2025年8月29日历史提交
__all__ = ["create_schedule_context_holder"]

from ._schedule_context import _create_schedule_context_holder, ScheduleContextHolder


def create_schedule_context_holder(
        schedule_mode: int,
        session_num: int,
        micro_batch_num: int,
        micro_batch_size: int,
        selected_expert_num: int,
        expert_num: int,
        attn_to_ffn_token_size: int,
        ffn_to_attn_token_size: int,
        ffn_window: int = 0,
        ffn_window_size: int = 0,
        attention_window: int = 0,
        attention_window_size: int = 0,
) -> ScheduleContextHolder:
    """
    A holder class for managing scheduling context in distributed inference.

    Args:
        schedule_mode: Scheduling mode identifier, 0:schedule ffn, 1:shcedule attention
        session_num: Number of sessions
        micro_batch_num: Number of micro batches
        micro_batch_size: micro batch size
        selected_expert_num: selected experts num
        expert_num: Total number of experts
        attn_to_ffn_token_size: Token size from attention to FFN
        ffn_to_attn_token_size: Token size from FFN to attention
        ffn_window: FFN window addr (default: 0), must assign value when schedule_mode=0
        ffn_window_size: FFN window size (default: 0), must assign value when schedule_mode=0
        attention_window: Attention window addr (default: 0), must assign value when schedule_mode=1
        attention_window_size: Attention window size (default: 0), must assign value when schedule_mode=1

    Returns:
        ScheduleContextHolder: the schedule context holder
    """
    return _create_schedule_context_holder(
        schedule_mode,
        session_num,
        micro_batch_num,
        micro_batch_size,
        selected_expert_num,
        expert_num,
        attn_to_ffn_token_size,
        ffn_to_attn_token_size,
        ffn_window,
        ffn_window_size,
        attention_window,
        attention_window_size,
    )