/**
 * Copyright (c) 2025 Huawei Technologies Co., Ltd.
 * This program is free software, you can redistribute it and/or modify it under the terms and conditions of 
 * CANN Open Software License Agreement Version 2.0 (the "License").
 * Please refer to the License for details. You may not use this file except in compliance with the License.
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, 
 * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
 * See LICENSE in the root of the software repository for the full text of the License.
 */

syntax = "proto3";

package domi;

// Top-level description of a model's tasks: a version string, an extension
// attribute map, the list of TaskDef entries, and model-wide sizes, counts and
// addresses.
// NOTE(review): field numbers 2-8 are not used in this definition; if they
// belonged to removed fields they should be declared `reserved` - confirm
// against the schema history.
message ModelTaskDef {
    string version = 1;

    map<string, string> attr = 9; // Extended field
    // One entry per task; see TaskDef.
    repeated TaskDef task = 10;

    uint64 memory_size = 11;
    uint32 stream_num = 12;
    uint32 event_num = 13;
    uint64 weight_size = 14;

    repeated bytes op = 15; // input/output opdef in bytes

    uint64 base_addr = 16;    // base addr
    uint64 weight_addr = 17;  // weight addr
    uint32 batch_num = 18;
}

// A single task entry of ModelTaskDef.task. It carries common identifiers
// (id, type, stream/event/notify ids), followed by one message-typed field per
// task flavour.
// NOTE(review): the payload fields are plain fields, not members of a oneof, so
// the schema does not enforce that only one is set. Presumably `type` tells the
// consumer which payload is meaningful - confirm against the reader code.
message TaskDef {
    uint32 id = 1;
    uint32 type = 2;

    uint32 stream_id = 10;
    uint32 event_id = 11;
    uint32 notify_id = 12;
    uint32 sqe_num = 13;

    // Task payloads (field numbers 22-24, 27 and 29 are not used here).
    KernelDef kernel = 20;
    KernelExDef kernel_ex = 21;
    KernelHcclDef kernel_hccl = 25;
    EventExDef event_ex = 26;
    LogTimeStampDef log_timestamp = 28;

    uint32 label_id = 30;

    MemcpyAsyncDef memcpy_async = 31;
    StreamSwitchDef stream_switch = 32;
    StreamActiveDef stream_active = 33;
    // Opaque bytes; the schema gives no structure for this field.
    bytes private_def = 34;
    uint64 ops_kernel_store_ptr = 35;      // adjustments to other fields in the future
    StreamSwitchNDef stream_switch_n = 36;

    LabelSetDef label_set = 37;
    LabelGotoExDef label_goto_ex = 38;
    LabelSwitchByIndexDef label_switch_by_index = 39;
    KernelDefWithHandle kernel_with_handle = 40;
    FftsTaskDef ffts_task = 41;
    FftsPlusTaskDef ffts_plus_task = 42;
    DSATaskDef dsa_task = 43;
    CmoTaskDef cmo_task = 44;
    CmoBarrierTaskDef cmo_barrier_task = 45;
    NpuGetFloatStatusDef npu_get_float_status = 46;
    NpuClearFloatStatusDef npu_clear_float_status = 47;
    DvppTaskDef dvpp_task = 48;
    NpuGetFloatDebugStatusDef npu_get_float_debug_status = 49;
    NpuClearFloatDebugStatusDef npu_clear_float_debug_status = 50;
    CmoAddrTaskDef cmo_addr_task = 51;
    UpdatePcTaskDef update_pc_task = 52;
    FusionTaskDef fusion_task = 53;
}

// Payload of TaskDef.kernel: a KernelContext plus the stub function name,
// block dimension, raw argument bytes and related blobs.
// Field numbers 3-9 are not used in this definition.
message KernelDef {
    KernelContext context = 1;
    bool is_block_task_prefetch = 2;
    string stub_func = 10;
    uint32 block_dim = 11;
    // NOTE(review): presumably the byte length of `args` - confirm.
    uint32 args_size = 12;
    bytes args = 13;
    bytes sm_desc = 14;
    bytes flowtable = 15;
    string so_name = 16;
    string kernel_name = 17;
    bytes kernel_ext_info = 18;
    // NOTE(review): presumably the byte length of `kernel_ext_info` - confirm.
    uint32 kernel_ext_info_size = 19;
    // Per-argument descriptors; see ArgsInfo.
    repeated ArgsInfo args_info = 20;
    uint32 schedule_mode = 21;
    uint32 block_dim_offset = 22; // vector core offset
}

// Payload of TaskDef.kernel_with_handle. It resembles KernelDef but carries a
// numeric `handle` and a `dev_func` name instead of `stub_func`. Fields with the
// same name as in KernelDef use different numbers here (e.g. block_dim is 12,
// not 11), so the two messages are not wire-interchangeable.
message KernelDefWithHandle {
    KernelContext context = 1;
    bool is_block_task_prefetch = 2;
    uint64 handle = 10;
    string dev_func = 11;
    uint32 block_dim = 12;
    uint32 args_size = 13;
    bytes args = 14;
    bytes sm_desc = 15;
    string original_kernel_key = 16;
    string node_info = 17;
    // Per-argument descriptors; see ArgsInfo.
    repeated ArgsInfo args_info = 18;
    uint32 schedule_mode = 19;
    uint32 block_dim_offset = 20; // vector core offset
}

// Context block embedded as field 1 (`context`) of KernelDef,
// KernelDefWithHandle, AicoreFusionTaskInfo and AicpuFusionTaskInfo.
// The last two fields are declared out of numeric order (10 before 8) and
// number 9 is not used; the declaration order is kept as is.
message KernelContext {
    uint32 kernel_type = 1;
    uint32 op_id = 2;                              // OP type in CCE
    uint32 kernel_func_id = 3;
    uint32 op_index = 4;                           // TE/Custom operator
    bool is_flowtable = 5;                         // Identify whether args is a flowtable structure
    bytes args_offset = 6;                         // args offset information
    uint32 args_count = 7;                         // args count
    string args_format = 10;
    repeated uint32 origin_op_index = 8;
}

// Describes one group of kernel arguments; used as the element type of
// KernelDef.args_info and KernelDefWithHandle.args_info.
message ArgsInfo {
    // Kind of argument. INPUT is the zero value and therefore the default.
    enum ArgsType {
        INPUT = 0;
        OUTPUT = 1;
    }
    // Addressing form. DIRECT_ADDR is the zero value and therefore the default.
    enum ArgsFormat {
        DIRECT_ADDR = 0;
        SECONDARY_ADDR = 1;
    }
    ArgsType arg_type = 1;              // type of the entry in args memory (input, output, etc.)
    ArgsFormat arg_format = 2;          // first-level pointer or second-level pointer
    int32 start_index = 3;              // -1 means there is no actual connected edge
    uint32 size = 4;                    // actual number of data items
}

// Payload of TaskDef.kernel_ex: flags, an op index, raw args, serialized task
// info and kernel extension info, each blob accompanied by a size field.
// Field numbers 2, 3 and 5-11 are not used in this definition.
message KernelExDef {
    uint32 flags = 1;

    uint32 op_index = 4;
    uint32 args_size = 12;
    bytes args = 13;
    bytes task_info = 14;                 // serialized nodeDef, funcDef, inputoutput
    uint32 task_info_size = 15;
    bytes kernel_ext_info = 16;
    uint32 kernel_ext_info_size = 17;
}


// Payload of TaskDef.kernel_hccl. Field numbering starts at 8; numbers 1-7 are
// not used in this definition.
message KernelHcclDef {
    uint32 op_index = 8;
    string hccl_type = 9;
    // Per-input / per-output flags stored as int32.
    // NOTE(review): presumably one entry per tensor, non-zero meaning enabled - confirm.
    repeated int32 input_zero_copy_flag = 10;
    repeated int32 output_zero_copy_flag = 11;
    uint32 aiv_block_dim = 12;
}


// Payload of TaskDef.event_ex: an op index and a numeric event type.
message EventExDef {
    uint32 op_index = 1;
    uint32 event_type = 2;
}

// Payload of TaskDef.log_timestamp.
message LogTimeStampDef {
    uint64 logid = 1;
    bool notify = 2;
    // NOTE(review): `flat` may be a misspelling of `flag`. Renaming would change
    // generated accessors and JSON names, so the name is left untouched.
    uint32 flat = 3;
}

// Payload of TaskDef.memcpy_async.
// NOTE(review): presumably `dst`/`src` are addresses, `dst_max` the destination
// capacity and `count` the amount to copy - the schema states no units; confirm.
message MemcpyAsyncDef {
    uint64 dst = 1;
    uint64 dst_max = 2;
    uint64 src = 3;
    uint64 count = 4;
    uint32 kind = 5;
    uint32 op_index = 6;
    string args_format = 7;
}

// Payload of TaskDef.stream_switch: carries both a signed literal `value` and an
// unsigned `value_ptr`, plus the stream id used for the true branch.
message StreamSwitchDef {
    uint32 op_index = 1;
    uint32 true_stream_id = 2;
    int64 value = 3;
    uint64 value_ptr = 4;
    uint32 data_type = 5;
}

// Payload of TaskDef.stream_active: an op index and the id of the stream to
// activate.
message StreamActiveDef {
    uint32 op_index = 1;
    uint32 active_stream_id = 2;
}

// Payload of TaskDef.stream_switch_n: the multi-way counterpart of
// StreamSwitchDef, with repeated target values and repeated true-stream ids.
// NOTE(review): `target_value` and `true_stream_id` look like parallel lists
// paired by index, with `size` giving their length - confirm against the reader.
message StreamSwitchNDef {
    uint32 op_index = 1;
    uint32 size = 2;
    repeated int64 target_value = 3;
    repeated uint32 true_stream_id = 4;
    uint32 element_size = 5;
    uint32 data_type = 6;
}

// Payload of TaskDef.label_set. Has the same field layout as LabelGotoExDef.
message LabelSetDef {
    uint32 op_index = 1;
    uint32 label_id = 2;
    uint32 model_id = 3;
}

// Payload of TaskDef.label_goto_ex. Has the same field layout as LabelSetDef.
message LabelGotoExDef {
    uint32 op_index = 1;
    uint32 label_id = 2;
    uint32 model_id = 3;
}

// Payload of TaskDef.label_switch_by_index: an op index and a `label_max` value.
message LabelSwitchByIndexDef {
    uint32 op_index = 1;
    uint32 label_max = 2;
}

// Payload of TaskDef.ffts_task: a threading type, one FftsDescInfoDef, and
// lists of sub-tasks and ticket caches.
message FftsTaskDef {
    uint32 op_index = 1;

    uint32 ffts_type = 2;   // 2: Auto Threading / 3: Manual Threading
    uint32 addr_size = 3;   // Total task addr num
    FftsDescInfoDef ffts_desc = 4;
    repeated FftsSubTaskDef sub_task = 5;
    repeated TicketCacheDef ticket_cache = 6;
}

// Descriptor settings embedded in FftsTaskDef.ffts_desc. All fields are uint32.
message FftsDescInfoDef {
    uint32 tm = 1;    // thread subtask kickstart mode, 0:order, 1:disorder
    uint32 di = 2;    // discard invalidate
    uint32 dw = 3;    // discard write back
    uint32 df = 4;    // discard flush
    uint32 data_split_unit = 5;    // split source or ticket cache by 2~dataSplitUnit MB
    uint32 prefetch_ost_num = 6;
    uint32 cache_maintain_ost_num = 7;
    uint32 aic_prefetch_upper = 8;
    uint32 aic_prefetch_lower = 9;
    uint32 aiv_prefetch_upper = 10;
    uint32 aiv_prefetch_lower = 11;
}

// Element type of FftsTaskDef.sub_task. Holds the sub-task type, thread count,
// ticket-cache bitmaps and ids, and three optional detail messages.
// NOTE(review): the auto/manual detail messages are plain fields rather than a
// oneof; presumably only the one matching the threading mode is populated -
// confirm.
message FftsSubTaskDef {
    uint32 sub_task_type = 1;   // 0: AIC / 1: AIV / 2: DMU / 3: NOP / 4: Prefetch

    uint32 thread_dim = 2;    // thread count
    uint32 dst_tick_cache_vld_bitmap = 3;
    uint32 src_tick_cache_vld_bitmap = 4;
    uint32 src_data_out_of_subgraph_bitmap = 5;

    repeated uint32 dst_tick_cache_id = 6;  // Max 8 output
    repeated uint32 src_tick_cache_id = 7;  // Max 8 input

    AutoThreadAicAivDef auto_thread_aic_aiv = 8;

    ManualThreadAicAivDef manual_thread_aic_aiv = 9;
    ManualThreadNopDef manual_thread_nop = 10;
}

// Element type of FftsTaskDef.ticket_cache: a cache option, a window value, and
// one detail message each for the auto-thread and manual-thread variants.
message TicketCacheDef {
    uint32 cache_option = 1;    // 1: invalidate / 2: flush
    uint32 ticket_cache_window = 2;

    AutoThreadCacheDef auto_thread_cache = 3;

    ManualThreadCacheDef manual_thread_cache = 4;
}

// Detail message of FftsSubTaskDef.auto_thread_aic_aiv: task addresses and
// offsets, scheduling and prefetch settings, tail / non-tail block dimensions
// with their function stub names, and a list of source prefetch descriptors.
message AutoThreadAicAivDef {
    repeated uint64 task_addr = 1; // input / output address (runtime device memory)
    repeated uint64 task_addr_offset = 2;
    uint32 task_param_offset = 3;

    uint32 sat_mode = 4;
    uint32 schedule_mode = 5;    // 0:normal mode, 1:batch mode, 2:sync mode, 3:reserved
    uint32 cache_prefetch_cnt = 6;    // units is 2K
    uint32 prefetch_enable_bitmap = 7;  // 8 bit bitmap
    uint32 prefetch_once_bitmap = 8;    // 8 bit bitmap

    uint32 tail_blk_dim = 9;
    uint32 non_tail_blk_dim = 10;

    string non_tail_task_func_stub = 11;
    string tail_task_func_stub = 12;

    repeated AutoThreadPrefetchDef src_prefetch = 13;
    uint32 input_output_count = 14;
}

// Detail message of TicketCacheDef.auto_thread_cache. Its first four fields
// match AutoThreadPrefetchDef; it adds a ticket-cache reference count.
message AutoThreadCacheDef {
    uint64 data_addr = 1;   // device mem
    uint32 data_addr_offset = 2;
    uint32 non_tail_data_len = 3;
    uint32 tail_data_len = 4;
    uint32 ticket_cache_ref_cnt = 5;
}

// Element type of AutoThreadAicAivDef.src_prefetch: a data address, its offset,
// and separate lengths for the non-tail and tail cases.
message AutoThreadPrefetchDef {
    uint64 data_addr = 1;  // device mem
    uint32 data_addr_offset = 2;
    uint32 non_tail_data_len = 3;
    uint32 tail_data_len = 4;
}

// Detail message of FftsSubTaskDef.manual_thread_aic_aiv. Fields 1-8 match
// AutoThreadAicAivDef; the remaining fields are per-thread lists (DMU indices,
// block dims, function stub names) plus prefetch and dependency tables.
message ManualThreadAicAivDef {
    repeated uint64 task_addr = 1;    // input/output address(runtime device memory)
    repeated uint64 task_addr_offset = 2;
    uint32 task_param_offset = 3;

    uint32 sat_mode = 4;
    uint32 schedule_mode = 5;    // 0:normal mode; 1:batch mode; 2:sync mode; 3:reserved
    uint32 cache_prefetch_cnt = 6;  // units is 2K
    uint32 prefetch_enable_bitmap = 7;  // 8 bit bitmap 1010
    uint32 prefetch_once_bitmap = 8;    // 8 bit bitmap 1010

    uint32 prefetch_once_dmu_num = 9;   // prefetch_once_dmu_descriptor_index in ffts
    // num: thread0_prefetch_dmu_descriptor_index - prefetch_once_dmu_descriptor_index
    // offset: PrefetchOnceDmuDescIndex

    repeated uint32 thread_prefetch_dmu_idx = 10;    // max valid is thread dim
    repeated uint32 thread_blk_dim = 11;

    repeated string thread_task_func_stub = 12;

    repeated ManualThreadDmuDef prefetch_list = 13;  // dmu desc 0-64k
    repeated ManualThreadDependencyDef src_dep_tbl = 14;
    uint32 input_output_count = 15;
}

// Detail message of FftsSubTaskDef.manual_thread_nop: only a source dependency
// table.
message ManualThreadNopDef {
    repeated ManualThreadDependencyDef src_dep_tbl = 1;
}

// Detail message of TicketCacheDef.manual_thread_cache: a list of DMU
// descriptors plus two index / reference-count tables.
message ManualThreadCacheDef {
    repeated ManualThreadDmuDef dmu_list = 1;

    repeated uint32 slice_dmu_idx = 2;
    repeated uint32 ticket_cache_ref_cnt_tbl = 3;
}

// DMU descriptor used by ManualThreadAicAivDef.prefetch_list and
// ManualThreadCacheDef.dmu_list: a data address with outer / inner counts,
// strides and an inner length.
message ManualThreadDmuDef {
    uint64 data_addr = 1;   // device mem
    uint32 num_outer = 2;
    uint32 num_inner = 3;
    uint32 stride_outer = 4;
    uint32 len_inner = 5;
    uint32 stride_inner = 6;
}

// One row of a `src_dep_tbl` (see ManualThreadAicAivDef and ManualThreadNopDef):
// a list of uint32 dependency values.
message ManualThreadDependencyDef {
    repeated uint32 dependency = 1;
}

// Payload of TaskDef.dsa_task: scalar control fields followed by a
// DSATaskArgsDef. Fields 2 and 4-12 have same-named counterparts in
// FftsPlusDsaCtxDef, under different field numbers.
message DSATaskDef {
    uint32 op_index = 1;
    uint32 start = 2; // start, the value is 1
    uint32 sqe_type = 3;
    uint32 distribution_type = 4;
    uint32 data_type = 5;
    uint32 alg_type = 6;
    uint32 input_vld = 7;
    uint32 input_value_addr_flag = 8;
    uint32 input1_value_or_ptr = 9;
    uint32 input2_value_or_ptr = 10;
    uint32 seed_value_or_ptr = 11;
    uint32 random_count_value_or_ptr = 12;
    DSATaskArgsDef args = 13;
}

// Argument block shared by DSATaskDef.args and FftsPlusDsaCtxDef.args: three
// uint64 addresses followed by four `bytes` fields.
// NOTE(review): the `*_value_or_addr` fields are raw bytes; presumably the
// matching `*_value_or_ptr` selector in the parent message says whether each
// holds a literal value or an address - confirm.
message DSATaskArgsDef {
    uint64 output_addr = 1;
    uint64 workspace_philox_count_addr = 2;
    uint64 workspace_input_addr = 3;
    bytes seed_value_or_addr = 4;
    bytes random_count_value_or_addr = 5;
    bytes input1_value_or_addr = 6;
    bytes input2_value_or_addr = 7;
}

// Payload of TaskDef.ffts_plus_task: one SQE description, the list of contexts,
// and a list of additional data entries.
message FftsPlusTaskDef {
    uint32 op_index = 1;
    uint32 addr_size = 2;   // Total task addr num
    FftsPlusSqeDef ffts_plus_sqe = 3;
    repeated FftsPlusCtxDef ffts_plus_ctx = 4;  // include total context
    repeated AdditionalDataDef additional_data = 5;
}

// Element type of FftsPlusTaskDef.additional_data: a data type tag and the
// context ids it applies to.
message AdditionalDataDef {
    uint32 data_type = 1;   // ModeInArgsFirstField
    repeated uint32 context_id = 2;
}

// SQE description embedded in FftsPlusTaskDef.ffts_plus_sqe: a header message,
// context counters, and prefetch / cache-maintenance settings.
message FftsPlusSqeDef {
    StarsSqeHeaderDef sqe_header = 1;

    uint32 wrr_ratio = 2;
    uint32 sqe_index = 3;

    uint32 total_context_num = 4;
    uint32 ready_context_num = 5;
    uint32 preload_context_num = 6;

    uint32 prefetch_ost_num = 7;
    uint32 cmaint_ost_num = 8;

    // Note: here "lower" precedes "upper", the reverse of FftsDescInfoDef.
    uint32 aic_prefetch_lower = 9;
    uint32 aic_prefetch_upper = 10;
    uint32 aiv_prefetch_lower = 11;
    uint32 aiv_prefetch_upper = 12;

    uint32 data_split_unit = 13;
}

// Header embedded in FftsPlusSqeDef.sqe_header: two L1 lock settings and a
// block dimension.
message StarsSqeHeaderDef {
    uint32 l1_lock = 1;
    uint32 l1_unlock = 2;
    uint32 block_dim = 3;
}

// FFTS-plus context: element type of FftsPlusTaskDef.ffts_plus_ctx. Holds
// common identification fields followed by one message field per context kind.
// NOTE(review): the per-kind `*_ctx` fields are plain fields rather than a
// oneof; presumably `context_type` indicates which one is populated - confirm.
message FftsPlusCtxDef {
    // NORMAL is the zero value and therefore the default.
    enum OpType {
        NORMAL = 0;
        ATOMIC = 1;
    }
    uint32 op_index = 1;
    string uniq_ctx_name = 2;
    uint32 context_type = 3;
    uint32 context_id = 4;
    OpType op_type = 5;

    FftsPlusAicAivCtxDef aic_aiv_ctx = 6;
    FftsPlusMixAicAivCtxDef mix_aic_aiv_ctx = 7;
    FftsPlusSdmaCtxDef sdma_ctx = 8;
    FftsPlusNotifyCtxDef notify_ctx = 9;
    FftsPlusWriteValueCtxDef write_value_ctx = 10;
    FftsPlusAicpuCtxDef aicpu_ctx = 11;
    FftsPlusDataCtxDef data_ctx = 12;
    FftsPlusAtStartCtxDef at_start_ctx = 13;
    FftsPlusAtEndCtxDef at_end_ctx = 14;
    FftsPlusLabelCtxDef label_ctx = 15;
    FftsPlusCaseSwitchCtxDef case_switch_ctx = 16;
    FftsPlusCaseDefaultCtxDef case_default_ctx = 17;
    FftsPlusCondSwitchCtxDef cond_switch_ctx = 18;
    FftsPlusCachePersistCtxDef cache_persist_ctx = 19;
    FftsPlusDsaCtxDef dsa_ctx = 20;
}

// AIC/AIV context, carried in FftsPlusCtxDef.aic_aiv_ctx. Groups: successor /
// predecessor bookkeeping, scheduling and prefetch settings, QoS-related ids,
// thread and block dimensions, task addresses, and kernel names.
message FftsPlusAicAivCtxDef {
    uint32 successor_num = 1;
    uint32 aten = 2;
    uint32 prefetch_config = 3;
    uint32 pred_cnt_init = 4;
    uint32 pred_cnt = 5;
    repeated uint32 successor_list = 6; // 16 bits, len = 26

    uint32 schem = 7;
    uint32 atm = 8;
    uint32 prefetch_enable_bitmap = 9;
    uint32 prefetch_once_bitmap = 10;

    uint32 pmg = 11;
    uint32 ns = 12;
    uint32 part_id = 13;
    uint32 qos = 14;

    uint32 thread_id = 15;
    uint32 thread_dim = 16;

    uint32 non_tail_block_dim = 17;
    uint32 tail_block_dim = 18;

    uint32 task_param_ptr_offset = 19;
    uint32 save_task_addr = 20;
    repeated uint64 task_addr = 21;
    repeated uint64 task_addr_offset = 22;
    uint32 input_output_count = 23;

    repeated string kernel_name = 24;

    repeated uint32 src_slot = 25;  // len = 4, context ID for source data which is out of subgraph
    uint32 policy_pri = 26;
    uint32 thread_window_size = 27;
}

// Mixed AIC/AIV context, carried in FftsPlusCtxDef.mix_aic_aiv_ctx. Fields 1-14
// match FftsPlusAicAivCtxDef. From 15 on the numbering diverges: this message
// adds block ratio fields, separate AIC / AIV task-parameter offsets and an
// `args_format` string.
message FftsPlusMixAicAivCtxDef {
    uint32 successor_num = 1;
    uint32 aten = 2;
    uint32 prefetch_config = 3;
    uint32 pred_cnt_init = 4;
    uint32 pred_cnt = 5;
    repeated uint32 successor_list = 6; // len = 26

    uint32 schem = 7;
    uint32 atm = 8;
    uint32 prefetch_enable_bitmap = 9;
    uint32 prefetch_once_bitmap = 10;

    uint32 pmg = 11;
    uint32 ns = 12;
    uint32 part_id = 13;
    uint32 qos = 14;

    uint32 non_tail_block_ratio_n = 15;
    uint32 tail_block_ratio_n = 16;

    uint32 thread_id = 17;
    uint32 thread_dim = 18;

    uint32 non_tail_block_dim = 19;
    uint32 tail_block_dim = 20;

    uint32 aic_task_param_ptr_offset = 21;
    uint32 aiv_task_param_ptr_offset = 22;

    repeated string kernel_name = 23;

    repeated uint64 task_addr = 24;
    repeated uint64 task_addr_offset = 25;
    uint32 input_output_count = 26;
    uint32 save_task_addr = 27;
    repeated uint32 src_slot = 28;  // len = 4, context ID for source data which is out of subgraph
    uint32 policy_pri = 29;
    uint32 thread_window_size = 30;
    string args_format = 31;
}

// SDMA context, carried in FftsPlusCtxDef.sdma_ctx: successor / predecessor
// bookkeeping, QoS-related ids, thread info, an SQE header word, source and
// destination stream ids, base addresses with offsets, and data lengths.
message FftsPlusSdmaCtxDef {
    uint32 successor_num = 1;
    uint32 aten = 2;
    uint32 pred_cnt_init = 3;
    uint32 pred_cnt = 4;
    repeated uint32 successor_list = 5; // len = 26

    uint32 atm = 6;
    uint32 pmg = 7;
    uint32 ns = 8;
    uint32 part_id = 9;
    uint32 qos = 10;

    uint32 thread_id = 11;
    uint32 thread_dim = 12;

    uint32 sdma_sqe_header = 13;

    uint32 src_stream_id = 14;
    uint32 src_sub_stream_id = 15;
    uint32 dst_stream_id = 16;
    uint32 dst_sub_stream_id = 17;

    // Base addresses are uint64; their offsets are uint32.
    uint64 src_addr_base = 18;
    uint32 src_addr_offset = 19;
    uint64 dst_addr_base = 20;
    uint32 dst_addr_offset = 21;

    uint32 non_tail_data_len = 22;
    uint32 tail_data_len = 23;
}

// FFTS-plus notify record/wait context, carried in FftsPlusCtxDef.notify_ctx.
message FftsPlusNotifyCtxDef {
    uint32 successor_num = 1;
    uint32 aten = 2;
    uint32 pred_cnt_init = 3;
    uint32 pred_cnt = 4;
    repeated uint32 successor_list = 5; // len = 26

    uint32 satm = 6;
    uint32 atm = 7;

    uint32 thread_id = 8;
    uint32 thread_dim = 9;

    uint32 notify_id_base = 10;
    uint32 auto_window = 11;

    repeated uint32 notify_id = 12;
}

// Write-value context, carried in FftsPlusCtxDef.write_value_ctx: successor /
// predecessor bookkeeping, thread info, two parallel groups of `aw_*` and `ar_*`
// settings, a write address (base + offset) and the values to write.
message FftsPlusWriteValueCtxDef {
    uint32 successor_num = 1;
    uint32 aten = 2;
    uint32 pred_cnt_init = 3;
    uint32 pred_cnt = 4;
    repeated uint32 successor_list = 5; // len = 26

    uint32 atm = 6;
    uint32 thread_id = 7;
    uint32 thread_dim = 8;

    uint32 aw_size = 9;
    uint32 aw_snoop = 10;
    uint32 aw_cache = 11;
    uint32 aw_prot = 12;
    uint32 aw_va = 13;

    uint32 ar_size = 14;
    uint32 ar_snoop = 15;
    uint32 ar_cache = 16;
    uint32 ar_prot = 17;
    uint32 ar_va = 18;

    uint64 write_addr_base = 19;
    uint32 write_addr_offset = 20;

    repeated uint32 write_value = 21;
}

// AICPU context, carried in FftsPlusCtxDef.aicpu_ctx: successor / predecessor
// bookkeeping, kernel and topic settings, thread and block dimensions, and an
// embedded aicpuKernelDef.
message FftsPlusAicpuCtxDef {
    uint32 successor_num = 1;
    uint32 aten = 2;
    uint32 pred_cnt_init = 3;
    uint32 pred_cnt = 4;
    repeated uint32 successor_list = 5;   // len = 26

    uint32 atm = 6;
    uint32 sqe_index = 7;
    uint32 kernel_type = 8;
    uint32 bm = 9;
    uint32 topic_type = 10;
    uint32 qos = 11;

    uint32 thread_id = 12;
    uint32 thread_dim = 13;

    uint32 non_tail_block_dim = 14;
    uint32 tail_block_dim = 15;

    uint32 sub_topic_id = 16;
    uint32 topic_id = 17;
    uint32 group_id = 18;

    uint32 task_param_offset = 19;

    aicpuKernelDef kernel = 20;
}

// Kernel description embedded in FftsPlusAicpuCtxDef.kernel: raw args with
// their size, shared-object and kernel names, and kernel extension info with
// its size.
// NOTE(review): this is the only message in the file whose name starts with a
// lowercase letter. Renaming it would change generated type names, so it is
// left as is.
message aicpuKernelDef {
    uint32 args_size = 1;
    bytes args = 2;
    string so_name = 3;
    string kernel_name = 4;
    bytes kernel_ext_info = 5;
    uint32 kernel_ext_info_size = 6;
}

// Data context, carried in FftsPlusCtxDef.data_ctx. Unlike the other contexts
// it names its counters `cnt_init` / `cnt` rather than `pred_cnt_init` /
// `pred_cnt`. It also carries consumer counters, an address (base + offset),
// and two parallel groups of outer / inner layout fields for the non-tail and
// tail cases.
// NOTE(review): `outter` is spelled this way in the field names; renaming would
// change generated accessors, so the spelling is kept.
message FftsPlusDataCtxDef {
    uint32 successor_num = 1;
    uint32 aten = 2;
    uint32 cnt_init = 3;
    uint32 cnt = 4;
    repeated uint32 successor_list = 5; // len = 26

    uint32 atm = 6;
    uint32 pmg = 7;
    uint32 ns = 8;
    uint32 part_id = 9;
    uint32 qos = 10;

    uint32 orig_consumer_counter = 11;
    uint32 run_consumer_counter = 12;

    uint32 thread_id = 13;
    uint32 thread_dim = 14;

    uint64 addr_base = 15;
    uint32 addr_offset = 16;

    uint32 non_tail_num_outter = 17;
    uint32 non_tail_num_inner = 18;
    uint32 non_tail_len_inner = 19;
    uint32 non_tail_stride_outter = 20;
    uint32 non_tail_stride_inner = 21;

    uint32 tail_num_outter = 22;
    uint32 tail_num_inner = 23;
    uint32 tail_len_inner = 24;
    uint32 tail_stride_outter = 25;
    uint32 tail_stride_inner = 26;
}

// "At start" context, carried in FftsPlusCtxDef.at_start_ctx: successor /
// predecessor bookkeeping, thread info, an initial thread id and a thread
// window size.
message FftsPlusAtStartCtxDef {
    uint32 successor_num = 1;
    uint32 aten = 2;
    uint32 pred_cnt_init = 3;
    uint32 pred_cnt = 4;
    repeated uint32 successor_list = 5; // len = 26

    uint32 thread_id = 6;
    uint32 thread_dim = 7;

    uint32 thread_id_init = 8;
    uint32 thread_window_size = 9;
}

// "At end" context, carried in FftsPlusCtxDef.at_end_ctx. Instead of a single
// successor list it has two slot lists (at-start slots and out-label slots),
// each with its own count field.
message FftsPlusAtEndCtxDef {
    uint32 at_start_slot_num = 1;
    uint32 out_label_slot_num = 2;
    uint32 aten = 3;

    uint32 pred_cnt_init = 4;
    uint32 pred_cnt = 5;

    repeated uint32 succ_at_start_slot = 6;     // len = 12
    repeated uint32 succ_out_label_slot = 7;    // len = 12

    uint32 thread_id = 8;
}

// Label context, carried in FftsPlusCtxDef.label_ctx: only successor /
// predecessor bookkeeping and thread info.
message FftsPlusLabelCtxDef {
    uint32 successor_num = 1;
    uint32 aten = 2;
    uint32 pred_cnt_init = 3;
    uint32 pred_cnt = 4;
    repeated uint32 successor_list = 5; // len = 26
    uint32 thread_id = 6;
    uint32 thread_dim = 7;
}

// Case-switch context, carried in FftsPlusCtxDef.case_switch_ctx. Fields 1-7
// match FftsPlusCaseDefaultCtxDef; the rest are thread info, read-access
// settings and two load addresses, each with an enable field and an offset.
message FftsPlusCaseSwitchCtxDef {
    uint32 successor_num = 1;
    uint32 aten = 2;
    uint32 start_label_id = 3;
    uint32 label_list_len = 4;
    uint32 pred_cnt_init = 5;
    uint32 pred_cnt = 6;
    repeated uint32 successor_list = 7; // len = 26

    uint32 atm = 8;

    uint32 thread_id = 9;
    uint32 thread_dim = 10;

    uint32 ar_size = 11;
    uint32 snoop = 12;
    uint32 ar_cache = 13;
    uint32 ar_prot = 14;
    uint32 va = 15;

    uint64 load_addr0_base = 16;
    uint32 ld0_en = 17;
    uint32 load_addr0_offset = 18;

    uint64 load_addr1_base = 19;
    uint32 ld1_en = 20;
    uint32 load_addr1_offset = 21;
}

// Case-default context, carried in FftsPlusCtxDef.case_default_ctx. Its seven
// fields are the same as fields 1-7 of FftsPlusCaseSwitchCtxDef.
message FftsPlusCaseDefaultCtxDef {
    uint32 successor_num = 1;
    uint32 aten = 2;
    uint32 start_label_id = 3;
    uint32 label_list_len = 4;
    uint32 pred_cnt_init = 5;
    uint32 pred_cnt = 6;
    repeated uint32 successor_list = 7; // len = 26
}

// Conditional-switch context, carried in FftsPlusCtxDef.cond_switch_ctx. It
// keeps separate successor counts and lists for the true and false branches,
// followed by thread info, read-access settings, two load addresses and two
// compare values.
message FftsPlusCondSwitchCtxDef {
    uint32 true_successor_num = 1;
    uint32 false_successor_num = 2;
    uint32 aten = 3;

    uint32 condition = 4;
    uint32 pred_cnt_init = 5;
    uint32 pred_cnt = 6;

    repeated uint32 true_successor_list = 7;    // len = 12
    repeated uint32 false_successor_list = 8;   // len = 14

    uint32 atm = 9;

    uint32 thread_id = 10;
    uint32 thread_dim = 11;

    uint32 ar_size = 12;
    uint32 snoop = 13;
    uint32 ar_cache = 14;
    uint32 ar_prot = 15;
    uint32 va = 16;

    uint64 load_addr0_base = 17;
    uint32 ld0_en = 18;
    uint32 load_addr0_offset = 19;

    uint64 load_addr1_base = 20;
    uint32 ld1_en = 21;
    uint32 load_addr1_offset = 22;

    uint32 cmp_value_1 = 23;
    uint32 cmp_value_2 = 24;
}

// Cache-persist context, carried in FftsPlusCtxDef.cache_persist_ctx:
// successor / predecessor bookkeeping plus persistent size, enable and id.
message FftsPlusCachePersistCtxDef {
    uint32 successor_num = 1;
    uint32 aten = 2;
    uint32 prefetch_config = 3;
    uint32 pred_cnt_init = 4;
    uint32 pred_cnt = 5;
    repeated uint32 successor_list = 6; // 16 bits, len = 26

    uint32 persistent_size = 7;
    uint32 persistent_en = 8;
    uint32 persistent_id = 9;
}

// FFTS-plus DSA context, carried in FftsPlusCtxDef.dsa_ctx. Fields 10-19 have
// same-named counterparts in DSATaskDef (under different numbers), and `args`
// reuses DSATaskArgsDef.
message FftsPlusDsaCtxDef {
    uint32 successor_num = 1;
    uint32 aten = 2;
    uint32 pred_cnt_init = 3;
    uint32 pred_cnt = 4;
    repeated uint32 successor_list = 5; // 16 bits, len = 26
    uint32 atm = 6;
    uint32 address_offset = 7;
    uint32 thread_id = 8;
    uint32 thread_dim = 9;

    uint32 start = 10; // start
    uint32 distribution_type = 11;
    uint32 data_type = 12;
    uint32 alg_type = 13;
    uint32 input_vld = 14;
    uint32 input_value_addr_flag = 15;
    uint32 input1_value_or_ptr = 16;
    uint32 input2_value_or_ptr = 17;
    uint32 seed_value_or_ptr = 18;
    uint32 random_count_value_or_ptr = 19;
    DSATaskArgsDef args = 20;
}

// Payload of TaskDef.cmo_task: a CMO type and logic id, QoS-related ids, and an
// inner / outer layout over a source address.
// NOTE(review): the stride fields are named `strider_*` here, whereas
// CmoAddrTaskDef uses `stride_*`. Renaming would change generated accessors,
// so the names are kept.
message CmoTaskDef {
    uint32 cmo_type = 1;
    uint32 logic_id = 2;
    uint32 op_code = 3;
    uint32 qos = 4;
    uint32 part_id = 5;
    uint32 pmg = 6;
    uint32 num_inner = 7;
    uint32 num_outer = 8;
    uint32 length_inner = 9;
    uint64 source_addr = 10;
    uint32 strider_outer = 11;
    uint32 strider_inner = 12;
}

// Payload of TaskDef.cmo_addr_task: an op code, op index, an inner / outer
// layout over a source address, two `resv*` fields and an args format string.
message CmoAddrTaskDef {
    uint32 cmo_op_code = 1;
    uint32 op_index = 2;
    uint32 num_inner = 3;
    uint32 num_outer = 4;
    uint64 src = 5;
    uint32 length_inner = 6;
    uint32 stride_outer = 7;
    uint32 stride_inner = 8;
    // NOTE(review): presumably reserved / padding words - confirm.
    uint32 resv0 = 9;
    uint32 resv1 = 10;
    string args_format = 11;
}

// Payload of TaskDef.cmo_barrier_task: a count field and a list of
// CmoBarrierInfoDef entries.
// NOTE(review): presumably `logic_id_num` equals the length of `barrier_info` -
// confirm.
message CmoBarrierTaskDef {
    uint32 logic_id_num = 1;
    repeated CmoBarrierInfoDef barrier_info = 2;
}

// Element type of CmoBarrierTaskDef.barrier_info. Its two fields have the same
// names and numbers as the first two fields of CmoTaskDef.
message CmoBarrierInfoDef {
    uint32 cmo_type = 1;
    uint32 logic_id = 2;
}

// Payload of TaskDef.npu_get_float_status: an output address and size, a mode
// and an op index. Has the same field layout as NpuGetFloatDebugStatusDef.
message NpuGetFloatStatusDef {
    uint64 output_addr = 1;
    uint32 output_size = 2;
    uint32 mode = 3;
    uint32 op_index = 4;
}

// Payload of TaskDef.npu_clear_float_status: a mode and an op index. Has the
// same field layout as NpuClearFloatDebugStatusDef.
message NpuClearFloatStatusDef {
    uint32 mode = 1;
    uint32 op_index = 2;
}

// Payload of TaskDef.npu_get_float_debug_status. Has the same field layout as
// NpuGetFloatStatusDef.
message NpuGetFloatDebugStatusDef {
    uint64 output_addr = 1;
    uint32 output_size = 2;
    uint32 mode = 3;
    uint32 op_index = 4;
}

// Payload of TaskDef.npu_clear_float_debug_status. Has the same field layout as
// NpuClearFloatStatusDef.
message NpuClearFloatDebugStatusDef {
    uint32 mode = 1;
    uint32 op_index = 2;
}

// Payload of TaskDef.dvpp_task: only an op index.
message DvppTaskDef {
    uint32 op_index = 1;
}

// Payload of TaskDef.update_pc_task: an op index, a stream id and an args
// format string.
message UpdatePcTaskDef {
    uint32 op_index = 1;
    uint32 stream_id = 2;
    string args_format = 3;
}

// Identifies a queue; embedded as `queue_attrs` in QueueInfo and QueueStatus.
message QueueAttrs {
    uint32 queue_id = 1;
    // CPU NPU
    int32 device_type = 2;
    int32 device_id = 3;
    // Global id requested dynamically.
    uint32 logic_id = 4;
}

// Queue entry exchanged in FlowgwRequest.queue_infos and
// FlowgwResponse.queue_infos: the queue's attributes plus grouping, model and
// routing identifiers.
message QueueInfo {
    QueueAttrs queue_attrs = 1;
    uint32 logic_group_id = 2;
    uint32 model_uuid = 3;
    // NOTE(review): the `*_old` fields (int32) appear to be superseded by
    // `trans_id` (uint64, field 9) and `route_label` (uint32, field 10). If so,
    // they are candidates for `[deprecated = true]` - confirm with the owners.
    int32 trans_id_old = 4;
    int32 route_label_old = 5;
    uint32 choose_logic_id = 6;
    uint32 root_model_id = 7;
    bool need_cache = 8;
    uint64 trans_id = 9;
    uint32 route_label = 10;
}

// Request message carrying a node id, an input index and a list of QueueInfo
// entries. No service or RPC is declared in this file; the transport is defined
// elsewhere.
message FlowgwRequest {
    int32 node_id = 1;
    int32 input_index = 2;
    repeated QueueInfo queue_infos = 3;
}

// Response message carrying only a list of QueueInfo entries.
message FlowgwResponse {
    repeated QueueInfo queue_infos = 1;
}

// Status of one queue; element type of SubmodelStatus.queue_statuses. Pairs the
// queue's attributes with a depth value and an input-consume counter.
message QueueStatus {
    QueueAttrs queue_attrs = 1;
    uint32 queue_depth = 2;
    uint32 input_consume_num = 3;
}

// Exception record embedded in SubmodelStatus.exception: a signed code, a
// transaction id, a scope string, a user context id and an opaque context blob.
message DataFlowException {
    int32 exception_code = 1;
    uint64 trans_id = 2;
    string scope = 3;
    uint64 user_context_id = 4;
    // user_data + xxx + HeadMsg
    bytes exception_context = 5;
}

// Status report for one sub-model: its uuid, the per-queue statuses, a message
// type discriminator and an exception record.
message SubmodelStatus {
    uint32 model_uuid = 1;
    repeated QueueStatus queue_statuses = 2;
    // 0:status 1:exception
    uint32 msg_type = 3;
    DataFlowException exception = 4;
}

// Value half of a LaunchAttribute. Exactly one member of the `attribute_value`
// oneof is set at a time; setting one clears the others.
// NOTE(review): the oneof members appear to mirror
// LaunchAttribute.LaunchAttributeId in the same order (member number = id
// value + 1) - confirm against the code that pairs them.
message LaunchAttributeValue {
    // Payload for the `group` member: two uint32 dimensions.
    message Group {
        uint32 group_dim = 1;
        uint32 group_block_dim = 2;
    }

    oneof attribute_value {
        uint32 block_dim = 1;
        uint32 dynamic_share_mem_size = 2;
        Group group = 3;
        uint32 qos = 4;
        uint32 part_id = 5;
        uint32 schem_model = 6;
        uint32 block_dim_offset = 7;
        uint32 dump_flag = 8;
    }
}

// One launch attribute: an id paired with a LaunchAttributeValue. Element type
// of LaunchConfig.launch_attribute.
message LaunchAttribute {
    // Attribute identifier. BLOCKDIM is the zero value, so an unset `id` reads
    // as BLOCKDIM.
    // NOTE(review): MAX = 8 is presumably a sentinel / count rather than a real
    // attribute - confirm.
    enum LaunchAttributeId {
        BLOCKDIM = 0;
        DYNAMIC_SHARE_MEM_SIZE = 1;
        GROUP = 2;
        QOS = 3;
        PARTID = 4;
        SCHEMMODE = 5;
        BLOCKDIM_OFFSET = 6;
        DUMPFLAG = 7;
        MAX = 8;
    }
    LaunchAttributeId id = 1;
    LaunchAttributeValue value = 2;
}

// A list of launch attributes; embedded in AicoreFusionTaskInfo.config.
message LaunchConfig {
    repeated LaunchAttribute launch_attribute = 1;
}

// AI Core variant of FusionSubTaskDef.sub_task_info: a KernelContext, an
// `is_all_kernel` flag and a LaunchConfig.
message AicoreFusionTaskInfo {
    KernelContext context = 1;
    bool is_all_kernel = 2;
    LaunchConfig config = 3;
}

// AICPU variant of FusionSubTaskDef.sub_task_info: a KernelContext, flags,
// block dimension, shared-object and kernel names, and kernel extension info.
message AicpuFusionTaskInfo {
    KernelContext context = 1;
    uint32 flags = 2;
    uint32 block_dim = 3;
    string so_name = 4;
    string kernel_name = 5;
    bytes kernel_ext_info = 6;
}

// Element type of CcuTaskGroup.ccu_task_info: die / mission ids, a timeout,
// instruction start id and count, a key, and a list of uint64 args.
// NOTE(review): field number 8 is not used (args is 9). If 8 belonged to a
// removed field it should be declared `reserved` - confirm.
message CcuTaskInfo {
    uint32 die_id = 1;
    uint32 mission_id = 2;
    // NOTE(review): no unit is stated for the timeout - confirm.
    uint32 timeout = 3;
    uint32 inst_start_id = 4;
    uint32 inst_cnt = 5;
    uint32 key = 6;
    uint32 arg_size = 7;
    repeated uint64 args = 9;
}

// CCU variant of FusionSubTaskDef.sub_task_info: a list of CcuTaskInfo entries
// and a list of group strings.
message CcuTaskGroup {
    repeated CcuTaskInfo ccu_task_info = 1;
    repeated string group = 2;
}

// Body of one fusion sub-task. Exactly one of the three variants in the
// `sub_task_info` oneof is set at a time; setting one clears the others.
message FusionSubTaskDef {
    oneof sub_task_info {
        AicoreFusionTaskInfo aicore_fusion_task_info = 1;
        AicpuFusionTaskInfo aicpu_fusion_task_info = 2;
        CcuTaskGroup ccu_task_group = 3;
    }
}

// Element type of FusionTaskDef.fusion_sub_task_info: a fusion type tag paired
// with the sub-task body.
message FusionSubTaskInfo {
    // Sub-task kind. HCOM_CPU is the zero value, so an unset `type` reads as
    // HCOM_CPU.
    // NOTE(review): END = 4 is presumably a sentinel / count rather than a real
    // kind - confirm.
    enum FusionType {
        HCOM_CPU = 0;
        AICPU = 1;
        AICORE = 2;
        CCU = 3;
        END = 4;
    }

    FusionType type = 1;
    FusionSubTaskDef task = 2;
}


// Payload of TaskDef.fusion_task: an op index, an args format string, a KFC
// args-format offset and the list of fusion sub-tasks.
message FusionTaskDef {
    uint32 op_index = 1;
    string args_format = 2;
    uint32 kfc_args_format_offset = 3;
    repeated FusionSubTaskInfo fusion_sub_task_info = 4;
}