from msconfig.meta_config import MetaConfig
from profiling_bean.prof_enum.chip_model import ChipModel
class AICoreConfig(MetaConfig):
    """Static lookup tables for AI Core profiling events and derived metrics.

    Every table is a dict with the sections ``'events'``, ``'metrics'``,
    ``'formula'``, ``'event2metric'`` and ``'custom'``; each section is a
    list of ``(key, value)`` string pairs.  ``DATA`` is the table currently
    in effect and is re-pointed by :meth:`set_config_data`.
    """

    # Table used for every chip that has no dedicated entry in DATA_MAP.
    DATA_DEFAULT = {
        # Event id (hex string) -> counter name.
        'events': [
            ('0x1', 'vec_instr_exec'),
            ('0x2', 'scalar_instr_exec'),
            ('0x3', 'cube_instr_exec'),
            ('0x4', 'mte1_instr_exec'),
            ('0x5', 'mte2_instr_exec'),
            ('0x6', 'mte3_instr_exec'),
            ('0x7', 'total_instr_exec'),
            ('0x8', 'vec_busy_cycles'),
            ('0x9', 'scalar_busy_cycles'),
            ('0xa', 'cube_busy_cycles'),
            ('0xb', 'mte1_busy_cycles'),
            ('0xc', 'mte2_busy_cycles'),
            ('0xd', 'mte3_busy_cycles'),
            ('0xe', 'total_busy_cycles'),
            ('0xf', 'l2_read_req'),
            ('0x10', 'l2_write_req'),
            ('0x11', 'total_l2_req'),
            ('0x12', 'main_read_req'),
            ('0x13', 'main_write_req'),
            ('0x14', 'total_main_req'),
            ('0x15', 'ub_read_req'),
            ('0x16', 'ub_write_req'),
            ('0x17', 'total_ub_req'),
            ('0x18', 'ub_read_cycles'),
            ('0x19', 'ub_write_cycles'),
            ('0x1a', 'total_ub_cycles'),
            ('0x1b', 'l0a_read_req'),
            ('0x1c', 'l0a_write_req'),
            ('0x1d', 'total_l0a_req'),
            ('0x1e', 'l0a_read_cycles'),
            ('0x1f', 'l0a_write_cycles'),
            ('0x20', 'total_l0a_cycles'),
            ('0x21', 'l0b_read_req'),
            ('0x22', 'l0b_write_req'),
            ('0x23', 'total_l0b_req'),
            ('0x24', 'l0b_read_cycles'),
            ('0x25', 'l0b_write_cycles'),
            ('0x26', 'total_l0b_cycles'),
            ('0x27', 'vec2l0c_read_req'),
            ('0x28', 'cube2l0c_read_req'),
            ('0x29', 'vec2l0c_write_req'),
            ('0x2a', 'cube2l0c_write_req'),
            ('0x2b', 'total_l0c_req'),
            ('0x2c', 'vec2l0c_read_cycles'),
            ('0x2d', 'cube2l0c_read_cycles'),
            ('0x2e', 'vec2l0c_write_cycles'),
            ('0x2f', 'cube2l0c_write_cycles'),
            ('0x30', 'total_l0c_cycles'),
            ('0x31', 'l1_read_req'),
            ('0x32', 'l1_write_req'),
            ('0x33', 'total_l1_req'),
            ('0x34', 'l1_read_cycles'),
            ('0x35', 'l1_write_cycles'),
            ('0x36', 'total_l1_cycles'),
            ('0x37', 'scalar2ub_read_req'),
            ('0x38', 'scalar2ub_write_req'),
            ('0x39', 'total_scalar2ub_req'),
            ('0x3a', 'scalar2ub_read_cycles'),
            ('0x3b', 'scalar2ub_write_cycles'),
            ('0x3c', 'total_scalar2ub_cycles'),
            ('0x3d', 'mte2ub_read_req'),
            ('0x3e', 'mte2ub_write_req'),
            ('0x3f', 'total_mte2ub_req'),
            ('0x40', 'mte2ub_read_cycles'),
            ('0x41', 'mte2ub_write_cycles'),
            ('0x42', 'total_mte2ub_cycles'),
            ('0x43', 'vec2ub_read_req'),
            ('0x44', 'vec2ub_write_req'),
            ('0x45', 'total_vec2ub_req'),
            ('0x46', 'vec2ub_read_cycles'),
            ('0x47', 'vec2ub_write_cycles'),
            ('0x48', 'total_vec2ub_cycles'),
            ('0x49', 'cube_fp16_exec'),
            ('0x4a', 'cube_int8_exec'),
            ('0x4b', 'vec_fp32_exec'),
            ('0x4c', 'vec_fp16_128lane_exec'),
            ('0x4d', 'vec_fp16_64lane_exec'),
            ('0x4e', 'vec_int32_exec'),
            ('0x4f', 'vec_misc_exec'),
            ('0x50', 'flow_ctrl_exec'),
            ('0x51', 'loop_exec'),
            ('0x52', 'loop_slot0_exec'),
            ('0x53', 'loop_slot1_exec'),
            ('0x54', 'icache_req'),
            ('0x55', 'icache_miss'),
            ('0x56', 'icache_prefetch'),
            ('0x57', 'scalar_waitflag_cycles'),
            ('0x58', 'cube_waitflag_cycles'),
            ('0x59', 'vec_waitflag_cycles'),
            ('0x5a', 'mte1_waitflag_cycles'),
            ('0x5b', 'mte2_waitflag_cycles'),
            ('0x5c', 'mte3_waitflag_cycles'),
            ('0x5d', 'indir_jump_instr'),
            ('0x5e', 'flow_ctrl_predicted'),
            ('0x5f', 'mispredicted_instr'),
            ('0x60', 'scalar_div_exec'),
            ('0x61', 'scalar_div_cycles'),
            ('0x62', 'scalar_mult_exec'),
            ('0x63', 'scalar_madd_exec'),
            ('0x64', 'bankgroup_stall_cycles'),
            ('0x65', 'bank_stall_cycles'),
            ('0x66', 'vec_resc_conflict_cycles'),
            ('0x67', 'mte_conflict_cycles'),
            ('0x68', 'vec_instr_flushed'),
            ('0x69', 'mte_stalled_by_ub'),
            ('0x6a', 'scalar_stalled_by_ub'),
            ('0x6b', 'mte1_iq_full_cycles'),
            ('0x6c', 'mte2_iq_full_cycles'),
            ('0x6d', 'mte3_iq_full_cycles'),
            ('0x6e', 'cube_iq_full_cycles'),
            ('0x6f', 'vec_iq_full_cycles'),
            # NOTE(review): 'sinlge' looks like a misspelling of 'single'; the
            # string is runtime data, so it is kept until consumers are checked.
            ('0x70', 'sinlge_issue_cycles'),
            ('0x71', 'dual_issue_cycles'),
            ('0x72', 'ib_empty_cycles'),
            ('0x73', 'scalar_only_cycles'),
            ('0x74', 'fmc_fail_times'),
            ('0x75', 'fmd_work_cycles'),
            ('0x76', 'fmd_idle_cycles'),
            ('0x77', 'unzip_work_cycles'),
            ('0x78', 'unzip_idle_cycles'),
            ('0x79', 'cube_l0c_stall'),
            ('0x7a', 'vec_writeback_stall'),
            ('0x7b', 'latency_histogram0'),
            ('0x7c', 'latency_histogram1'),
            ('0x7d', 'latency_histogram2'),
            ('0x7e', 'latency_histogram3'),
            ('0x7f', 'latency_histogram4'),
            ('0x80', 'latency_histogram5'),
            ('0x81', 'latency_histogram6'),
            ('0x82', 'outstanding_histogram0'),
            ('0x83', 'outstanding_histogram1'),
            ('0x84', 'outstanding_histogram2'),
            ('0x85', 'outstanding_histogram3'),
            ('0x86', 'outstanding_histogram4'),
            # From here on the values are metric names rather than counter names.
            ('0x12c', 'vec_exe_ratio'),
            # NOTE(review): 0x17f and 0x180 carry the same name; confirm that
            # 0x180 is really a second read counter and not a write counter.
            ('0x17f', 'ub_read_bw_vector(GB/s)'),
            ('0x180', 'ub_read_bw_vector(GB/s)'),
            ('0x191', 'ub_write_bw_vector(GB/s)'),
            ('0x1a5', 'ub_read_bw_mte(GB/s)'),
            ('0x1a6', 'ub_write_bw_mte(GB/s)'),
            ('0x206', 'l0c_to_l1_datas'),
            ('0x20c', 'l0c_to_gm_datas'),
            ('0x302', 'mte1_cycles_extra'),
            ('0x303', 'fixpipe_cycles'),
            ('0x416', 'mac_fp_ratio'),
            ('0x417', 'mac_int_ratio'),
            ('0x500', 'write_cache_hit'),
            ('0x502', 'write_cache_miss_allocate'),
            ('0x504', 'r0_read_cache_hit'),
            ('0x506', 'r0_read_cache_miss_allocate'),
            ('0x508', 'r1_read_cache_hit'),
            ('0x50a', 'r1_read_cache_miss_allocate'),
            ('0x50c', 'write_main_memory_datas'),
            ('0x50d', 'read_main_memory'),
            ('0x50e', 'read_main_memory_datas')
        ],
        # Metric name -> expression text.  Tokens of the form ``r<hex>``
        # presumably refer to the counter of event ``0x<hex>``; task_cyc, freq,
        # block_num and core_num are resolved by the consumer -- TODO confirm.
        'metrics': [
            ('total_time(ms)', '(task_cyc*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('total_cycles', 'task_cyc'),
            ('mac_fp16_ratio', '1.0*r49/task_cyc'),
            ('mac_int8_ratio', '1.0*r4a/task_cyc'),
            ('vec_ratio', '1.0*r8/task_cyc'),
            ('vec_time', '(1.0*r8*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('vec_exe_ratio', '1.0*r12c/task_cyc'),
            ('vec_exe_time', '(1.0*r12c*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('mac_ratio', '1.0*ra/task_cyc'),
            ('mac_time', '(1.0*ra*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('mac_ratio_extra', '1.0*(r416+r417)/task_cyc'),
            ('mac_time_extra', '(1.0*(r416+r417)*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('mac_exe_ratio', '1.0*(r49+r4a)/task_cyc'),
            ('mac_exe_time', '(1.0*(r49+r4a)*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('scalar_ratio', '1.0*r9/task_cyc'),
            ('scalar_time', '(1.0*r9*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('scalar_exe_ratio', '1.0*r9/task_cyc'),
            ('scalar_exe_time', '(1.0*r9*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('fixpipe_ratio', '1.0*r303/task_cyc'),
            ('fixpipe_time', '(1.0*r303*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('fixpipe_exe_ratio', '1.0*r303/task_cyc'),
            ('fixpipe_exe_time', '(1.0*r303*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('scalar_ld_ratio', '1.0*r3a/task_cyc'),
            ('scalar_st_ratio', '1.0*r3b/task_cyc'),
            ('vec_bankgroup_cflt_ratio', '1.0*r64/task_cyc'),
            ('vec_bank_cflt_ratio', '1.0*r65/task_cyc'),
            ('vec_resc_cflt_ratio', '1.0*r66/task_cyc'),
            ('mte1_iq_full_ratio', '1.0*r6b/task_cyc'),
            ('mte2_iq_full_ratio', '1.0*r6c/task_cyc'),
            ('mte3_iq_full_ratio', '1.0*r6d/task_cyc'),
            ('cube_iq_full_ratio', '1.0*r6e/task_cyc'),
            ('vec_iq_full_ratio', '1.0*r6f/task_cyc'),
            ('iq_full_ratio', '1.0*(r6b+r6c+r6d+r6e+r6f)/task_cyc'),
            ('vec_fp32_ratio', '1.0*r4b/task_cyc'),
            ('vec_fp16_ratio', '1.0*(r4c+r4d)/task_cyc'),
            ('vec_int32_ratio', '1.0*r4e/task_cyc'),
            ('vec_misc_ratio', '1.0*r4f/task_cyc'),
            ('mte0_exe_ratio', '1.0*r302/task_cyc'),
            ('mte0_exe_time', '(1.0*r302*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('mte1_ratio', '1.0*rb/task_cyc'),
            ('mte1_time', '(1.0*rb*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('mte1_ratio_extra', '1.0*r302/task_cyc'),
            ('mte1_time_extra', '(1.0*r302*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('mte1_exe_ratio', '1.0*r302/task_cyc'),
            ('mte1_exe_time', '(1.0*r302*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('mte2_ratio', '1.0*rc/task_cyc'),
            ('mte2_time', '(1.0*rc*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('mte2_exe_ratio', '1.0*rc/task_cyc'),
            ('mte2_exe_time', '(1.0*rc*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('mte3_ratio', '1.0*rd/task_cyc'),
            ('mte3_time', '(1.0*rd*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('mte3_exe_ratio', '1.0*rd/task_cyc'),
            ('mte3_exe_time', '(1.0*rd*1000000/(freq))/block_num*((block_num+core_num-1)/core_num)'),
            ('icache_miss_rate', '1.0*r55/r54'),
            ('scalar_waitflag_ratio', '1.0*r57/task_cyc'),
            ('cube_waitflag_ratio', '1.0*r58/task_cyc'),
            ('vector_waitflag_ratio', '1.0*r59/task_cyc'),
            ('mte1_waitflag_ratio', '1.0*r5a/task_cyc'),
            ('mte2_waitflag_ratio', '1.0*r5b/task_cyc'),
            ('mte3_waitflag_ratio', '1.0*r5c/task_cyc'),
            ('cube_fops', '1.0*r49*16*16*16*2+1.0*r4a*16*16*32*2'),
            ('vector_fops', '1.0*r4b*32.0+1.0*r4c*128.0+1.0*r4d*64.0+1.0*r4e*64.0+1.0*r4f*32.0'),
            ('ub_read_bw(GB/s)',
             '1.0*r15*256.0*4.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_write_bw(GB/s)',
             '1.0*r16*256.0*4.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l1_read_bw(GB/s)',
             '1.0*r31*256.0*16.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l1_write_bw(GB/s)',
             '1.0*r32*128.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l2_read_bw(GB/s)',
             '1.0*rf*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l2_write_bw(GB/s)',
             '1.0*r10*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('main_mem_read_bw(GB/s)',
             '1.0*r12*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('main_mem_write_bw(GB/s)',
             '1.0*r13*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0a_read_bw(GB/s)',
             '1.0*r1b*256.0*16.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0a_write_bw(GB/s)',
             '1.0*r1c*256.0*16.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0b_read_bw(GB/s)',
             '1.0*r21*256.0*16.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0b_write_bw(GB/s)',
             '1.0*r22*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0c_read_bw(GB/s)',
             '1.0*r27*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0c_write_bw(GB/s)',
             '1.0*r29*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0c_read_bw_cube(GB/s)',
             '1.0*r28*256.0*32.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0c_write_bw_cube(GB/s)',
             '1.0*r2a*256.0*32.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_read_bw_vector(GB/s)',
             '1.0*r43*256.0*16.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_write_bw_vector(GB/s)',
             '1.0*r44*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_read_bw_scalar(GB/s)',
             '1.0*r37*256.0*32.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_write_bw_scalar(GB/s)',
             '1.0*r38*256.0*32.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_read_bw_mte(GB/s)',
             '(r3d*128.0*8.0)/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_write_bw_mte(GB/s)',
             '(r3e*128.0*8.0)/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('write_cache_hit', '1.0 * r500'),
            ('write_cache_miss_allocate', '1.0 * r502'),
            ('r0_read_cache_hit', '1.0 * r504'),
            ('r0_read_cache_miss_allocate', '1.0 * r506'),
            ('r1_read_cache_hit', '1.0 * r508'),
            ('r1_read_cache_miss_allocate', '1.0 * r50a')
        ],
        # Metric name -> aggregate expression text (mostly SUM(...) over the
        # same counters as 'metrics').  Several scale factors here differ from
        # the 'metrics' section (e.g. main_mem_*_bw, ub_*_bw_vector) --
        # NOTE(review): confirm the difference is intended.
        'formula': [
            ('total_time(ms)', 'SUM((task_cyc*1000000/(freq))/block_num*((block_num+core_num-1)/core_num))'),
            ('total_cycles', 'task_cyc'),
            ('mac_fp16_ratio', '1.0*SUM(r49)/SUM(task_cyc)'),
            ('mac_int8_ratio', '1.0*SUM(r4a)/SUM(task_cyc)'),
            ('vec_ratio', '1.0*SUM(r8)/SUM(task_cyc)'),
            ('vec_exe_ratio', '1.0*SUM(r12c)/SUM(task_cyc)'),
            ('mac_ratio', '1.0*SUM(ra)/SUM(task_cyc)'),
            # NOTE(review): the next two entries spell the aggregate as 'Sum'
            # while every other entry uses 'SUM'; harmless only if the
            # evaluator is case-insensitive -- confirm.
            ('mac_exe_ratio', '1.0*(SUM(r4a)+Sum(r49))/SUM(task_cyc)'),
            ('mac_ratio_extra', '1.0*(SUM(r416)+Sum(r417))/SUM(task_cyc)'),
            ('scalar_ratio', '1.0*SUM(r9)/SUM(task_cyc)'),
            ('scalar_exe_ratio', '1.0*SUM(r9)/SUM(task_cyc)'),
            ('scalar_ld_ratio', '1.0*SUM(r3a)/SUM(task_cyc)'),
            ('scalar_st_ratio', '1.0*SUM(r3b)/SUM(task_cyc)'),
            ('vec_bankgroup_cflt_ratio', '1.0*SUM(r64)/SUM(task_cyc)'),
            ('vec_bank_cflt_ratio', '1.0*SUM(r65)/SUM(task_cyc)'),
            ('vec_resc_cflt_ratio', '1.0*SUM(r66)/SUM(task_cyc)'),
            ('mte1_iq_full_ratio', '1.0*SUM(r6b)/SUM(task_cyc)'),
            ('mte2_iq_full_ratio', '1.0*SUM(r6c)/SUM(task_cyc)'),
            ('mte3_iq_full_ratio', '1.0*SUM(r6d)/SUM(task_cyc)'),
            ('cube_iq_full_ratio', '1.0*SUM(r6e)/SUM(task_cyc)'),
            ('vec_iq_full_ratio', '1.0*SUM(r6f)/SUM(task_cyc)'),
            ('iq_full_ratio', '1.0*(SUM(r6b)+SUM(r6c)+SUM(r6d)+SUM(r6e)+SUM(r6f))/SUM(task_cyc)'),
            ('vec_fp32_ratio', '1.0*SUM(r4b)/SUM(task_cyc)'),
            ('vec_fp16_ratio', '1.0*(SUM(r4c)+SUM(r4d))/SUM(task_cyc)'),
            ('vec_int32_ratio', '1.0*SUM(r4e)/SUM(task_cyc)'),
            ('vec_misc_ratio', '1.0*SUM(r4f)/SUM(task_cyc)'),
            ('mte0_exe_ratio', '1.0*SUM(r302)/SUM(task_cyc)'),
            ('mte1_ratio', '1.0*SUM(rb)/SUM(task_cyc)'),
            ('mte1_exe_ratio', '1.0*SUM(r302)/SUM(task_cyc)'),
            ('mte1_ratio_extra', '1.0*SUM(r302)/SUM(task_cyc)'),
            ('mte2_ratio', '1.0*SUM(rc)/SUM(task_cyc)'),
            ('mte2_exe_ratio', '1.0*SUM(rc)/SUM(task_cyc)'),
            ('mte3_ratio', '1.0*SUM(rd)/SUM(task_cyc)'),
            ('mte3_exe_ratio', '1.0*SUM(rd)/SUM(task_cyc)'),
            ('fixpipe_ratio', '1.0*SUM(r303)/SUM(task_cyc)'),
            ('fixpipe_exe_ratio', '1.0*SUM(r303)/SUM(task_cyc)'),
            ('icache_miss_rate', '1.0*SUM(r55)/SUM(r54)'),
            ('scalar_waitflag_ratio', '1.0*SUM(r57)/SUM(task_cyc)'),
            ('cube_waitflag_ratio', '1.0*SUM(r58)/SUM(task_cyc)'),
            ('vector_waitflag_ratio', '1.0*SUM(r59)/SUM(task_cyc)'),
            ('mte1_waitflag_ratio', '1.0*SUM(r5a)/SUM(task_cyc)'),
            ('mte2_waitflag_ratio', '1.0*SUM(r5b)/SUM(task_cyc)'),
            ('mte3_waitflag_ratio', '1.0*SUM(r5c)/SUM(task_cyc)'),
            ('cube_fops', '1.0*SUM(r49)*16*16*16*2+1.0*SUM(r4a)*16*16*32*2'),
            # r4b_num / r4e_num / r4f_num are not defined in this table;
            # presumably substituted by the consumer -- TODO confirm.
            ('vector_fops',
             '1.0*SUM(r4b)*r4b_num+1.0*SUM(r4c)*128.0+1.0*SUM(r4d)*64.0+1.0*SUM(r4e)*r4e_num+1.0*SUM(r4f)*r4f_num'),
            ('ub_read_bw(GB/s)',
             '1.0*SUM(r15)*256.0*4.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_write_bw(GB/s)',
             '1.0*SUM(r16)*256.0*4.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l1_read_bw(GB/s)',
             '1.0*SUM(r31)*256.0*16.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l1_write_bw(GB/s)',
             '1.0*SUM(r32)*128.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l2_read_bw(GB/s)',
             '1.0*SUM(rf)*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l2_write_bw(GB/s)',
             '1.0*SUM(r10)*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('main_mem_read_bw(GB/s)',
             '1.0*SUM(r12)*8.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('main_mem_write_bw(GB/s)',
             '1.0*SUM(r13)*8.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0a_read_bw(GB/s)',
             '1.0*SUM(r1b)*256.0*16.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0a_write_bw(GB/s)',
             '1.0*SUM(r1c)*256.0*16.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0b_read_bw(GB/s)',
             '1.0*SUM(r21)*256.0*16.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0b_write_bw(GB/s)',
             '1.0*SUM(r22)*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0c_read_bw(GB/s)',
             '1.0*SUM(r27)*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0c_write_bw(GB/s)',
             '1.0*SUM(r29)*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0c_read_bw_cube(GB/s)',
             '1.0*SUM(r28)*256.0*32.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0c_write_bw_cube(GB/s)',
             '1.0*SUM(r2a)*256.0*32.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_read_bw_vector(GB/s)',
             '1.0*SUM(r43)*128.0*2.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_write_bw_vector(GB/s)',
             '1.0*SUM(r44)*128.0*1.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_read_bw_scalar(GB/s)',
             '1.0*SUM(r37)*128.0*1.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_write_bw_scalar(GB/s)',
             '1.0*SUM(r38)*128.0*1.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_read_bw_mte(GB/s)',
             '1.0*SUM(r3d)*128.0*1.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_write_bw_mte(GB/s)',
             '1.0*SUM(r3e)*128.0*1.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('write_cache_hit', '1.0 * SUM(r500)'),
            ('write_cache_miss_allocate', '1.0 * SUM(r502)'),
            ('r0_read_cache_hit', '1.0 * SUM(r504)'),
            ('r0_read_cache_miss_allocate', '1.0 * SUM(r506)'),
            ('r1_read_cache_hit', '1.0 * SUM(r508)'),
            ('r1_read_cache_miss_allocate', '1.0 * SUM(r50a)'),
            ('read_main_memory_datas(KB)', '128.0/1024*(SUM(r50d)+SUM(r50e))'),
            ('write_main_memory_datas(KB)', '128.0/1024*SUM(r50c)'),
            ('gm_to_l1_datas(KB)', '256.0/1024*(SUM(r32)-SUM(r206))'),
            ('l0c_to_l1_datas(KB)', '128.0/1024*SUM(r206)'),
            ('l0c_to_gm_datas(KB)', '128.0/1024*(SUM(r20c)-SUM(r206))'),
            ('gm_to_ub_datas(KB)', '128.0/1024*SUM(r3e)'),
            ('ub_to_gm_datas(KB)', '128.0/1024*SUM(r3d)')
        ],
        # Event id -> name of the metric associated with that event.
        # NOTE(review): 'vec_fp16_128lane_ratio', 'vec_fp16_64lane_ratio',
        # 'icache_req_ratio', 'mac_fp_ratio' and 'mac_int_ratio' have no entry
        # in 'metrics' or 'formula' of this table -- confirm they are resolved
        # elsewhere.
        'event2metric': [
            ('0x8', 'vec_ratio'),
            ('0xa', 'mac_ratio'),
            ('0x9', 'scalar_ratio'),
            ('0xb', 'mte1_ratio'),
            ('0xc', 'mte2_ratio'),
            ('0xd', 'mte3_ratio'),
            ('0xf', 'l2_read_bw(GB/s)'),
            ('0x10', 'l2_write_bw(GB/s)'),
            ('0x12', 'main_mem_read_bw(GB/s)'),
            ('0x13', 'main_mem_write_bw(GB/s)'),
            ('0x15', 'ub_read_bw(GB/s)'),
            ('0x16', 'ub_write_bw(GB/s)'),
            ('0x1b', 'l0a_read_bw(GB/s)'),
            ('0x1c', 'l0a_write_bw(GB/s)'),
            ('0x21', 'l0b_read_bw(GB/s)'),
            ('0x22', 'l0b_write_bw(GB/s)'),
            ('0x27', 'l0c_read_bw(GB/s)'),
            ('0x28', 'l0c_read_bw_cube(GB/s)'),
            ('0x29', 'l0c_write_bw(GB/s)'),
            ('0x2a', 'l0c_write_bw_cube(GB/s)'),
            ('0x31', 'l1_read_bw(GB/s)'),
            ('0x32', 'l1_write_bw(GB/s)'),
            ('0x37', 'ub_read_bw_scalar(GB/s)'),
            ('0x38', 'ub_write_bw_scalar(GB/s)'),
            ('0x3a', 'scalar_ld_ratio'),
            ('0x3b', 'scalar_st_ratio'),
            ('0x3d', 'ub_read_bw_mte(GB/s)'),
            ('0x3e', 'ub_write_bw_mte(GB/s)'),
            ('0x43', 'ub_read_bw_vector(GB/s)'),
            ('0x44', 'ub_write_bw_vector(GB/s)'),
            ('0x49', 'mac_fp16_ratio'),
            ('0x4a', 'mac_int8_ratio'),
            ('0x4b', 'vec_fp32_ratio'),
            ('0x4c', 'vec_fp16_128lane_ratio'),
            ('0x4d', 'vec_fp16_64lane_ratio'),
            ('0x4e', 'vec_int32_ratio'),
            ('0x4f', 'vec_misc_ratio'),
            ('0x54', 'icache_req_ratio'),
            ('0x55', 'icache_miss_rate'),
            ('0x57', 'scalar_waitflag_ratio'),
            ('0x58', 'cube_waitflag_ratio'),
            ('0x59', 'vector_waitflag_ratio'),
            ('0x5a', 'mte1_waitflag_ratio'),
            ('0x5b', 'mte2_waitflag_ratio'),
            ('0x5c', 'mte3_waitflag_ratio'),
            ('0x64', 'vec_bankgroup_cflt_ratio'),
            ('0x65', 'vec_bank_cflt_ratio'),
            ('0x66', 'vec_resc_cflt_ratio'),
            ('0x6b', 'mte1_iq_full_ratio'),
            ('0x6c', 'mte2_iq_full_ratio'),
            ('0x6d', 'mte3_iq_full_ratio'),
            ('0x6e', 'cube_iq_full_ratio'),
            ('0x6f', 'vec_iq_full_ratio'),
            ('0x12c', 'vec_exe_ratio'),
            ('0x17f', 'ub_read_bw_vector(GB/s)'),
            ('0x180', 'ub_read_bw_vector(GB/s)'),
            ('0x191', 'ub_write_bw_vector(GB/s)'),
            ('0x1a5', 'ub_read_bw_mte(GB/s)'),
            ('0x1a6', 'ub_write_bw_mte(GB/s)'),
            ('0x302', 'mte1_ratio_extra'),
            ('0x303', 'fixpipe_ratio'),
            ('0x416', 'mac_fp_ratio'),
            ('0x417', 'mac_int_ratio'),
            ('0x500', 'write_cache_hit'),
            ('0x502', 'write_cache_miss_allocate'),
            ('0x504', 'r0_read_cache_hit'),
            ('0x506', 'r0_read_cache_miss_allocate'),
            ('0x508', 'r1_read_cache_hit'),
            ('0x50a', 'r1_read_cache_miss_allocate')
        ],
        # Event id -> name; same pairs as 'events' up to 0x6e, then only the
        # five 0x17f..0x1a6 entries (0x6f..0x86 and the rest are not listed).
        'custom': [
            ('0x1', 'vec_instr_exec'),
            ('0x2', 'scalar_instr_exec'),
            ('0x3', 'cube_instr_exec'),
            ('0x4', 'mte1_instr_exec'),
            ('0x5', 'mte2_instr_exec'),
            ('0x6', 'mte3_instr_exec'),
            ('0x7', 'total_instr_exec'),
            ('0x8', 'vec_busy_cycles'),
            ('0x9', 'scalar_busy_cycles'),
            ('0xa', 'cube_busy_cycles'),
            ('0xb', 'mte1_busy_cycles'),
            ('0xc', 'mte2_busy_cycles'),
            ('0xd', 'mte3_busy_cycles'),
            ('0xe', 'total_busy_cycles'),
            ('0xf', 'l2_read_req'),
            ('0x10', 'l2_write_req'),
            ('0x11', 'total_l2_req'),
            ('0x12', 'main_read_req'),
            ('0x13', 'main_write_req'),
            ('0x14', 'total_main_req'),
            ('0x15', 'ub_read_req'),
            ('0x16', 'ub_write_req'),
            ('0x17', 'total_ub_req'),
            ('0x18', 'ub_read_cycles'),
            ('0x19', 'ub_write_cycles'),
            ('0x1a', 'total_ub_cycles'),
            ('0x1b', 'l0a_read_req'),
            ('0x1c', 'l0a_write_req'),
            ('0x1d', 'total_l0a_req'),
            ('0x1e', 'l0a_read_cycles'),
            ('0x1f', 'l0a_write_cycles'),
            ('0x20', 'total_l0a_cycles'),
            ('0x21', 'l0b_read_req'),
            ('0x22', 'l0b_write_req'),
            ('0x23', 'total_l0b_req'),
            ('0x24', 'l0b_read_cycles'),
            ('0x25', 'l0b_write_cycles'),
            ('0x26', 'total_l0b_cycles'),
            ('0x27', 'vec2l0c_read_req'),
            ('0x28', 'cube2l0c_read_req'),
            ('0x29', 'vec2l0c_write_req'),
            ('0x2a', 'cube2l0c_write_req'),
            ('0x2b', 'total_l0c_req'),
            ('0x2c', 'vec2l0c_read_cycles'),
            ('0x2d', 'cube2l0c_read_cycles'),
            ('0x2e', 'vec2l0c_write_cycles'),
            ('0x2f', 'cube2l0c_write_cycles'),
            ('0x30', 'total_l0c_cycles'),
            ('0x31', 'l1_read_req'),
            ('0x32', 'l1_write_req'),
            ('0x33', 'total_l1_req'),
            ('0x34', 'l1_read_cycles'),
            ('0x35', 'l1_write_cycles'),
            ('0x36', 'total_l1_cycles'),
            ('0x37', 'scalar2ub_read_req'),
            ('0x38', 'scalar2ub_write_req'),
            ('0x39', 'total_scalar2ub_req'),
            ('0x3a', 'scalar2ub_read_cycles'),
            ('0x3b', 'scalar2ub_write_cycles'),
            ('0x3c', 'total_scalar2ub_cycles'),
            ('0x3d', 'mte2ub_read_req'),
            ('0x3e', 'mte2ub_write_req'),
            ('0x3f', 'total_mte2ub_req'),
            ('0x40', 'mte2ub_read_cycles'),
            ('0x41', 'mte2ub_write_cycles'),
            ('0x42', 'total_mte2ub_cycles'),
            ('0x43', 'vec2ub_read_req'),
            ('0x44', 'vec2ub_write_req'),
            ('0x45', 'total_vec2ub_req'),
            ('0x46', 'vec2ub_read_cycles'),
            ('0x47', 'vec2ub_write_cycles'),
            ('0x48', 'total_vec2ub_cycles'),
            ('0x49', 'cube_fp16_exec'),
            ('0x4a', 'cube_int8_exec'),
            ('0x4b', 'vec_fp32_exec'),
            ('0x4c', 'vec_fp16_128lane_exec'),
            ('0x4d', 'vec_fp16_64lane_exec'),
            ('0x4e', 'vec_int32_exec'),
            ('0x4f', 'vec_misc_exec'),
            ('0x50', 'flow_ctrl_exec'),
            ('0x51', 'loop_exec'),
            ('0x52', 'loop_slot0_exec'),
            ('0x53', 'loop_slot1_exec'),
            ('0x54', 'icache_req'),
            ('0x55', 'icache_miss'),
            ('0x56', 'icache_prefetch'),
            ('0x57', 'scalar_waitflag_cycles'),
            ('0x58', 'cube_waitflag_cycles'),
            ('0x59', 'vec_waitflag_cycles'),
            ('0x5a', 'mte1_waitflag_cycles'),
            ('0x5b', 'mte2_waitflag_cycles'),
            ('0x5c', 'mte3_waitflag_cycles'),
            ('0x5d', 'indir_jump_instr'),
            ('0x5e', 'flow_ctrl_predicted'),
            ('0x5f', 'mispredicted_instr'),
            ('0x60', 'scalar_div_exec'),
            ('0x61', 'scalar_div_cycles'),
            ('0x62', 'scalar_mult_exec'),
            ('0x63', 'scalar_madd_exec'),
            ('0x64', 'bankgroup_stall_cycles'),
            ('0x65', 'bank_stall_cycles'),
            ('0x66', 'vec_resc_conflict_cycles'),
            ('0x67', 'mte_conflict_cycles'),
            ('0x68', 'vec_instr_flushed'),
            ('0x69', 'mte_stalled_by_ub'),
            ('0x6a', 'scalar_stalled_by_ub'),
            ('0x6b', 'mte1_iq_full_cycles'),
            ('0x6c', 'mte2_iq_full_cycles'),
            ('0x6d', 'mte3_iq_full_cycles'),
            ('0x6e', 'cube_iq_full_cycles'),
            ('0x17f', 'ub_read_bw_vector(GB/s)'),
            ('0x180', 'ub_read_bw_vector(GB/s)'),
            ('0x191', 'ub_write_bw_vector(GB/s)'),
            ('0x1a5', 'ub_read_bw_mte(GB/s)'),
            ('0x1a6', 'ub_write_bw_mte(GB/s)')
        ],
    }

    # Table currently in effect; an alias of DATA_DEFAULT (same dict object)
    # until set_config_data() re-points it.
    DATA = DATA_DEFAULT

    # Table shared by both CHIP_V6 entries of DATA_MAP.  Unlike DATA_DEFAULT,
    # 'events' and 'custom' map event ids directly to metric names here.
    CHIP_V6_MAP = {
        'events': [
            ('0x1', 'scalar_ratio'),
            ('0x3', 'ub_read_bw_scalar(GB/s)'),
            ('0x5', 'ub_write_bw_scalar(GB/s)'),
            ('0x34', 'icache_req_ratio'),
            ('0x35', 'icache_miss_rate'),
            ('0x202', 'mte2_ratio'),
            ('0x203', 'mte3_ratio'),
            ('0x204', 'ub_read_bw_mte(GB/s)'),
            ('0x206', 'ub_write_bw_mte(GB/s)'),
            ('0x301', 'mac_ratio'),
            ('0x323', 'mac_fp16_ratio'),
            ('0x324', 'mac_int8_ratio'),
            ('0x304', 'l0a_read_bw(GB/s)'),
            ('0x306', 'l0b_read_bw(GB/s)'),
            ('0x308', 'l0c_write_bw_cube(GB/s)'),
            ('0x30a', 'l0c_read_bw_cube(GB/s)'),
            ('0x400', 'main_mem_read_bw(GB/s)'),
            ('0x401', 'main_mem_write_bw(GB/s)'),
            ('0x424', 'read_local_l2_hit'),
            ('0x425', 'read_local_l2_miss'),
            ('0x426', 'read_local_l2_victim'),
            ('0x42a', 'write_local_l2_hit'),
            ('0x42b', 'write_local_l2_miss'),
            ('0x42c', 'write_local_l2_victim'),
            ('0x501', 'vec_ratio'),
            ('0x502', 'pmu_idc_aic_vec_instr_vf_busy_o'),
            ('0x528', 'vec_resc_cflt_ratio'),
            ('0x540', 'vec_bank_cflt_ratio'),
            ('0x556', 'stu_pmu_wctl_ub_cflt'),
            ('0x56f', 'ub_read_bw(GB/s)'),
            ('0x570', 'ub_write_bw(GB/s)'),
            ('0x571', 'ub_read_bw_vector(GB/s)'),
            ('0x572', 'ub_write_bw_vector(GB/s)'),
            ('0x701', 'mte1_ratio'),
            ('0x703', 'l0a_write_bw(GB/s)'),
            ('0x705', 'l0b_write_bw(GB/s)'),
            ('0x707', 'l1_read_bw(GB/s)'),
            ('0x709', 'l1_write_bw(GB/s)'),
            ('0x70c', 'fixp2ub_write_bw(GB/s)'),
            ('0x712', 'l0c_read_bw(GB/s)'),
            ('0x714', 'fixpipe_ratio')
        ],
        # Metric names only; every expression is the empty string here.
        'metrics': [
            ('vec_ratio', ''),
            ('mac_ratio', ''),
            ('scalar_ratio', ''),
            ('mte1_ratio', ''),
            ('mte2_ratio', ''),
            ('mte3_ratio', ''),
            ('icache_req_ratio', ''),
            ('icache_miss_rate', ''),
            ('fixpipe_ratio', ''),
            ('main_mem_read_bw(GB/s)', ''),
            ('main_mem_write_bw(GB/s)', ''),
            ('vec_resc_cflt_ratio', ''),
            ('vec_bank_cflt_ratio', ''),
            ('ub_read_bw(GB/s)', ''),
            ('ub_write_bw(GB/s)', ''),
            ('l1_read_bw(GB/s)', ''),
            ('l1_write_bw(GB/s)', ''),
            ('l0a_read_bw(GB/s)', ''),
            ('l0a_write_bw(GB/s)', ''),
            ('l0b_read_bw(GB/s)', ''),
            ('l0b_write_bw(GB/s)', ''),
            ('l0c_read_bw(GB/s)', ''),
            ('l0c_read_bw_cube(GB/s)', ''),
            ('l0c_write_bw_cube(GB/s)', ''),
            ('ub_read_bw_scalar(GB/s)', ''),
            ('ub_write_bw_scalar(GB/s)', ''),
            ('fixp2ub_write_bw(GB/s)', ''),
            ('ub_write_bw_mte(GB/s)', ''),
            ('ub_read_bw_mte(GB/s)', ''),
            ('ub_read_bw_vector(GB/s)', ''),
            ('ub_write_bw_vector(GB/s)', ''),
            ('mac_fp16_ratio', ''),
            ('mac_int8_ratio', ''),
            ('cube_fops', ''),
            ('read_local_l2_hit', ''),
            ('read_local_l2_miss', ''),
            ('read_local_l2_victim', ''),
            ('write_local_l2_hit', ''),
            ('write_local_l2_miss', ''),
            ('write_local_l2_victim', ''),
            ('pmu_idc_aic_vec_instr_vf_busy_o', ''),
            # NOTE(review): the next two names already appear earlier in this
            # list; confirm whether the duplicates are intentional.
            ('vec_resc_cflt_ratio', ''),
            ('vec_bank_cflt_ratio', ''),
            ('stu_pmu_wctl_ub_cflt', '')
        ],
        'formula': [
            ('total_time(ms)', 'SUM((task_cyc*1000000/(freq))/block_num*((block_num+core_num-1)/core_num))'),
            ('total_cycles', 'task_cyc'),
            ('vec_ratio', '1.0*SUM(r501)/SUM(task_cyc)'),
            ('mac_ratio', '1.0*SUM(r301)/SUM(task_cyc)'),
            ('scalar_ratio', '1.0*SUM(r1)/SUM(task_cyc)'),
            ('mte1_ratio', '1.0*SUM(r701)/SUM(task_cyc)'),
            ('mte2_ratio', '1.0*SUM(r202)/SUM(task_cyc)'),
            ('mte3_ratio', '1.0*SUM(r203)/SUM(task_cyc)'),
            ('icache_miss_rate', '1.0*SUM(r35)/SUM(r34)'),
            ('fixpipe_ratio', '1.0*SUM(r714)/SUM(task_cyc)'),
            # NOTE(review): unlike 'ub_write_bw(GB/s)' just below, this
            # expression is not wrapped in SUM(...) -- confirm it is intended.
            ('ub_read_bw(GB/s)',
             '1.0*(r56f+r571)*256.0*4.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_write_bw(GB/s)',
             '1.0*SUM(r570+r572)*256.0*4.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l1_read_bw(GB/s)',
             '1.0*SUM(r707)*256.0*16.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l1_write_bw(GB/s)',
             '1.0*SUM(r709)*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('main_mem_read_bw(GB/s)',
             '1.0*SUM(r400)*8.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('main_mem_write_bw(GB/s)',
             '1.0*SUM(r401)*8.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0a_read_bw(GB/s)',
             '1.0*SUM(r304)*256.0*16.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0a_write_bw(GB/s)',
             '1.0*SUM(r703)*256.0*16.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0b_read_bw(GB/s)',
             '1.0*SUM(r306)*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0b_write_bw(GB/s)',
             '1.0*SUM(r705)*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0c_read_bw(GB/s)',
             '1.0*SUM(r712)*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0c_read_bw_cube(GB/s)',
             '1.0*SUM(r30a)*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('l0c_write_bw_cube(GB/s)',
             '1.0*SUM(r308)*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('fixp2ub_write_bw(GB/s)',
             '1.0*SUM(r70c)*256.0*8.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_write_bw_mte(GB/s)',
             '1.0*SUM(r206)*128.0*1.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_read_bw_mte(GB/s)',
             '1.0*SUM(r204)*128.0*1.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_read_bw_vector(GB/s)',
             '1.0*SUM(r571)*128.0*2.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_write_bw_vector(GB/s)',
             '1.0*SUM(r572)*128.0*1.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_read_bw_scalar(GB/s)',
             '1.0*SUM(r3)*128.0*1.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            ('ub_write_bw_scalar(GB/s)',
             '1.0*SUM(r5)*128.0*1.0/((task_cyc*1000/(freq*1000.0))/'
             'block_num*((block_num+core_num-1)/core_num))/(8589934592.0)'),
            # The remaining expressions use the raw counters without SUM(...).
            ('mac_fp16_ratio', '1.0*r323/task_cyc'),
            ('mac_int8_ratio', '1.0*r324/task_cyc'),
            ('cube_fops', '1.0*r323*16*16*16*2+1.0*r324*16*16*32*2'),
            ('read_local_l2_hit', '1.0 * r424'),
            ('read_local_l2_miss', '1.0 * r425'),
            ('read_local_l2_victim', '1.0 * r426'),
            ('write_local_l2_hit', '1.0 * r42a'),
            ('write_local_l2_miss', '1.0 * r42b'),
            ('write_local_l2_victim', '1.0 * r42c'),
            ('vec_bank_cflt_ratio', '1.0*(r540+r556)/(task_cyc)'),
            ('vec_resc_cflt_ratio', '1.0*(r528)/(r502)')
        ],
        # Same pairs as the 'events' section of this table.
        'event2metric': [
            ('0x1', 'scalar_ratio'),
            ('0x3', 'ub_read_bw_scalar(GB/s)'),
            ('0x5', 'ub_write_bw_scalar(GB/s)'),
            ('0x34', 'icache_req_ratio'),
            ('0x35', 'icache_miss_rate'),
            ('0x202', 'mte2_ratio'),
            ('0x203', 'mte3_ratio'),
            ('0x204', 'ub_read_bw_mte(GB/s)'),
            ('0x206', 'ub_write_bw_mte(GB/s)'),
            ('0x301', 'mac_ratio'),
            ('0x323', 'mac_fp16_ratio'),
            ('0x324', 'mac_int8_ratio'),
            ('0x304', 'l0a_read_bw(GB/s)'),
            ('0x306', 'l0b_read_bw(GB/s)'),
            ('0x308', 'l0c_write_bw_cube(GB/s)'),
            ('0x30a', 'l0c_read_bw_cube(GB/s)'),
            ('0x400', 'main_mem_read_bw(GB/s)'),
            ('0x401', 'main_mem_write_bw(GB/s)'),
            ('0x424', 'read_local_l2_hit'),
            ('0x425', 'read_local_l2_miss'),
            ('0x426', 'read_local_l2_victim'),
            ('0x42a', 'write_local_l2_hit'),
            ('0x42b', 'write_local_l2_miss'),
            ('0x42c', 'write_local_l2_victim'),
            ('0x501', 'vec_ratio'),
            ('0x502', 'pmu_idc_aic_vec_instr_vf_busy_o'),
            ('0x528', 'vec_resc_cflt_ratio'),
            ('0x540', 'vec_bank_cflt_ratio'),
            ('0x556', 'stu_pmu_wctl_ub_cflt'),
            ('0x56f', 'ub_read_bw(GB/s)'),
            ('0x570', 'ub_write_bw(GB/s)'),
            ('0x571', 'ub_read_bw_vector(GB/s)'),
            ('0x572', 'ub_write_bw_vector(GB/s)'),
            ('0x701', 'mte1_ratio'),
            ('0x703', 'l0a_write_bw(GB/s)'),
            ('0x705', 'l0b_write_bw(GB/s)'),
            ('0x707', 'l1_read_bw(GB/s)'),
            ('0x709', 'l1_write_bw(GB/s)'),
            ('0x70c', 'fixp2ub_write_bw(GB/s)'),
            ('0x712', 'l0c_read_bw(GB/s)'),
            ('0x714', 'fixpipe_ratio')
        ],
        # 'events' plus the six remote-L2 entries 0x427-0x429 and 0x42d-0x42f.
        'custom': [
            ('0x1', 'scalar_ratio'),
            ('0x3', 'ub_read_bw_scalar(GB/s)'),
            ('0x5', 'ub_write_bw_scalar(GB/s)'),
            ('0x34', 'icache_req_ratio'),
            ('0x35', 'icache_miss_rate'),
            ('0x202', 'mte2_ratio'),
            ('0x203', 'mte3_ratio'),
            ('0x204', 'ub_read_bw_mte(GB/s)'),
            ('0x206', 'ub_write_bw_mte(GB/s)'),
            ('0x301', 'mac_ratio'),
            ('0x323', 'mac_fp16_ratio'),
            ('0x324', 'mac_int8_ratio'),
            ('0x304', 'l0a_read_bw(GB/s)'),
            ('0x306', 'l0b_read_bw(GB/s)'),
            ('0x308', 'l0c_write_bw_cube(GB/s)'),
            ('0x30a', 'l0c_read_bw_cube(GB/s)'),
            ('0x400', 'main_mem_read_bw(GB/s)'),
            ('0x401', 'main_mem_write_bw(GB/s)'),
            ('0x424', 'read_local_l2_hit'),
            ('0x425', 'read_local_l2_miss'),
            ('0x426', 'read_local_l2_victim'),
            ('0x427', 'read_remote_l2_hit'),
            ('0x428', 'read_remote_l2_miss'),
            ('0x429', 'read_remote_l2_victim'),
            ('0x42a', 'write_local_l2_hit'),
            ('0x42b', 'write_local_l2_miss'),
            ('0x42c', 'write_local_l2_victim'),
            ('0x42d', 'write_remote_l2_hit'),
            ('0x42e', 'write_remote_l2_miss'),
            ('0x42f', 'write_remote_l2_victim'),
            ('0x501', 'vec_ratio'),
            ('0x502', 'pmu_idc_aic_vec_instr_vf_busy_o'),
            ('0x528', 'vec_resc_cflt_ratio'),
            ('0x540', 'vec_bank_cflt_ratio'),
            ('0x556', 'stu_pmu_wctl_ub_cflt'),
            ('0x56f', 'ub_read_bw(GB/s)'),
            ('0x570', 'ub_write_bw(GB/s)'),
            ('0x571', 'ub_read_bw_vector(GB/s)'),
            ('0x572', 'ub_write_bw_vector(GB/s)'),
            ('0x701', 'mte1_ratio'),
            ('0x703', 'l0a_write_bw(GB/s)'),
            ('0x705', 'l0b_write_bw(GB/s)'),
            ('0x707', 'l1_read_bw(GB/s)'),
            ('0x709', 'l1_write_bw(GB/s)'),
            ('0x70c', 'fixp2ub_write_bw(GB/s)'),
            ('0x712', 'l0c_read_bw(GB/s)'),
            ('0x714', 'fixpipe_ratio')
        ]
    }

    # Chip model -> table.  Both V6 variants share the very same dict object;
    # any chip not listed here falls back to DATA_DEFAULT.
    DATA_MAP = {
        ChipModel.CHIP_V6_1_0: CHIP_V6_MAP,
        ChipModel.CHIP_V6_2_0: CHIP_V6_MAP,
    }

    def __init__(self):
        """Initialise through the base-class constructor; adds no state."""
        super().__init__()

    @classmethod
    def set_config_data(cls, chip_id):
        """Select the table for ``chip_id`` and normalise its entries.

        ``DATA`` is pointed at the ``DATA_MAP`` entry for ``chip_id``; chips
        without an entry get ``DATA_DEFAULT``.  Every pair in every section
        of the selected table is then replaced by
        ``(str(key).lower(), str(value))`` -- only the first element is
        lower-cased, so metric names used as keys (e.g. ``'...(GB/s)'``) lose
        their upper-case letters while the same names used as values do not.

        The replacement is done in place on the existing lists, so the shared
        class-level table itself is modified rather than a copy.

        :param chip_id: key looked up in ``DATA_MAP``.
        """
        table = cls.DATA_MAP.get(chip_id, cls.DATA_DEFAULT)
        cls.DATA = table
        for pairs in table.values():
            # Slice assignment keeps the original list object alive, so other
            # references to the same section list observe the new pairs too.
            pairs[:] = [(str(pair[0]).lower(), str(pair[1])) for pair in pairs]