examples/shmem_perftest/udma_perftest/
mte_perftest
udma_perftest
aclshmemx_udma_put_nbi
aclshmemx_udma_get_nbi
aclshmemx_udma_put_signal_nbi
put / bi_put / get / bi_get / put_signal
main.cpp
udma_perftest_kernel.cpp
run.sh
CMakeLists.txt
README.md
mte_perftest -> shmem_perftest
inner -> mte_perftest
shmem_perftest
DEVICE_SIDE
HOST_SIDE
block_dim=1
-b/--block-size
--block-range
--metric bw
prof_start → loop(*_nbi) → quiet → prof_end
quiet
--metric lat
prof_start → loop(put_nbi) → prof_end → quiet
SHMEMI_PROF_START/END
loop_count
pipe_barrier
--batch
--batch 0
--batch <loop_count>
--batch 1
*_nbi
--batch N
1 < N < loop_count
loop_count % N != 0
prof_end
put_signal
put_signal_nbi
signal_base + warmup + loop_count - 1
write_notify
src/device/gm2gm/engine/shmem_device_udma.hpp
bash scripts/build.sh -examples -soc_type Ascend950
./run.sh -t put -d float --exponent-range 8 17 --loop-count 1000
-t bi_put / get / bi_get / put_signal
--metric lat -t put
--batch 1 / --batch 16 / --batch 1000
bw
--batch -1
--batch abc
-t get
aclshmemi_kernel_abort
examples/shmem_perftest/udma_perftest/README.md
examples/shmem_perftest/README.md
mte_perftest/README.md
ascendc_perftest/README.md
autodev/issue-280
nino888/shmem
cann/shmem
examples/dispatch_gmm_combine/include/dispatch_gmm_combine.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_init_routing_quant_v2.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_v2_common.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_v2_expert_token_out.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_v2_fullload_dynamic_quant.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_v2_fullload_quant.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_v2_fullload_quant_base.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_v2_gather_dynamic_quant.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_v2_gather_quant.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_v2_mrgsort.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_v2_mrgsort_out.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_v2_sort_base.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_v2_sort_multi_core.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_v2_sort_one_core.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_v2_src_to_dst_and_gather.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_v2_src_to_dst_op.h
examples/dispatch_gmm_combine/include/moe_init_routing_quant_v2/moe_v2_src_to_dst_with_capacity.h
examples/dispatch_gmm_combine/include/moe_token_unpermute.h
examples/dispatch_gmm_combine/include/select_helper.h
examples/dispatch_gmm_combine/include/sync_util.h
examples/dynamic_tiling/impl/kernel/allgather_matmul.h
examples/dynamic_tiling/impl/kernel/allgather_matmul_padding.h
examples/dynamic_tiling/impl/kernel/allgather_matmul_with_gather_result.h
examples/dynamic_tiling/impl/kernel/matmul_allreduce.h
examples/dynamic_tiling/impl/kernel/matmul_reduce_scatter.h
examples/dynamic_tiling/impl/kernel/matmul_reduce_scatter_padding_a.h
examples/dynamic_tiling/impl/kernel/matmul_reduce_scatter_padding_ab.h
examples/dynamic_tiling/impl/kernel/matmul_reduce_scatter_padding_b.h
examples/matmul_allreduce/epilogue/block/epilogue_allreduce.hpp
src/device/gm2gm/shmemi_device_rma.cpp
src/host/bootstrap/shmemi_bootstrap_config_store.cpp
src/host/data_plane/shmem_host_rma.cpp
src/host/entity/mem_entity_default.cpp
src/host/entity/mem_entity_entry.cpp
src/host/init/shmem_init.cpp
src/host/mem/heap/hybm_vmm_based_segment.cpp
src/host/mem/shmem_rma.cpp
src/host/team/shmem_team.cpp
src/host/transport/transport_manager.cpp
echo 'TODO: replace with real tests, e.g. pytest -q'