# Launches train_fsdp2.py through torchrun on a single node, using the
# pretrain_qwen3_8b_4K_fsdp2.yaml configuration file.
# Paths are relative, so the script is expected to be started from the
# directory that contains examples/, tests/ and train_fsdp2.py.

# Load the shared FSDP2 environment settings before configuring the launcher.
source examples/fsdp2/env_config.sh

# Topology: one node (rank 0) running eight worker processes.
NNODES=1
NODE_RANK=0
NPUS_PER_NODE=8
# Total number of worker processes across all nodes. Not referenced again in
# this script; kept so the value stays available in the shell environment.
WORLD_SIZE=$(( NPUS_PER_NODE * NNODES ))

# Rendezvous endpoint handed to torchrun.
MASTER_ADDR=localhost
MASTER_PORT=12322

# Assemble the launcher options as one whitespace-separated string. The
# leading and trailing newline are harmless: they vanish during word splitting.
printf -v DISTRIBUTED_ARGS \
  '\n--nproc_per_node %s --nnodes %s --node_rank %s --master_addr %s --master_port %s\n' \
  "${NPUS_PER_NODE}" "${NNODES}" "${NODE_RANK}" "${MASTER_ADDR}" "${MASTER_PORT}"

# DISTRIBUTED_ARGS is left unquoted on purpose so that it splits into
# individual command-line words.
# shellcheck disable=SC2086
torchrun ${DISTRIBUTED_ARGS} train_fsdp2.py ./tests/st/shell_scripts/pretrain_qwen3_8b_4K_fsdp2.yaml