#!/usr/bin/env bash
# Launch distributed training of train.py with the CULane config,
# starting one worker process per device listed in NPU_VISIBLE_DEVICES.

# Comma-separated device ids handed to the training processes.
# NOTE(review): id 4 is absent from this list -- confirm that is intentional.
export NPU_VISIBLE_DEVICES=0,1,2,3,5,6,7

# Number of worker processes, derived from the device list so the two can
# never disagree (previously hard-coded to 8 while only 7 ids were listed).
# Counting the commas and adding one yields the number of ids.
device_commas=${NPU_VISIBLE_DEVICES//[^,]/}
export NGPUS=$(( ${#device_commas} + 1 ))

# You can change this value according to your number of CPU cores.
export OMP_NUM_THREADS=4

python -m torch.distributed.launch --nproc_per_node="$NGPUS" train.py configs/culane.py

# Alternative: single-process run with the TuSimple config.
# python train.py configs/tusimple.py