#!/bin/bash
# ---------------------------------------------------------------------------
# Default settings for this evaluation run.
# ---------------------------------------------------------------------------

# Identification: both values are used to build the result CaseName.
Network="DCGAN"
RANK_SIZE=1
export RANK_SIZE

# Values forwarded to main.py:
#   batch_size   -> --batch-size
#   train_epochs -> --n-epochs
#   base_lr      -> --lr
#   workers      -> --n-cpu
batch_size=64
train_epochs=20
base_lr=0.0002
workers=8

# Values that can be overridden on the command line:
#   --device_id=N          (defaults to 0)
#   --checkpoint_path=PATH (no default; the script aborts if it stays empty)
device_id=0
checkpoint_path=""
# Parse command-line overrides given as --name=value.
#   --device_id=N           overrides device_id
#   --checkpoint_path=PATH  sets checkpoint_path (required)
# Arguments that match neither prefix are ignored.
#
# "$@" keeps every argument as one word, so a value containing spaces or glob
# characters (e.g. a checkpoint path) reaches the variable unmodified.
for para in "$@"; do
  if [[ $para == --device_id* ]]; then
    # Strip everything up to and including the first '='.
    device_id=${para#*=}
  elif [[ $para == --checkpoint_path* ]]; then
    checkpoint_path=${para#*=}
  fi
done

# The checkpoint is mandatory: abort when it was not supplied.
if [[ -z "${checkpoint_path}" ]]; then
  echo "[Error] para \"checkpoint_path\" must be config"
  exit 1
fi
# Decide which device to use.
# Priority: an ASCEND_DEVICE_ID already present in the environment wins;
# otherwise the value of device_id is exported as ASCEND_DEVICE_ID.
# If both are empty the script aborts with an error.
if [[ -n "${ASCEND_DEVICE_ID:-}" ]]; then
  echo "device id is ${ASCEND_DEVICE_ID}"
elif [[ -n "${device_id:-}" ]]; then
  export ASCEND_DEVICE_ID=${device_id}
  echo "device id is ${ASCEND_DEVICE_ID}"
else
  # The message must be echoed; as a bare string the shell would try to
  # execute it as a command.
  echo "[Error] device id must be config"
  exit 1
fi
# Resolve the working layout.
# The script may be started either from inside the "test" directory or from
# its parent. Afterwards:
#   cur_path      - the directory the rest of the script runs in
#   test_path_dir - the "test" directory (holds env_npu.sh and output/)
cur_path=$(pwd)
cur_path_last_dirname=${cur_path##*/}
if [[ "${cur_path_last_dirname}" == "test" ]]; then
  test_path_dir=${cur_path}
  # main.py is started as ./main.py later on, so a failed cd must not be
  # ignored.
  cd .. || exit 1
  cur_path=$(pwd)
else
  test_path_dir=${cur_path}/test
fi

# Start from an empty per-device output directory: drop the results of a
# previous run, then (re)create the directory.
device_output_dir="${test_path_dir}/output/${ASCEND_DEVICE_ID}"
if [[ -d "${device_output_dir}" ]]; then
  # ${test_path_dir:?} aborts instead of expanding to a root-level path if
  # the variable is ever empty.
  rm -rf -- "${test_path_dir:?}/output/${ASCEND_DEVICE_ID}"
fi
mkdir -p -- "${device_output_dir}"
# Wall-clock start of the run, in seconds since the epoch.
start_time=$(date +%s)

# Source the NPU environment script unless an environment entry matching
# "etp_running_flag" carries the value "true".
check_etp_flag=$(env | grep etp_running_flag)
etp_flag=${check_etp_flag#*=}
if [[ "${etp_flag}" != "true" ]]; then
  source "${test_path_dir}/env_npu.sh"
fi

# Launch main.py in the background with --evaluate; stdout and stderr go to
# the per-device log. All expansions are quoted so that values containing
# whitespace (notably the checkpoint path) are passed as single arguments.
train_log="${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log"
python3 ./main.py \
  --data "" \
  --addr="$(hostname -I | awk '{print $1}')" \
  --batch-size="${batch_size}" \
  --n-epochs "${train_epochs}" \
  --lr="${base_lr}" \
  --n-cpu "${workers}" \
  --world-size=1 \
  --print-freq=1 \
  --gpu=0 \
  --rank 0 \
  --amp \
  --dist-backend 'hccl' \
  --opt-level O2 \
  --device-list '0' \
  --loss-scale 1024 \
  --evaluate \
  --device="npu" \
  --checkpoint-path "${checkpoint_path}" \
  > "${train_log}" 2>&1 &
eval_pid=$!

# Collect the job's exit status, then wait for any remaining background jobs
# (a bare "wait" alone would discard the status).
wait "${eval_pid}"
eval_status=$?
wait
if (( eval_status != 0 )); then
  # Only warn: the result summary is still produced afterwards.
  echo "[Warning] main.py exited with status ${eval_status}, see ${train_log}" >&2
fi
# Print batch_size * 1000 / fps with two decimals (milliseconds per batch when
# fps is in images/sec).
#   $1 - batch size
#   $2 - measured FPS
# Prints nothing when fps is empty, zero or non-numeric. The values are passed
# with -v rather than spliced into the awk program text, so a missing FPS can
# no longer cause an awk syntax or division-by-zero error.
_calc_training_time() {
  awk -v bs="$1" -v fps="$2" \
    'BEGIN { if (fps + 0 != 0) printf "%.2f\n", bs * 1000 / fps }'
}

# End-to-end duration of the run in seconds.
end_time=$(date +%s)
e2e_time=$(( end_time - start_time ))

echo "------------------ Final result ------------------"
# FPS is the 11th whitespace-separated field of the last log line that
# contains "FPS"; it stays empty when no such line exists.
FPS=$(grep -a 'FPS' "${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" \
  | awk '{ fps = $11 } END { print fps }')
echo "Final Performance images/sec : $FPS"
echo "GAN model : no acc output"
echo "E2E Training Duration sec : $e2e_time"

# Values for the summary file.
BatchSize=${batch_size}
DeviceType=$(uname -m)
CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'eval'
ActualFPS=${FPS}
TrainingTime=$(_calc_training_time "${batch_size}" "${FPS}")

# Write the summary to <test_path_dir>/output/<device>/<CaseName>.log,
# replacing any previous content of that file.
{
  echo "Network = ${Network}"
  echo "RankSize = ${RANK_SIZE}"
  echo "BatchSize = no batchsize in eval stage"
  echo "DeviceType = ${DeviceType}"
  echo "CaseName = ${CaseName}"
  echo "ActualFPS = ${ActualFPS}"
  echo "TrainingTime = ${TrainingTime}"
  echo "TrainAccuracy = no acc for dcgan"
  echo "ActualLoss = no Loss in eval stage"
  echo "E2ETrainingTime = ${e2e_time}"
} > "${test_path_dir}/output/${ASCEND_DEVICE_ID}/${CaseName}.log"