# ---- Global run settings ----
seed: 0
run_mode: 'predict'
output_dir: './output'

# Checkpoint loading. `load_checkpoint` is left empty in this file.
# NOTE(review): presumably weights are then taken from `pretrained_model_dir`
# — confirm against the loader.
load_checkpoint: ''
load_ckpt_format: 'safetensors'

# NOTE(review): `use_parallel` is off although this file also sets
# `model_parallel: 8` — confirm which one the launcher honors.
use_parallel: false
use_legacy: false
# Trainer selection.
# The child keys must be indented under `trainer`; at column 0 they parse as
# independent top-level keys and `trainer` itself becomes null.
trainer:
  type: CausalLanguageModelingTrainer
  model_name: 'glm4_moe'
# Parallel split sizes.
# The child keys must be indented under `parallel_config`; at column 0 the
# section header parses as null and the sizes become top-level keys.
# NOTE(review): presumably data_parallel * model_parallel must equal the
# number of devices used (8 here) — confirm against the launcher.
parallel_config:
  data_parallel: 1
  model_parallel: 8
# Placeholder path; replace with the real model directory before running.
# NOTE(review): '/path/hf_dir' suggests a Hugging Face style model directory
# — confirm the expected layout.
pretrained_model_dir: '/path/hf_dir'

# Model settings: numeric types per component.
# `model_config` must be nested under `model`, and the dtype keys under
# `model_config`; at column 0 both headers parse as null and the dtype keys
# become top-level keys.
model:
  model_config:
    compute_dtype: 'bfloat16'
    layernorm_compute_dtype: 'float32'
    softmax_compute_dtype: 'float32'
    rotary_dtype: 'bfloat16'
    params_dtype: 'bfloat16'
# Runtime context settings (key names match MindSpore's `set_context`
# arguments).
# All keys must be nested under `context`; at column 0 the `context` and
# `ascend_config` headers parse as null and every setting becomes a
# top-level key.
context:
  mode: 0  # 0 = graph mode, 1 = PyNative mode in MindSpore's context API
  enable_graph_kernel: false
  # Only `precision_mode` belongs to `ascend_config`; the memory and
  # graph-dump keys below are direct context settings.
  ascend_config:
    precision_mode: 'must_keep_origin_dtype'
  max_device_memory: '59GB'
  save_graphs: false
  save_graphs_path: './graph'
# Parallel context settings.
# The child keys must be indented under `parallel`; at column 0 the header
# parses as null and the settings become top-level keys.
parallel:
  parallel_mode: 'MANUAL_PARALLEL'
  enable_alltoall: false