# Top-level run settings: plain scalars that belong at the root of the document.
seed: 0
output_dir: './output'
# Empty string: no checkpoint path is supplied by this file.
load_checkpoint: ''
use_parallel: True
# This file configures a 'predict' run.
run_mode: 'predict'
use_legacy: False
# NOTE(review): presumably the on-disk format of the weights to load - confirm against the consumer.
load_ckpt_format: 'safetensors'
# Trainer section.
# `type` must be indented under `trainer`; when it is flush with the section
# header, `trainer` parses as null and `type` becomes a stray top-level key.
trainer:
  type: CausalLanguageModelingTrainer
# Parallel layout section.
# Both factors must be children of `parallel_config`; left at column 0 the
# section itself parses as null and the factors become top-level keys.
parallel_config:
  data_parallel: 1
  model_parallel: 2
# NOTE(review): '/path/hf_dir' looks like a placeholder - replace it with a real
# directory before running. Presumably a Hugging Face-format model directory
# (judging by the name only); confirm against the consumer.
pretrained_model_dir: '/path/hf_dir'
# Model section.
# The settings live two levels deep (model -> model_config -> fields). Without
# the indentation, `model` and `model_config` both parse as null and every
# field below them becomes an unrelated top-level key.
model:
  model_config:
    seq_length: 32768
    # Dtype settings: bfloat16 for compute and parameters, float32 for the
    # layernorm, softmax and rotary computations.
    compute_dtype: "bfloat16"
    layernorm_compute_dtype: "float32"
    softmax_compute_dtype: "float32"
    rotary_dtype: "float32"
    params_dtype: "bfloat16"
    # NOTE(review): presumably paged KV-cache sizing (tokens per block and
    # number of blocks) - confirm against the consumer.
    block_size: 128
    num_blocks: 1024
# Text-generation settings.
# The fields must be children of `generation_config`; left at column 0 the
# section parses as null and the fields become top-level keys.
# NOTE(review): the section header is kept at the top level, where the file
# already has it. If the consumer expects it under `model`, move the whole
# stanza there - confirm against the consumer's schema.
generation_config:
  do_sample: False
  # Same value as the `seq_length` entry (32768) set elsewhere in this file.
  max_length: 32768
  max_new_tokens: 512
  bos_token_id: 1
  # A one-element list, not a bare integer.
  eos_token_id: [2]
  pad_token_id: 3
  top_k: 1
  top_p: 1.0
  repetition_penalty: 1.05
  temperature: 1.0
# Runtime context section.
# Nesting restored: without indentation `context` and `ascend_config` both
# parse as null and their settings become top-level keys.
# NOTE(review): layout follows the MindSpore `set_context` parameters, where
# `precision_mode` is an entry of `ascend_config` and the remaining keys are
# direct context arguments - confirm against the consumer.
context:
  # NOTE(review): 0 is GRAPH_MODE in MindSpore (1 is PYNATIVE_MODE) - confirm.
  mode: 0
  enable_graph_kernel: False
  ascend_config:
    precision_mode: "must_keep_origin_dtype"
  max_device_memory: "59GB"
  save_graphs: False
  # Only relevant if `save_graphs` is enabled; it is False here.
  save_graphs_path: "./graph"
# Parallel context section.
# `parallel_mode` must be indented under `parallel`; flush with the header,
# `parallel` parses as null and `parallel_mode` becomes a top-level key.
parallel:
  parallel_mode: "MANUAL_PARALLEL"