# JSON file for qwen2.5vl_7b under the MindSpeed-MM `configs/model` directory.
# NOTE(review): the leading "/path" looks like a placeholder prefix — replace
# it with the location of the local MindSpeed-MM checkout.
model: /path/MindSpeed-MM/configs/model/qwen2.5vl_7b.json
# Training settings grouped under `megatron_training`.
# The children must stay indented beneath this key: at column 0 they become
# top-level keys, this header parses as null, and `model` / `seed` clash with
# the same-named keys used by other sections of the file.
megatron_training:
  model: qwen2.5vl_7b

  # Normalization, attention and fused-kernel switches.
  use_fused_rmsnorm: false
  normalization: RMSNorm
  use_mcore_models: true
  sequence_parallel: false
  use_flash_attn: true
  no_masked_softmax_fusion: true
  attention_softmax_in_fp32: true
  no_gradient_accumulation_fusion: true
  use_fused_swiglu: false
  swiglu: true
  use_fused_rotary_pos_emb: false
  position_embedding_type: rope
  bf16: true
  use_distributed_optimizer: true

  # Tokenizer.
  # NOTE(review): "/path" looks like a placeholder prefix — replace it.
  tokenizer_type: PretrainedFromHF
  tokenizer_name_or_path: /path/Qwen2.5-VL-7B-Instruct

  # Batch size, sequence length and schedule.
  global_batch_size: 512
  seq_length: 1024
  save_interval: 50
  train_iters: 1000
  stage: ray_dapo

  # Dropout and weight initialization.
  attention_dropout: 0.0
  init_method_std: 0.01
  hidden_dropout: 0.0

  # Distributed runtime.
  distributed_backend: nccl
  no_shared_storage: true

  # Dataset.
  # NOTE(review): "/path" looks like a placeholder prefix — replace it.
  dataset_additional_keys: ['input_ids', 'input_ids_length', 'attention_mask', 'position_ids']
  data_path: /path/dataset/rl_data
  # Quoted so every YAML parser keeps this comma-separated triple as a string
  # (some resolvers treat commas inside digit runs as number separators).
  split: '100,0,0'
  no_shuffle: true

  # Reproducibility.
  seed: 1234
  npu_deterministic: false
# Actor settings grouped under `actor_config`.
# The children must stay indented beneath this key: at column 0 they become
# top-level keys, this header parses as null, and `model` clashes with the
# same-named keys used elsewhere in the file.
actor_config:
  model: qwen2.5vl_7b

  # Micro-batch size and model-parallel layout.
  micro_batch_size: 1
  tensor_model_parallel_size: 4
  pipeline_model_parallel_size: 1

  # Optimizer and learning-rate schedule.
  # `lr` is written with an explicit mantissa dot: YAML 1.1 resolvers
  # (e.g. PyYAML) read the dot-less form `1e-6` as a string, whereas
  # `1.0e-6` resolves to a float under both YAML 1.1 and 1.2.
  lr: 1.0e-6
  lr_decay_style: constant
  min_lr: 0.0
  weight_decay: 0.01
  lr_warmup_fraction: 0.0
  clip_grad: 1.0
  adam_beta1: 0.9
  adam_beta2: 0.999

  # Checkpoint loading and saving.
  # NOTE(review): "/path" looks like a placeholder prefix — replace it. The
  # directory name suffix "tp4pp1" lines up with the tensor/pipeline sizes
  # above (4 and 1); presumably the weights were converted for that layout —
  # confirm before changing either.
  finetune: true
  load: /path/qwen2_5_vl_7b_tp4pp1
  save: ./ckpt
  no_load_optim: true
  no_load_rng: true

  num_workers: 8
# Reinforcement-learning settings grouped under `rl_config`.
# The children must stay indented beneath this key; at column 0 this header
# parses as null and every setting below becomes an unrelated top-level key.
rl_config:
  # Runtime environment and worker behaviour.
  runtime_env_path: MindSpeed-MM/examples/rl/envs/runtime_env.yaml
  is_multimodal: true
  guarantee_order: true
  use_integrated_worker: true
  blocking: true

  gamma: 1.0
  lam: 0.95

  # Dynamic batching is switched off; the packing token limits are listed
  # alongside it.
  use_dynamic_bsz: false
  ref_max_packing_token_size: 55000
  actor_max_packing_token_size: 55000
  update_max_packing_token_size: 8000

  # Per-stage dispatch sizes.
  reward_dispatch_size: 5
  adv_dispatch_size: 5
  actor_rollout_dispatch_size: 320
  actor_update_dispatch_size: 320
  actor_logprob_dispatch_size: 320

  # Advantage estimation and KL control (KL coefficient is 0 here).
  adv_estimator: group_norm
  kl_penalty: low_var_kl
  kl_ctrl_type: fixed
  init_kl_coef: 0

  # Policy-update settings.
  mini_batch_size: 32
  max_prompt_length: 1024
  epochs: 1
  clip_ratio: 0.2
  entropy_coeff: 0
  shuffle_mini_batch: false
  n_samples_per_prompt: 5

  # Rule-based reward.
  # NOTE(review): presumably `verifier_weight` holds one weight per entry of
  # `verifier_function` (both lists have length 1 here) — confirm.
  rule_reward: true
  verifier_function: ["acc_for_dapo"]
  verifier_weight: [1.0]
  num_cpus_for_local_task: 1.0

  use_tensorboard: false

  # Token-level loss and asymmetric clipping range (low 0.2 / high 0.28).
  token_level_loss: true
  clip_higher_enable: true
  clip_ratio_low: 0.2
  clip_ratio_high: 0.28

  # Over-long response handling.
  overlong_buffer_enable: true
  rollout_max_tokens: 2048
  overlong_buffer: 512
  overlong_buffer_penalty_factor: 1.0

  # Group filtering; the metric name matches the verifier function above.
  filter_groups_enable: true
  filter_groups_metric: acc_for_dapo
  filter_groups_max_batches: -1
  filter_groups_train_batch_size: 512

  # NOTE(review): `actor_resource` is placed as a child of `rl_config`; this
  # nesting was reconstructed — confirm against the consumer's schema.
  actor_resource:
    num_npus: 8
# Generation (rollout inference) settings grouped under `generate_config`.
# The children must stay indented beneath this key: at column 0 this header
# parses as null and `seed` clashes with the same-named key used elsewhere in
# the file.
generate_config:
  enforce_eager: true
  trust_remote_code: false

  # Offload switches for training optimizer state, gradients and parameters.
  offload_train_optimizer: true
  offload_train_grad: true
  offload_train_param: true

  # Parallel layout used for inference.
  infer_tensor_parallel_size: 1
  infer_pipeline_parallel_size: 1
  infer_expert_parallel_size: 1

  # Inference engine limits.
  # NOTE(review): 3072 equals 1024 + 2048, i.e. the `max_prompt_length` and
  # `max_tokens` values used in this file — presumably intentional; keep them
  # in step if either changes.
  max_num_seqs: 64
  max_model_len: 3072
  dtype: "bfloat16"
  gpu_memory_utilization: 0.8

  # NOTE(review): `sampling_config` is placed as a child of `generate_config`
  # and `seed` as a sampling parameter; this nesting was reconstructed —
  # confirm against the consumer's schema.
  sampling_config:
    logprobs: 0
    max_tokens: 2048
    top_p: 1.0
    top_k: -1
    min_p: 0.0
    temperature: 1.0
    detokenize: false
    seed: 1234