# Defaults list: the `model` entry carries a one-element list naming
# `qwen25_7b`.
# NOTE(review): presumably consumed as a Hydra-style defaults list that pulls
# in the `qwen25_7b` model definition — confirm against the config loader.
defaults:
  - model:
      - qwen25_7b
# Training-side settings. Every key below is a child of `megatron_training`;
# left at column 0 they would become top-level keys and `model` would collide
# with the `model` key of `actor_config`.
megatron_training:
  model: qwen25_7b

  # Kernel / fusion switches.
  use_fused_rmsnorm: true
  use_mcore_models: true
  sequence_parallel: true
  use_flash_attn: true
  no_masked_softmax_fusion: true
  attention_softmax_in_fp32: true
  no_gradient_accumulation_fusion: true
  use_fused_swiglu: true
  use_fused_rotary_pos_emb: true

  # Precision and optimizer layout.
  bf16: true
  use_distributed_optimizer: true

  # Tokenizer source.
  tokenizer_type: PretrainedFromHF
  tokenizer_name_or_path: ./multiturn-sft-qwen-2.5-7b-instruct/global_step_372_huggingface

  # Batch size, sequence length and schedule.
  global_batch_size: 32
  seq_length: 2048
  save_interval: 100
  train_iters: 1000
  stage: ray_dapo

  # Regularisation and initialisation.
  attention_dropout: 0.0
  init_method_std: 0.01
  hidden_dropout: 0.0

  distributed_backend: nccl
  no_shared_storage: true

  # Dataset selection.
  dataset_additional_keys: ['labels']
  data_path: ./data
  # Quoted so the value is unambiguously a string, never a number.
  split: '100,0,0'
  no_shuffle: true
  full_shuffle_instruction_dataset: false
# Actor model settings: parallel layout, optimizer hyper-parameters and
# checkpoint paths. Every key below is a child of `actor_config`.
actor_config:
  model: qwen25_7b

  # Micro-batch and model-parallel layout.
  micro_batch_size: 1
  tensor_model_parallel_size: 4
  pipeline_model_parallel_size: 1

  # Optimizer and learning-rate schedule.
  # Written with a decimal point so YAML 1.1 loaders (e.g. PyYAML) resolve it
  # as a float; a bare `1e-6` is read as a string by those loaders.
  lr: 1.0e-6
  lr_decay_style: constant
  min_lr: 0
  weight_decay: 0.01
  lr_warmup_fraction: 0.0
  clip_grad: 1.0
  adam_beta1: 0.9
  adam_beta2: 0.95

  # Checkpoint loading and saving.
  finetune: true
  load: ./multiturn-sft-qwen-2.5-7b-instruct-tp4pp1
  save: ./ckpt
  no_load_optim: true
  no_load_rng: true
  no_save_optim: false
  no_save_rng: false
# Reinforcement-learning settings: worker scheduling, advantage estimation,
# rewards, clipping, group filtering, multi-turn tool use and resources.
# Every key below is a child of `rl_config`.
rl_config:
  # Worker scheduling.
  guarantee_order: true
  use_integrated_worker: true
  blocking: true
  actor_forward_micro_batch_size: 1
  ref_forward_micro_batch_size: 1

  # Advantage estimation.
  gamma: 1.0
  lam: 0.95
  adv_estimator: group_norm

  # Update loop.
  mini_batch_size: 32
  max_prompt_length: 2048
  epochs: 1
  clip_ratio: 0.2
  entropy_coeff: 0.0
  shuffle_mini_batch: false
  n_samples_per_prompt: 8

  # Rewards: one verifier function with weight 1.0.
  rule_reward: true
  verifier_function: ["retool_reward"]
  verifier_weight: [1.0]

  num_cpus_for_local_task: 1.0
  use_tensorboard: true

  # Loss aggregation and clip bounds.
  token_level_loss: true
  clip_higher_enable: true
  clip_ratio_low: 0.2
  clip_ratio_high: 0.28

  # Over-long response handling.
  # NOTE(review): `overlong_buffer_enable` is false, so the buffer size and
  # penalty factor below are presumably unused — confirm.
  overlong_buffer_enable: false
  rollout_max_tokens: 2048
  overlong_buffer: 512
  overlong_buffer_penalty_factor: 1.0

  # Group filtering (disabled here).
  filter_groups_enable: false
  filter_groups_metric: acc_for_dapo
  filter_groups_max_batches: -1
  filter_groups_train_batch_size: 32

  # Multi-turn tool calling.
  multi_turn_enable: true
  tool_config_path: ./configs/tools/retool_config.yaml
  max_tool_calls: 1
  max_parallel_calls: 1
  max_total_response_length: 2048
  max_tool_response_length: 256
  tool_response_truncate_side: 'middle'
  tool_parser_format: 'hermes'

  async_engine: true

  # NOTE(review): `actor_resource` is assumed to be a sub-mapping of
  # `rl_config` based on key order — confirm against the config schema.
  actor_resource:
    num_npus: 8
# Generation (rollout) settings: engine flags, offload switches, inference
# parallel sizes, engine limits and sampling parameters.
# Every key below is a child of `generate_config`.
generate_config:
  enforce_eager: true
  trust_remote_code: true

  # Offload switches for training state.
  offload_train_optimizer: true
  offload_train_grad: true
  offload_train_param: true

  # Parallel sizes used for inference.
  infer_tensor_parallel_size: 1
  infer_pipeline_parallel_size: 1
  infer_expert_parallel_size: 1

  # Engine limits.
  max_num_seqs: 1024
  max_model_len: 4096
  max_num_batched_tokens: 8192
  dtype: "bfloat16"
  gpu_memory_utilization: 0.4

  # NOTE(review): `sampling_config` is assumed to be a sub-mapping of
  # `generate_config` based on key order — confirm against the config schema.
  sampling_config:
    logprobs: 1
    max_tokens: 2048
    top_p: 1.0
    top_k: -1
    min_p: 0.0
    temperature: 1.0
    detokenize: false