# Model section: which base checkpoint to fine-tune and where results go.
model:
  # Base checkpoint identifier (Hugging Face-style "org/name" string).
  base: "LiquidAI/LFM2.5-1.2B-Instruct"
  # Relative path that receives the run's output artifacts.
  output: "outputs/sft-lfm2"
  # NOTE(review): presumably forwarded to the model loader; enabling it
  # allows code shipped in the model repository to run locally, so only
  # keep it on for sources you trust.
  trust_remote_code: true
# Dataset section: where the training data lives and how it is split.
dataset:
  # Dataset location; the trailing slash indicates a directory path.
  name: "data/train-lfm2/"
  # Name of the record field that holds the training text.
  text_field: "text"
  # Which split of the dataset to read.
  split: "train"
  # NOTE(review): presumably the fraction (10%) held out for evaluation;
  # confirm how the consumer interprets this value.
  eval_split: 0.1
# Training section: optimisation hyperparameters for the SFT run.
training:
  epochs: 5
  batch_size: 4
  # NOTE(review): with batch_size 4 this presumably yields an effective
  # batch of 16 per optimizer step; confirm against the trainer.
  gradient_accumulation_steps: 4
  # Written with an explicit decimal point: YAML 1.1 loaders such as
  # PyYAML resolve a dot-less exponent form like `2e-4` as a *string*,
  # not a float.
  learning_rate: 2.0e-4
  # NOTE(review): presumably the maximum sequence length in tokens.
  max_length: 512
  warmup_ratio: 0.03
  lr_scheduler: "cosine"
# LoRA section: adapter hyperparameters and the modules that get adapters.
lora:
  rank: 16
  # alpha is twice the rank here (32 / 16).
  alpha: 32
  dropout: 0.0
  # Module-name patterns to adapt.
  # NOTE(review): these presumably match LFM2 layer names (conv
  # projections, attention projections, feed-forward weights); verify
  # against the loaded model's module names.
  target_modules:
    - "conv.in_proj"
    - "conv.out_proj"
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "out_proj"
    - "feed_forward.w1"
    - "feed_forward.w2"
    - "feed_forward.w3"
# Generation section: sampling parameters used when generating text.
# The low temperature and top_p values make sampling close to greedy.
generation:
  temperature: 0.1
  top_k: 50
  top_p: 0.1
  repetition_penalty: 1.05

# NOTE(review): the key name suggests GGUF export is switched off for this
# run. Kept as a top-level key; confirm where the consumer looks it up.
gguf: false
# Tracking section: identifiers under which the run is logged.
tracking:
  # Project that groups related runs.
  project: "qmd-query-expansion"
  # Label for this specific run.
  run_name: "sft-lfm2-1.2B"