"""
SFT training for QMD query expansion with LiquidAI LFM2-1.2B.
LFM2 is a hybrid architecture optimized for edge/on-device inference.
Uses different LoRA target modules than standard transformers.
Self-contained script for HuggingFace Jobs:
hf jobs uv run --flavor a10g-large --secrets HF_TOKEN --timeout 2h jobs/sft_lfm2.py
"""
import os
from huggingface_hub import login
BASE_MODEL = "LiquidAI/LFM2-1.2B"
OUTPUT_MODEL = "tobil/qmd-query-expansion-lfm2-sft"
DATASET = "tobil/qmd-query-expansion-train"
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
login(token=hf_token)
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoTokenizer
from trl import SFTTrainer, SFTConfig
print(f"Loading dataset: {DATASET}...")
dataset = load_dataset(DATASET, split="train")
print(f"Dataset loaded: {len(dataset)} examples")
split = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = split["train"]
eval_dataset = split["test"]
print(f" Train: {len(train_dataset)}, Eval: {len(eval_dataset)}")
config = SFTConfig(
output_dir="qmd-query-expansion-lfm2-sft",
push_to_hub=True,
hub_model_id=OUTPUT_MODEL,
hub_strategy="every_save",
num_train_epochs=5,
per_device_train_batch_size=4,
gradient_accumulation_steps=4,
learning_rate=2e-4,
max_length=512,
logging_steps=10,
save_strategy="steps",
save_steps=200,
save_total_limit=2,
eval_strategy="steps",
eval_steps=200,
warmup_ratio=0.03,
lr_scheduler_type="cosine",
bf16=True,
report_to="none",
)
peft_config = LoraConfig(
r=16,
lora_alpha=32,
lora_dropout=0.0,
bias="none",
task_type="CAUSAL_LM",
target_modules=["q_proj", "k_proj", "v_proj", "out_proj", "in_proj", "w1", "w2", "w3"],
)
print("Initializing SFT trainer...")
trainer = SFTTrainer(
model=BASE_MODEL,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
args=config,
peft_config=peft_config,
)
print("Starting SFT training (LFM2-1.2B)...")
trainer.train()
print("Pushing to Hub...")
trainer.push_to_hub()
print(f"Done! Model: https://huggingface.co/{OUTPUT_MODEL}")