{
    "pretrain_dataset": [
        {
            "params": {
                "input-dataset": "/data/ci/datasets/origin/train-00000-of-00001-a09b74b3ef9c3b56.parquet",
                "test-out-part": "/data/ci/cache/process_dataset/test_merge_subs/",
                "base-out-part": "/data/ci/datasets/processed/base_merge_subs/",
                "test-out-merge": "/data/ci/cache/process_dataset/test_merge/",
                "base-out-merge": "/data/ci/datasets/processed/base_merge/"
            }
        }
    ],
    "test_pretrain_datasets_part1": [
        {
            "params": {
                "input": "/data/ci/datasets/origin/0001-alpaca.parquet",
                "tokenizer-type": "PretrainedFromHF",
                "output-prefix": "/data/ci/cache/process_dataset/test_merge_subs/part1",
                "tokenizer-name-or-path": "/data/ci/models/llama2/hf/llama-2-7b-hf",
                "cache-dir": "/data/ci/cache/process_dataset/tmp/",
                "workers": 4,
                "log-interval": 1000
            }
        }
    ],
    "test_pretrain_datasets_part2": [
        {
            "params": {
                "input": "/data/ci/datasets/origin/0002-alpaca.parquet",
                "tokenizer-type": "PretrainedFromHF",
                "output-prefix": "/data/ci/cache/process_dataset/test_merge_subs/part2",
                "tokenizer-name-or-path": "/data/ci/models/llama2/hf/llama-2-7b-hf",
                "cache-dir": "/data/ci/cache/process_dataset/tmp/",
                "workers": 4,
                "log-interval": 1000
            }
        }
    ],
    "test_merge_pretrain_datasets": [
        {
            "params": {
                "input": "/data/ci/cache/process_dataset/test_merge_subs/",
                "output-prefix": "/data/ci/cache/process_dataset/test_merge/merge",
                "merge-group-keys": "text_document"
            }
        }
    ],
    "instruction_dataset": [
        {
            "params": {
                "input-dataset": "/data/ci/datasets/origin/train-00000-of-00001-a09b74b3ef9c3b56.parquet",
                "test-out-part": "/data/ci/cache/process_dataset/test_ins_subs/",
                "base-out-part": "/data/ci/datasets/processed/base_ins_subs/",
                "test-out-merge": "/data/ci/cache/process_dataset/test_ins_merge/",
                "base-out-merge": "/data/ci/datasets/processed/base_ins_merge/"
            }
        }
    ],
    "test_instruction_datasets_part1": [
        {
            "params": {
                "input": "/data/ci/datasets/origin/0001-alpaca.parquet",
                "tokenizer-type": "PretrainedFromHF",
                "handler-name": "GeneralInstructionHandler",
                "output-prefix": "/data/ci/cache/process_dataset/test_ins_subs/part1",
                "tokenizer-name-or-path": "/data/ci/models/llama2/hf/llama-2-7b-hf",
                "cache-dir": "/data/ci/cache/process_dataset/tmp/",
                "workers": 4,
                "log-interval": 1000,
                "append-eod": null
            }
        }
    ],
    "test_instruction_datasets_part2": [
        {
            "params": {
                "input": "/data/ci/datasets/origin/0002-alpaca.parquet",
                "tokenizer-type": "PretrainedFromHF",
                "handler-name": "GeneralInstructionHandler",
                "output-prefix": "/data/ci/cache/process_dataset/test_ins_subs/part2",
                "tokenizer-name-or-path": "/data/ci/models/llama2/hf/llama-2-7b-hf",
                "cache-dir": "/data/ci/cache/process_dataset/tmp/",
                "workers": 4,
                "log-interval": 1000,
                "append-eod": null
            }
        }
    ],
    "test_merge_instrction_datasets": [
        {
            "params": {
                "input": "/data/ci/cache/process_dataset/test_ins_subs/",
                "output-prefix": "/data/ci/cache/process_dataset/test_ins_merge/merge",
                "merge-group-keys": ["packed_attention_mask_document", "packed_input_ids_document", "packed_labels_document"]
            }
        }
    ]
}