{
    "architectures": [
        "NemotronH_Nano_VL_V2"
    ],
    "auto_map": {
        "AutoConfig": "configuration.NemotronH_Nano_VL_V2_Config",
        "AutoModel": "modeling.NemotronH_Nano_VL_V2",
        "AutoModelForCausalLM": "modeling.NemotronH_Nano_VL_V2"
    },
    "downsample_ratio": 0.5,
    "eos_token_id": 12,
    "force_image_size": 512,
    "image_tag_type": "internvl",
    "img_context_token": "<image>",
    "img_context_token_id": 131072,
    "img_end_token": "</img>",
    "img_start_token": "<img>",
    "llm_config": {
        "architectures": [
            "NemotronHForCausalLM"
        ],
        "attention_bias": false,
        "attention_dropout": 0.0,
        "attention_head_dim": 128,
        "auto_map": {
            "AutoConfig": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-Base--configuration_nemotron_h.NemotronHConfig",
            "AutoModelForCausalLM": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-Base--modeling_nemotron_h.NemotronHForCausalLM"
        },
        "chunk_size": 128,
        "conv_kernel": 4,
        "eos_token_id": 12,
        "expand": 2,
        "head_dim": 128,
        "hidden_dropout": 0.0,
        "hidden_size": 5120,
        "hybrid_override_pattern": "M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M-",
        "initializer_range": 0.02,
        "intermediate_size": 20480,
        "layer_norm_epsilon": 1e-05,
        "mamba_head_dim": 80,
        "mamba_hidden_act": "silu",
        "mamba_num_heads": 128,
        "mamba_proj_bias": false,
        "max_position_embeddings": 131072,
        "mlp_bias": false,
        "mlp_hidden_act": "relu2",
        "model_type": "nemotron_h",
        "n_groups": 8,
        "num_attention_heads": 40,
        "num_hidden_layers": 62,
        "num_key_value_heads": 8,
        "num_logits_to_keep": 1,
        "rescale_prenorm_residual": true,
        "residual_in_fp32": false,
        "rms_norm_eps": 1e-05,
        "sliding_window": null,
        "ssm_state_size": 128,
        "time_step_floor": 0.0001,
        "time_step_limit": [
            0.0,
            Infinity
        ],
        "time_step_max": 0.1,
        "time_step_min": 0.001,
        "time_step_rank": 256,
        "torch_dtype": "bfloat16",
        "use_bias": false,
        "use_cache": true,
        "use_conv_bias": true,
        "use_mamba_kernels": true,
        "vocab_size": 132096
    },
    "max_sequence_length": 131072,
    "model_type": "NemotronH_Nano_VL_V2",
    "norm_mean": [
        0.48145466,
        0.4578275,
        0.40821073
    ],
    "norm_std": [
        0.26862954,
        0.26130258,
        0.27577711
    ],
    "patch_size": 16,
    "projector_hidden_size": 20480,
    "ps_version": "v2",
    "template": "n5h_5p5_nanov2",
    "torch_dtype": "bfloat16",
    "transformers_version": "4.53.3",
    "use_thumbnail": true,
    "video_context_token": "<video>",
    "video_context_token_id": 131081,
    "video_pruning_rate": 0.7,
    "vision_config": {
        "adaptor_configs": {},
        "adaptor_names": null,
        "architectures": [
            "RADIOModel"
        ],
        "args": {
            "aa": null,
            "amp": true,
            "amp_dtype": "bfloat16",
            "amp_impl": "native",
            "aug_repeats": 0,
            "aug_splits": 0,
            "bn_eps": null,
            "bn_momentum": null,
            "cache_dir": null,
            "channels_last": false,
            "checkpoint_hist": 10,
            "chk_keep_forever": 100,
            "class_map": "",
            "clip_grad": null,
            "clip_mode": "norm",
            "cls_token_per_teacher": true,
            "coco_annotations_file": "/datasets/coco2017-adlsa/annotations/captions_val2017.json",
            "coco_image_dir": "/datasets/coco2017-adlsa/val2017",
            "color_jitter": 0.4,
            "cooldown_epochs": 0,
            "cpe_max_size": 2048,
            "crd_loss": false,
            "crd_loss_weight": 0.8,
            "crop_pct": null,
            "cutmix": 0.0,
            "cutmix_minmax": null,
            "dataset_download": false,
            "debug_full_knn": false,
            "decay_epochs": 90,
            "decay_milestones": [
                90,
                180,
                270
            ],
            "decay_rate": 0.1,
            "depchain": true,
            "dist_bn": "reduce",
            "dist_norm_weight": 0.0,
            "distributed": true,
            "drop": 0.0,
            "drop_block": null,
            "drop_connect": null,
            "drop_path": null,
            "dtype": "bfloat16",
            "epoch_repeats": 0.0,
            "eval": false,
            "eval_metric": "knn_top1",
            "eval_teacher": false,
            "eval_teacher_only": false,
            "eval_throughput": false,
            "fast_norm": false,
            "fd_loss_fn": "MSE",
            "feature_normalization": "SHIP_NORM",
            "feature_summarizer": "cls_token",
            "feature_upscale_factor": null,
            "force_new_wandb_id": false,
            "force_spectral_reparam": true,
            "freeze_bn": false,
            "fsdp": false,
            "fuser": "",
            "gp": null,
            "grad_accum_steps": 1,
            "grad_checkpointing": false,
            "head_init_bias": null,
            "head_init_scale": null,
            "head_warmup": 5,
            "head_weight_decay": 0.001,
            "hflip": 0.5,
            "img_size": null,
            "in_chans": null,
            "initial_checkpoint": null,
            "input_size": null,
            "interpolation": "",
            "layer_decay": null,
            "local_rank": 0,
            "log_interval": 50,
            "log_mlflow": false,
            "log_wandb": true,
            "loss_auto_balance": false,
            "lr_base": 0.1,
            "lr_base_scale": "",
            "lr_base_size": 256,
            "lr_cycle_decay": 0.5,
            "lr_cycle_limit": 1,
            "lr_cycle_mul": 1.0,
            "lr_k_decay": 1.0,
            "lr_noise": null,
            "lr_noise_pct": 0.67,
            "lr_noise_std": 1.0,
            "mean": null,
            "mesa": false,
            "min_lr": 0,
            "mixup": 0.0,
            "mixup_mode": "batch",
            "mixup_off_epoch": 0,
            "mixup_prob": 1.0,
            "mixup_switch_prob": 0.5,
            "mlp_hidden_size": 1520,
            "mlp_num_inner": 3,
            "mlp_version": "v2",
            "model": "vit_huge_patch16_224",
            "model_kwargs": {},
            "model_norm": false,
            "momentum": 0.9,
            "no_aug": false,
            "no_ddp_bb": true,
            "no_prefetcher": false,
            "no_resume_opt": false,
            "num_classes": null,
            "opt_betas": null,
            "opt_eps": null,
            "patience_epochs": 10,
            "pin_mem": false,
            "prefetcher": true,
            "pretrained": false,
            "rank": 0,
            "ratio": [
                0.75,
                1.3333333333333333
            ],
            "recount": 1,
            "recovery_interval": 0,
            "register_multiple": 16,
            "remode": "pixel",
            "reprob": 0.0,
            "reset_loss_state": false,
            "resplit": false,
            "save_images": false,
            "scale": [
                0.5,
                1.0
            ],
            "sched": "cosine",
            "seed": 42,
            "smoothing": 0.1,
            "spectral_heads": false,
            "spectral_reparam": false,
            "split_bn": false,
            "start_epoch": null,
            "std": null,
            "stream_teachers": true,
            "sync_bn": false,
            "synchronize_step": false,
            "teachers": [
                {
                    "fd_normalize": false,
                    "feature_distillation": true,
                    "input_size": 378,
                    "model": "ViT-H-14-378-quickgelu",
                    "name": "clip",
                    "pretrained": "dfn5b",
                    "type": "open_clip",
                    "use_summary": true
                },
                {
                    "fd_normalize": false,
                    "feature_distillation": true,
                    "input_size": 378,
                    "model": "ViT-SO400M-14-SigLIP-384",
                    "name": "siglip",
                    "pretrained": "webli",
                    "type": "open_clip",
                    "use_summary": true
                },
                {
                    "fd_normalize": false,
                    "feature_distillation": true,
                    "input_size": 378,
                    "model": "dinov2_vitg14_reg",
                    "name": "dino_v2",
                    "type": "dino_v2",
                    "use_summary": true
                },
                {
                    "fd_normalize": false,
                    "feature_distillation": true,
                    "input_size": 1024,
                    "model": "vit-h",
                    "name": "sam",
                    "type": "sam",
                    "use_summary": false
                }
            ],
            "torchcompile": null,
            "torchscript": false,
            "train_interpolation": "random",
            "train_split": "train",
            "tta": 0,
            "use_coco": false,
            "use_multi_epochs_loader": false,
            "val_ema_only": false,
            "val_split": "val",
            "vflip": 0.0,
            "vitdet_version": 1,
            "wandb_entity": "",
            "wandb_job_type": "",
            "wandb_name": "",
            "wandb_project": "",
            "warmup_lr": 1e-05,
            "warmup_prefix": false,
            "worker_seeding": "all",
            "workers": 8,
            "world_size": 256
        },
        "auto_map": {
            "AutoConfig": "nvidia/C-RADIOv2-H--hf_model.RADIOConfig",
            "AutoModel": "nvidia/C-RADIOv2-H--hf_model.RADIOModel"
        },
        "feature_normalizer_config": null,
        "inter_feature_normalizer_config": null,
        "max_resolution": 2048,
        "model_type": "",
        "patch_size": 16,
        "preferred_resolution": [
            768,
            768
        ],
        "torch_dtype": "bfloat16",
        "use_flash_attn": false,
        "version": "radio_v2.5-h",
        "vitdet_window_size": null
    },
    "vit_hidden_size": 1280,
    "quantization_config": {
        "config_groups": {
            "group_0": {
                "input_activations": {
                    "dynamic": false,
                    "num_bits": 8,
                    "type": "float"
                },
                "weights": {
                    "dynamic": false,
                    "num_bits": 8,
                    "type": "float"
                },
                "targets": [
                    "Linear"
                ]
            }
        },
        "ignore": [
            "model.layers.language_model.lm_head",
            "model.layers.mlp1*",
            "model.layers.*.conv1d*",
            "model.layers.vision_model*",
            "lm_head"
        ],
        "quant_algo": "FP8",
        "producer": {
            "name": "modelopt",
            "version": "0.37.0.dev5+g76fb12d47.d20250905"
        },
        "quant_method": "modelopt"
    }
}