{
"model_id": "llava",
"text_decoder": {
"num_layers": 32,
"hidden_size": 4096,
"num_attention_heads": 32,
"num_query_groups": 32,
"ffn_hidden_size": 11008,
"add_bias_linear": false,
"bias_activation_fusion": false,
"gated_linear_unit": true,
"apply_query_key_layer_scaling": false,
"layernorm_zero_centered_gamma": false,
"max_position_embeddings": 4096,
"bias_dropout_fusion": false,
"apply_rope_fusion": false,
"attention_softmax_in_fp32": true,
"attention_dropout": 0.0,
"hidden_dropout": 0.0,
"bf16": true,
"params_dtype": "bf16",
"deallocate_pipeline_outputs": true,
"persist_layer_norm": true,
"activation_func": "silu",
"normalization": "RMSNorm",
"language_vocab_size": 32000,
"language_max_sequence_length": 4096,
"lm_position_embedding_type": "rope",
"recompute_granularity": "full",
"recompute_method": "block",
"recompute_num_layers": 32,
"freeze": true,
"ckpt_path": "/<your_vicuna_weights_path>/converted_vicuna.pt"
},
"image_encoder": {
"vision_encoder": {
"model_id": "clip",
"num_layers": 23,
"hidden_size": 1024,
"num_attention_heads": 16,
"num_query_groups": 16,
"ffn_hidden_size": 4096,
"post_layer_norm": false,
"add_bias_linear": true,
"add_qkv_bias": true,
"hidden_dropout": 0.0,
"attention_dropout": 0.0,
"bf16": true,
"params_dtype": "bf16",
"gated_linear_unit": false,
"kv_channels": 64,
"layernorm_zero_centered_gamma": false,
"bias_activation_fusion": false,
"bias_dropout_fusion": false,
"attention_softmax_in_fp32": true,
"normalization": "LayerNorm",
"apply_rope_fusion": false,
"activation_func": "quick_gelu",
"device": "npu",
"add_class_token": true,
"class_token_len": 1,
"patch_size": 14,
"image_size": 336,
"freeze": true,
"ckpt_path": "/<your_clip_weights_path>/converted_clip.pt"
},
"vision_projector": {
"model_id": "mlp",
"num_layers": 2,
"gated_linear_unit": false,
"bias_activation_fusion": false,
"add_bias_linear": true,
"input_size": 1024,
"hidden_size": 4096,
"ffn_hidden_size": 4096,
"activation_func": "gelu",
"bf16": true,
"params_dtype": "bf16",
"freeze": false,
"ckpt_path": null
}
},
"text_encoder": null,
"video_encoder": null
}