文件 | 最后提交记录 | 最后更新时间
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | 
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | 
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[Feature][Model] Switch DeepSeekV4 hc_pre to fused op (#9396)

### What this PR does / why we need it?

This PR switches DeepSeekV4 hc_pre from the composite small-op path to the aclnnHcPre-backed npu_hc_pre_v2 interface.

It also aligns the runtime hc_pre torch binding contract with the CANN recipe binding while leaving the Meta implementation as shape inference only, so torch compile does not trip over runtime-only checks:

- x must be 3D or 4D BF16
- hc_mult / hc must be 4
- d must be 4096 or 7168
- hc_fn must be [24, hc * d]
- hc_scale must be [3]
- hc_base must be [24]
- non-x tensors must be FP32

For Ascend950, npu_hc_pre_v2 follows the CANN recipe's batch filter and falls back to the composite path when bs > 512 and bs is not aligned to 8192. Other SoCs continue to use the fused path.

Reference: https://gitcode.com/cann/cann-recipes-infer/blob/master/ops/ascendc/torch_ops_extension/custom_ops/csrc/npu_hc_pre.cpp

- vLLM version: v0.20.2
- vLLM main: https://github.com/vllm-project/vllm/commit/0d4d334eaa583b9c09aa4eb7538c22db99fd84b3

Signed-off-by: maoxx241 <maomaoyu870@gmail.com>

8 天前
[1/N] Refactor nightly test structure (#5479)

### What this PR does / why we need it?

This patch is the first in a series of refactoring actions, which include clarifying the directory structure of the nightly tests, refactoring the config retrieval logic, and optimizing the workflow. This first step refactors the nightly directory structure to make it more readable and logical.

- vLLM version: v0.13.0
- vLLM main: https://github.com/vllm-project/vllm/commit/5326c89803566a131c928f7fdd2100b75c981a42

Signed-off-by: wangli <wangli858794774@gmail.com>

4 个月前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | 
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前