文件最后提交记录最后更新时间
[1/N] Refactor nightly test structure (#5479) ### What this PR does / why we need it? This patch is a series of refactoring actions, including clarifying the directory structure of nightly tests, refactoring the config retrieval logic, and optimizing the workflow, etc. This is the first step: refactoring the directory structure of nightly to make it more readable and logical. - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/5326c89803566a131c928f7fdd2100b75c981a42 Signed-off-by: wangli <wangli858794774@gmail.com>4 个月前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[BugFix][310p] Align 310P GDN state semantics with vLLM (#7902) ### What this PR does / why we need it? - I found that the FLA state semantics are not aligned between vllm-ascend and vLLM: in vLLM, the state layout is [N, HV, V, K], while in vllm-ascend it is [N, HV, K, V]. The early 310P implementation was aligned with the mainline vllm-ascend behavior, but the operator team now plans to provide a ` fused recurrent-gated-delta-rule` operator whose semantics will align with vLLM. Therefore, this needs to be adjusted in advance. - I also found that L2 normalization can use the operator provided in functional, instead of being fully implemented through manually stitched small ops as it is now. ### Does this PR introduce _any_ user-facing change? NA ### How was this patch tested? ut and the e2e test - vLLM version: v0.18.0 - vLLM main: https://github.com/vllm-project/vllm/commit/35141a7eeda941a60ad5a4956670c60fd5a77029 --------- Signed-off-by: Tflowers-0129 <2906339855@qq.com>1 个月前
[Feature] Optimize host-device sync problem in prefill phase for Qwen3Next/Qwen3.5 (#7967) ### What this PR does / why we need it? This PR optimizes the host-device sync problem in prefill phase for Qwen3Next/Qwen3.5 on Ascend. Backgound: - The original implementation clears the SSM state in non-spec prefill with an inefficient aten::index_put_ operation, causing severe host-bound problem. - Logically, this is an index_fill_ operation, but it is not yet natively supported by Ascend. Changes in this PR: - Introduce a simple Triton-implemented kernel clear_ssm_states to tackle the problem. ------ - vLLM version: v0.18.0 - vLLM main: https://github.com/vllm-project/vllm/commit/35141a7eeda941a60ad5a4956670c60fd5a77029 Signed-off-by: AyiStar <ayistar@outlook.com>1 个月前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[310p] Add a PyTorch implementation of the GDN gating operator on 310P (#7430) ### What this PR does / why we need it? RFC #7394 Add a PyTorch implementation of the GDN gating operator on 310P. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? UT - vLLM version: v0.17.0 - vLLM main: https://github.com/vllm-project/vllm/commit/4497431df654e46fb1fb5e64bf8611e762ae5d87 Signed-off-by: Tflowers-0129 <2906339855@qq.com>2 个月前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[BugFix][310p] Align 310P GDN state semantics with vLLM (#7902) ### What this PR does / why we need it? - I found that the FLA state semantics are not aligned between vllm-ascend and vLLM: in vLLM, the state layout is [N, HV, V, K], while in vllm-ascend it is [N, HV, K, V]. The early 310P implementation was aligned with the mainline vllm-ascend behavior, but the operator team now plans to provide a ` fused recurrent-gated-delta-rule` operator whose semantics will align with vLLM. Therefore, this needs to be adjusted in advance. - I also found that L2 normalization can use the operator provided in functional, instead of being fully implemented through manually stitched small ops as it is now. ### Does this PR introduce _any_ user-facing change? NA ### How was this patch tested? ut and the e2e test - vLLM version: v0.18.0 - vLLM main: https://github.com/vllm-project/vllm/commit/35141a7eeda941a60ad5a4956670c60fd5a77029 --------- Signed-off-by: Tflowers-0129 <2906339855@qq.com>1 个月前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前
[CI]Style: Convert test/ to ruff format(Batch #3) (#6744) ### What this PR does / why we need it? | File Path | | :--- | | tests/e2e/nightly/single_node/models/scripts/test_single_node.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py | | tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py | | tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 --------- Signed-off-by: MrZ20 <2609716663@qq.com>10 天前