"""
Test case for loop_unroll variable scope bugfix.
This test verifies that variables defined outside loop_unroll can be correctly
accessed inside nested loops without being prematurely deleted. This addresses
a bug where liveness analysis incorrectly marked variables for deletion after
inner loop exits, causing NameError when loop_unroll splits the loop into
multiple blocks.
"""
import os
import pypto
import torch
import numpy as np
from numpy.testing import assert_allclose
def test_loop_unroll_variable_scope():
    """Test that variables defined outside loop_unroll are accessible in all unroll blocks.

    This test verifies the bugfix for liveness analysis incorrectly marking variables
    for deletion after the inner loop exits. The variable should only be deleted after
    the outermost loop_unroll completes.

    The kernel adds a 1-D bias to every row of a 2-D input. ``bias_2d`` is
    created before the ``loop_unroll`` and is only read inside the nested
    ``pypto.loop``, which is the access pattern described above. The kernel
    output is compared against ``input_tensor + bias_input.unsqueeze(0)``
    computed with torch.

    The test targets an NPU: the device index is read from the
    ``TILE_FWK_DEVICE_ID`` environment variable (default 0) and the kernel is
    jitted with ``pypto.RunMode.NPU``.
    """
    # Select the NPU to run on; falls back to device 0 when the variable is unset.
    device_id = int(os.environ.get('TILE_FWK_DEVICE_ID', 0))
    torch.npu.set_device(device_id)
    # Fixed seed so the random inputs below are reproducible between runs.
    torch.manual_seed(42)
    bs = 32  # number of rows in the input / output tensors
    ne = 16  # number of elements per row; also the length of the bias vector

    @pypto.frontend.jit(
        runtime_options={"run_mode": pypto.RunMode.NPU}
    )
    def loop_unroll_kernel(
        input_tensor: pypto.Tensor([pypto.STATIC, pypto.STATIC], pypto.DT_FP32),
        bias_input: pypto.Tensor([pypto.STATIC], pypto.DT_FP32),
        output: pypto.Tensor([pypto.STATIC, pypto.STATIC], pypto.DT_FP32)
    ):
        # Kernel under test: writes input_tensor + bias (per row) into output.
        pypto.set_vec_tile_shapes(16, 16)
        # Defined OUTSIDE the loop_unroll on purpose: this is the variable that
        # must stay alive across every unroll block.
        bias_2d = pypto.reshape(bias_input, [1, input_tensor.shape[1]], inplace=True)
        # NOTE(review): unroll_list=[2, 1] presumably makes loop_unroll emit a
        # 2-iteration block plus a 1-iteration remainder block, with tile_batch
        # being the block size -- confirm against the pypto.loop_unroll docs.
        for bs_idx, tile_batch in pypto.loop_unroll(
            0, input_tensor.shape[0], 1,
            name="LOOP_UNROLL_TEST",
            idx_name="bs_idx",
            unroll_list=[2, 1]
        ):
            # Rows [bs_idx, bs_idx + tile_batch) of the input handled by this block.
            tile_input = input_tensor[bs_idx:bs_idx + tile_batch, :]
            tile_bias = pypto.tensor([tile_batch, input_tensor.shape[1]], bias_2d.dtype, "tile_bias")
            # Nested loop that reads bias_2d; the bug being guarded against
            # deleted bias_2d once this inner loop exited.
            # Presumably assemble places bias_2d at offset [tmp_idx, 0] of
            # tile_bias, i.e. one bias row per iteration -- consistent with the
            # reference result computed below; verify against pypto.assemble.
            for tmp_idx in pypto.loop(tile_batch):
                pypto.assemble(bias_2d, [tmp_idx, 0], tile_bias)
            tile_result = pypto.add(tile_input, tile_bias)
            # Store the block's result into the matching rows of the output.
            output[bs_idx:bs_idx + tile_batch, :] = tile_result

    # Host-side inputs; the output buffer is filled by the kernel.
    input_tensor = torch.randn((bs, ne), dtype=torch.float32, device=device_id)
    bias_input = torch.randn((ne,), dtype=torch.float32, device=device_id)
    output = torch.zeros((bs, ne), dtype=torch.float32, device=device_id)
    # Reference result, computed with torch before the kernel is invoked.
    expected = input_tensor + bias_input.unsqueeze(0)
    loop_unroll_kernel(input_tensor, bias_input, output)
    # Compare on the CPU as NumPy arrays with 1e-3 relative/absolute tolerance.
    assert_allclose(
        output.cpu().float().numpy(),
        expected.cpu().float().numpy(),
        rtol=1e-3,
        atol=1e-3
    )