"""
Test PReLU operation on board
"""
import os
import math
import torch
import pypto
import pytest
from numpy.testing import assert_allclose
import torch_npu
def test_prelu_onboard():
device_id = int(os.environ.get('TILE_FWK_DEVICE_ID', 0))
torch.npu.set_device(device_id)
shape = (72, 71)
view_shape = (32, 32)
tile_shape = (32, 32)
pypto.runtime._device_init()
input1 = pypto.tensor(shape, pypto.DT_FP32, "PTO_TENSOR_input1")
weight = pypto.tensor((shape[1],), pypto.DT_FP32, "PTO_TENSOR_weight")
output = pypto.tensor(shape, pypto.DT_FP32, "PTO_TENSOR_output")
b_loop_num = math.ceil(shape[0] / view_shape[0])
s_loop_num = math.ceil(shape[1] / view_shape[1])
with pypto.function("MAIN", input1, weight, output):
for b_idx in pypto.loop(b_loop_num, name="b0", idx_name="bidx"):
for s_idx in pypto.loop(s_loop_num, name="s0", idx_name="sidx"):
offset_x = b_idx * view_shape[0]
offset_y = s_idx * view_shape[1]
valid_shape_x = pypto.min(pypto.symbolic_scalar(shape[0]) - offset_x,
pypto.symbolic_scalar(view_shape[0]))
valid_shape_y = pypto.min(pypto.symbolic_scalar(shape[1]) - offset_y,
pypto.symbolic_scalar(view_shape[1]))
view_tensor_a = pypto.view(input1, view_shape,
[offset_x, offset_y],
valid_shape=[valid_shape_x, valid_shape_y])
weight_valid_shape = valid_shape_y
view_tensor_weight = pypto.view(weight, (view_shape[1],),
[offset_y, ],
valid_shape=[weight_valid_shape, ])
pypto.set_vec_tile_shapes(tile_shape[0], tile_shape[1])
res = pypto.prelu(view_tensor_a, view_tensor_weight)
pypto.assemble(res, [offset_x, offset_y], output)
assert isinstance(output, pypto.tensor)
a_tensor = torch.randn(size=[shape[0], shape[1]], dtype=torch.float32)
weight_tensor = torch.randn(size=[shape[1], ], dtype=torch.float32)
out_tensor = torch.zeros(shape[0], shape[1], dtype=torch.float32)
pto_a_tensor = pypto.from_torch(a_tensor, "PTO_TENSOR_input1")
pto_weight_tensor = pypto.from_torch(weight_tensor, "PTO_TENSOR_weight")
pto_out_tensor = pypto.from_torch(out_tensor, "PTO_TENSOR_output")
pypto.runtime._device_run_once_data_from_host(pto_a_tensor, pto_weight_tensor, pto_out_tensor)
golden = torch.where(a_tensor >= 0, a_tensor, weight_tensor * a_tensor)
assert_allclose(out_tensor.flatten(), golden.flatten(), rtol=1e-3, atol=1e-3)
pypto.runtime._device_fini()