import torch
import numpy as np
import torch_npu
from torch_npu.testing.testcase import TestCase, run_tests
from torch_npu.testing.common_utils import create_common_tensor
class TestAtan2(TestCase):
    """Compare ``torch.atan2`` on NPU tensors against the CPU result."""

    @staticmethod
    def _upcast_half(tensor):
        """Return ``tensor`` as float32 if it is float16, else unchanged."""
        if tensor.dtype == torch.float16:
            return tensor.to(torch.float32)
        return tensor

    def cpu_op_exec(self, input1, input2):
        """Compute ``torch.atan2(input1, input2)`` and return it as a NumPy array."""
        return torch.atan2(input1, input2).numpy()

    def npu_op_exec(self, input1, input2):
        """Compute ``torch.atan2`` and copy the result to the CPU as a NumPy array."""
        result = torch.atan2(input1, input2)
        return result.to("cpu").numpy()

    def npu_op_exec_out(self, input1, input2, out):
        """Compute ``torch.atan2`` into ``out`` and return ``out`` as a NumPy array.

        ``out`` is written by the ``out=`` variant of the operator and then
        copied to the CPU for comparison.
        """
        torch.atan2(input1, input2, out=out)
        return out.to("cpu").numpy()

    def test_atan2_common_shape_format(self):
        """Check ``atan2`` and its ``out=`` variant for float16/float32 inputs.

        Each entry of ``shape_format`` is ``[input_spec, out_spec]``; both specs
        are handed unchanged to ``create_common_tensor``. Both operands are
        built from ``input_spec``; ``out_spec`` is only used for the ``out=``
        tensor.
        """
        # NOTE(review): the shapes in out_spec differ from the input shapes;
        # presumably this exercises resizing of the ``out`` tensor - confirm.
        shape_format = [
            [[np.float16, 0, [4, 12, 12, 128]], [np.float16, 0, [4]]],
            [[np.float16, 0, [4, 128]], [np.float16, 0, [4, 256, 12]]],
            [[np.float32, 0, [4, 12, 12, 128]], [np.float32, 0, [4]]],
            [[np.float32, 0, [4, 128]], [np.float32, 0, [4, 256, 12]]],
            [[np.float16, 2, [4, 12, 12, 128]], [np.float16, 0, [4]]],
            [[np.float16, 3, [4, 128]], [np.float16, 0, [4, 256, 12]]],
            [[np.float32, 2, [4, 12, 12, 128]], [np.float32, 0, [4]]],
            [[np.float32, 3, [4, 128]], [np.float32, 0, [4, 256, 12]]],
        ]
        for input_spec, out_spec in shape_format:
            cpu_input1, npu_input1 = create_common_tensor(input_spec, -1, 1)
            cpu_input2, npu_input2 = create_common_tensor(input_spec, -1, 1)
            # Only the NPU half of the pair is needed for the ``out=`` variant.
            _, npu_out = create_common_tensor(out_spec, -1, 1)

            # The CPU result is computed in float32 for float16 inputs and
            # cast back to the NPU result dtype before comparing.
            cpu_input1 = self._upcast_half(cpu_input1)
            cpu_input2 = self._upcast_half(cpu_input2)

            cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2)
            npu_output = self.npu_op_exec(npu_input1, npu_input2)
            npu_output_out = self.npu_op_exec_out(npu_input1, npu_input2, npu_out)

            cpu_output = cpu_output.astype(npu_output.dtype)
            self.assertRtolEqual(cpu_output, npu_output)
            self.assertRtolEqual(cpu_output, npu_output_out)

    def test_atan2_mix_dtype(self):
        """Check ``atan2`` with a float32 first operand and a float16 second operand."""
        cpu_fp32, npu_fp32 = create_common_tensor([np.float32, 0, (2, 3)], 1, 100)
        cpu_fp16, npu_fp16 = create_common_tensor([np.float16, 0, (2, 3)], 1, 100)
        cpu_output = self.cpu_op_exec(cpu_fp32, cpu_fp16)
        npu_output = self.npu_op_exec(npu_fp32, npu_fp16)
        self.assertRtolEqual(cpu_output, npu_output)
# Run the test suite only when this file is executed as a script.
if __name__ == "__main__":
    run_tests()