import contextlib
import gc
import io
import inspect
import itertools
import math
import random
import re
import copy
import os
import tempfile
import unittest
import warnings
import types
import pickle
import textwrap
import subprocess
import weakref
import sys
from itertools import product, combinations, permutations, chain
import copyreg
from functools import partial
from multiprocessing.reduction import ForkingPickler
from typing import Tuple
import numpy as np
import torch
from url import get_url
from torch.testing._internal.two_tensor import TwoTensor
import torch_npu
import torch_npu.testing
import torch.utils.data
from torch import inf, nan
from torch import multiprocessing as mp
from torch.testing import make_tensor
from torch.testing._internal.common_optimizers import (
optim_db, optims, _get_optim_inputs_including_global_cliquey_kwargs)
from torch.testing._internal.common_utils import (
MI300_ARCH, TEST_WITH_TORCHINDUCTOR, TEST_WITH_ROCM, run_tests, IS_JETSON,
IS_FILESYSTEM_UTF8_ENCODING,
IS_SANDCASTLE, IS_FBCODE, IS_REMOTE_GPU, skipIfRocmArch, skipIfTorchInductor, load_tests, slowTest, slowTestIf,
skipIfCrossRef, TEST_WITH_CROSSREF, skipIfTorchDynamo, skipRocmIfTorchInductor, set_default_dtype,
skipCUDAMemoryLeakCheckIf, BytesIOContext,
skipIfRocm, skipIfNoSciPy, TemporaryFileName, TemporaryDirectoryName,
wrapDeterministicFlagAPITest, DeterministicGuard, CudaSyncGuard,
bytes_to_scalar, parametrize, skipIfMPS, noncontiguous_like,
AlwaysWarnTypedStorageRemoval, TEST_WITH_TORCHDYNAMO, xfailIfTorchDynamo,
xfailIfS390X, set_warn_always_context)
from torch.testing._internal.common_device_type import (
expectedFailureMeta,
expectedFailureXLA,
instantiate_device_type_tests,
onlyPRIVATEUSE1, onlyCPU,
dtypes, dtypesIfPRIVATEUSE1, dtypesIfCPU, deviceCountAtLeast,
skipMeta, PYTORCH_CUDA_MEMCHECK, largeTensorTest, onlyNativeDeviceTypes, skipCUDAIfNotRocm,
get_all_device_types, skipXLA)
import torch.backends.quantized
import torch.testing._internal.data
from torch.testing._internal.common_cuda import (
tf32_on_and_off, TEST_CUDNN,
_create_scaling_case, _create_scaling_models_optimizers)
from torch.testing._internal.common_mkldnn import reduced_f32_on_and_off
from torch.testing._internal.common_dtype import (
floating_types_and, get_all_math_dtypes, all_types_and_complex_and, complex_types,
all_types_and, floating_types, floating_and_complex_types, integral_types_and,
get_all_qint_dtypes, all_types_complex_float8_and,
)
from torch.testing._internal.common_utils import IS_WINDOWS
# True only when NPU support is available and at least two devices are visible.
TEST_MULTINPU = torch.npu.is_available() and torch.npu.device_count() >= 2

# The TestCase base class comes from inductor when the suite runs under it.
if TEST_WITH_TORCHINDUCTOR:
    from torch._inductor.test_case import TestCase
else:
    from torch.testing._internal.common_utils import TestCase

# The tests below are written against a float32 default dtype.
assert torch.get_default_dtype() is torch.float32

# Rebind the imported hook under the same name at module level.
load_tests = load_tests

# Name of device 0, used to detect the Ascend910A / Ascend910P parts.
DEVICE_NAME = torch_npu.npu.get_device_name(0)
device_is_910A = any(tag in DEVICE_NAME for tag in ("Ascend910A", "Ascend910P"))

# On those devices the "all types and complex" dtype helper is replaced by
# the variant without complex dtypes.
if device_is_910A:
    all_types_and_complex_and = all_types_and
@contextlib.contextmanager
def torch_vital_set(value):
    """Temporarily set the ``TORCH_VITAL`` environment variable to ``value``.

    On exit the variable is put back the way it was found: restored to its
    previous value if it existed (an empty string counts as existing), or
    removed if it was not set beforehand.

    Args:
        value: String stored in ``os.environ['TORCH_VITAL']`` while the
            ``with`` body runs.
    """
    # ``None`` means "was not set"; an empty string is a legitimate prior
    # value and must be restored rather than treated as missing.
    stash = os.environ.get('TORCH_VITAL')
    os.environ['TORCH_VITAL'] = value
    try:
        yield
    finally:
        if stash is not None:
            os.environ['TORCH_VITAL'] = stash
        else:
            # pop() rather than del, so cleanup cannot raise KeyError if the
            # body already removed the variable itself.
            os.environ.pop('TORCH_VITAL', None)
class TestBasicVitalSigns(TestCase):
    # Checks of the torch "vitals" API, toggled through the TORCH_VITAL
    # environment variable by the torch_vital_set context manager.

    def test_basic_vitals(self):
        # An empty TORCH_VITAL leaves vitals disabled; 'ON' enables them.
        for env_value, check in (('', self.assertFalse), ('ON', self.assertTrue)):
            with torch_vital_set(env_value):
                check(torch.vitals_enabled())

    def test_basic_vitals_read_write(self):
        with torch_vital_set('ON'):
            self.assertTrue(torch.vitals_enabled())
            # Write one vital, then look for it (and the NPU entry) in the dump.
            was_set = torch.set_vital('Dataloader', 'basic_unit_test', 'TEST_VALUE_STRING')
            self.assertTrue(was_set)
            for expected in ('TEST_VALUE_STRING', 'NPU.used'):
                self.assertIn(expected, torch.read_vitals())

    def test_dataloader_vitals(self):
        with torch_vital_set('ON'):
            features = torch.arange(10 * 5, dtype=torch.float32).view(10, 5)
            targets = torch.arange(10 * 5, dtype=torch.float32).view(10, 5)
            dataset = torch.utils.data.TensorDataset(features, targets)
            # The loader is never iterated; it is only constructed (and kept
            # referenced) before the vitals dump is read.
            loader = torch.utils.data.DataLoader(dataset, batch_size=2)
            self.assertIn('Dataloader.enabled\t\t True', torch.read_vitals())
class TestVitalSignsNpu(TestCase):
    @onlyPRIVATEUSE1
    def test_NPU_vitals_NPU_only(self, device):
        # With vitals enabled, the dump must report the 'NPU.used' entry as true.
        with torch_vital_set('ON'):
            vitals_dump = torch.read_vitals()
            self.assertIn('NPU.used\t\t true', vitals_dump)
# NOTE(review): despite the "sm86" in the name, this flag only records whether
# an NPU is available; no architecture/capability check is performed here.
is_npu_sm86 = torch_npu.npu.is_available()
# Test methods in this class take a `device` argument and, where decorated
# with @dtypes, an additional `dtype` argument.
class TestTorchDeviceType(TestCase):
# NOTE(review): presumably consulted by TestCase.assertEqual to require
# matching dtypes — confirm against the TestCase implementation.
exact_dtype = True
def _rand_shape(self, dim, min_size, max_size):
shape = []
for i in range(dim):
shape.append(random.randint(min_size, max_size))
return tuple(shape)
@onlyCPU
def test_constants(self, device):
    # Each torch constant must be a Python float and equal its math twin.
    # The third entry carries the comparison options used for that constant.
    checks = (
        (torch.e, math.e, {'atol': 0, 'rtol': 0}),
        (torch.pi, math.pi, {'atol': 0, 'rtol': 0}),
        (torch.nan, math.nan, {'equal_nan': True}),
        (torch.inf, math.inf, {}),
    )
    for actual, expected, compare_kwargs in checks:
        self.assertIsInstance(actual, float)
        self.assertEqual(actual, expected, **compare_kwargs)
@onlyNativeDeviceTypes
@slowTestIf(IS_WINDOWS)
@dtypes(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64,
        torch.bool, torch.float32, torch.complex64, torch.float64,
        torch.complex128, torch.uint16, torch.uint32, torch.uint64)
def test_bytes_to_scalar(self, device, dtype):
    # Round trip: random bytes -> scalar via bytes_to_scalar -> raw storage
    # bytes, which must equal the bytes we started from.

    # For bool only the byte values 0 and 1 are generated (randint's upper
    # bound is exclusive); every other dtype uses the full 0..255 range.
    byte_upper_bound = 2 if dtype == torch.bool else 256

    def rand_byte():
        return torch.randint(0, byte_upper_bound, ()).item()

    n_bytes = torch._utils._element_size(dtype)
    for _ in range(10):
        expected_bytes = [rand_byte() for _ in range(n_bytes)]
        scalar = bytes_to_scalar(expected_bytes, dtype, device)
        self.assertEqual(scalar.storage().untyped().tolist(), expected_bytes)
@onlyPRIVATEUSE1
@largeTensorTest('56GB', device='npu')
@dtypes(torch.bfloat16)
@unittest.skipIf(IS_JETSON, "Large tensor tests are too large for Jetson.")
def test_int64_upsample3d(self, device, dtype):
    # Nearest-neighbour upsampling of a very large 5-D tensor must not raise.
    # The input alone has 1*256*16*720*1280 = 3,774,873,600 elements, which
    # is more than a signed 32-bit index can address.
    big_shape = (1, 256, 16, 720, 1280)
    x = torch.ones(big_shape, dtype=dtype, device=device)
    try:
        torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest')
    except Exception as e:
        self.fail(f"Unexpected exception raised: {e}")
@dtypes(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64,
        torch.bool, torch.float32, torch.complex64, torch.float64,
        torch.complex128, torch.uint16, torch.uint32, torch.uint64)
@slowTestIf(IS_WINDOWS)
def test_storage(self, device, dtype):
    # A (3, 5) tensor's storage must expose the same elements as the tensor,
    # both through typed indexing and through its raw bytes.
    v = make_tensor((3, 5), dtype=dtype, device=device, low=-9, high=9)
    self.assertEqual(v.storage()[0], v[0][0])
    self.assertEqual(v.storage()[14], v[2][4])

    # Typed view: flat index i corresponds to (i // ncols, i % ncols).
    typed = v.storage()
    for flat_idx in range(v.numel()):
        row, col = divmod(flat_idx, v.size(1))
        self.assertEqual(typed[flat_idx], v[row][col])

    # Untyped view: each element occupies element_size() consecutive bytes.
    raw = v.storage().untyped()
    width = v.element_size()
    for flat_idx in range(v.numel()):
        first_byte = flat_idx * width
        row, col = divmod(flat_idx, v.size(1))
        self.assertEqual(
            bytes_to_scalar(raw[first_byte:first_byte + width], dtype, device),
            v[row][col])
@onlyNativeDeviceTypes
@dtypes(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64,
        torch.bool, torch.float32, torch.complex64, torch.float64,
        torch.complex128, torch.quint8, torch.qint8, torch.qint32,
        torch.quint4x2)
def test_storage_setitem(self, device, dtype):
    # Item and slice assignment on a legacy typed storage must leave it equal
    # to a storage built from the equivalent Python list.
    on_npu = torch.device(device).type == 'npu'

    # The quantized dtypes are not exercised on NPU.
    quantized_dtypes = (torch.quint8, torch.qint8, torch.qint32, torch.quint4x2)
    if on_npu and dtype in quantized_dtypes:
        return

    # Resolve the storage class by attribute lookup on the owning module
    # (same approach as test_tensor_storage_type) instead of eval() on a
    # concatenated string.
    storage_type_name = torch.storage._dtype_to_storage_type_map()[dtype]
    storage_module = torch_npu.npu if on_npu else torch
    storage_type = getattr(storage_module, storage_type_name)

    N = 10

    # Fill everything through a full slice.
    s = storage_type(N)
    s[:] = 0
    n_list = [0] * N
    self.assertEqual(s, storage_type(n_list))

    # Element-wise assignment.
    for i in range(N):
        s[i] = i
        n_list[i] = i
    self.assertEqual(s, storage_type(n_list))

    # Partial slice assignment with a scalar.
    n_list[2:7] = [1] * 5
    s[2:7] = 1
    self.assertEqual(s, storage_type(n_list))
@skipIfTorchDynamo("Not a suitable test for TorchDynamo")
@onlyNativeDeviceTypes
@slowTestIf(IS_WINDOWS)
def test_storage_use_count(self, device):
    # A fresh tensor's storage reports a use count of 1; taking a view of the
    # tensor must raise that count by exactly one.
    use_count = torch._C._storage_Use_Count
    a = torch.randn(10, device=device)
    baseline = use_count(a.untyped_storage()._cdata)
    self.assertEqual(baseline, 1)
    b = a.view(2, 5)
    self.assertEqual(use_count(b.untyped_storage()._cdata), baseline + 1)
@xfailIfTorchDynamo
@onlyNativeDeviceTypes
@dtypes(*(all_types_and(torch.half, torch.bool) if device_is_910A
          else all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)))
def test_tensor_storage_type(self, device, dtype):
    # Tensor.storage_type() must return the legacy storage class registered
    # for the dtype, looked up on torch_npu.npu for NPU tensors and on torch
    # otherwise.
    a = make_tensor((10,), dtype=dtype, device=device, low=-9, high=9)
    if torch.device(device).type == 'npu':
        module = torch_npu.npu
    else:
        module = torch
    storage_name = torch.storage._dtype_to_storage_type_map()[dtype]
    expected_storage_type = getattr(module, storage_name)
    self.assertEqual(a.storage_type(), expected_storage_type)
@onlyNativeDeviceTypes
@dtypes(*(all_types_and(torch.half, torch.bool) if device_is_910A
          else all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16, torch.uint16,
                                         torch.uint32, torch.uint64)))
def test_tensor_from_storage(self, device, dtype):
    # torch.tensor() accepts a storage (typed or untyped) as its data and
    # must reproduce the original tensor once reshaped.
    a = make_tensor((4, 5, 3), dtype=dtype, device=device, low=-9, high=9)
    a_s = a.storage()

    from_typed = torch.tensor(a_s, device=device, dtype=dtype).reshape(a.size())
    self.assertEqual(a, from_typed)

    from_untyped = torch.tensor(a_s.untyped(), device=device, dtype=dtype).reshape(a.size())
    self.assertEqual(a, from_untyped)

    # A typed storage of any other dtype must be rejected.
    if device_is_910A:
        other_dtypes = all_types_and(torch.half, torch.bool)
    else:
        other_dtypes = all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)
    for error_dtype in other_dtypes:
        if error_dtype != dtype:
            with self.assertRaisesRegex(RuntimeError, r'Expected a Storage of type'):
                error_storage = a.to(error_dtype).storage()
                torch.tensor(error_storage, device=device, dtype=dtype)
@onlyNativeDeviceTypes
@dtypes(*(all_types_and(torch.half, torch.bool) if device_is_910A
          else all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)))
def test_set_storage(self, device, dtype):
    # Tensor.set_() accepts a storage (typed or untyped) of the tensor's own
    # dtype and must expose the original data once reshaped.
    a = make_tensor((4, 5, 3), dtype=dtype, device=device, low=-9, high=9)
    a_s = a.storage()

    from_typed = torch.tensor([], device=device, dtype=dtype).set_(a_s).reshape(a.size())
    self.assertEqual(a, from_typed)

    from_untyped = torch.tensor([], device=device, dtype=dtype).set_(a_s.untyped()).reshape(a.size())
    self.assertEqual(a, from_untyped)

    # A typed storage of any other dtype must be rejected.
    if device_is_910A:
        other_dtypes = all_types_and(torch.half, torch.bool)
    else:
        other_dtypes = all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)
    for error_dtype in other_dtypes:
        if error_dtype != dtype:
            with self.assertRaisesRegex(RuntimeError, r'Expected a Storage of type'):
                error_storage = a.to(error_dtype).storage()
                torch.tensor([], device=device, dtype=dtype).set_(error_storage)
def _check_storage_meta(self, s, s_check):
    """Assert that meta storage ``s`` mirrors the real storage ``s_check``.

    Both arguments must be storages of the same kind (``UntypedStorage`` or
    ``TypedStorage``). ``s`` must live on the ``meta`` device, match
    ``s_check`` in ``nbytes()`` and ``size()``, report a null data pointer,
    and refuse element access. For typed storages the dtype is compared too
    and the check is repeated on the underlying untyped storages.
    """
    is_storage = isinstance(s, (torch.UntypedStorage, torch.TypedStorage))
    same_kind = isinstance(s_check, type(s))
    kind_error = (
        's and s_check must both be one of UntypedStorage or '
        'TypedStorage, but got'
        f' {type(s).__name__} and {type(s_check).__name__}')
    self.assertTrue(is_storage and same_kind, kind_error)

    self.assertEqual(s.device.type, 'meta')
    self.assertEqual(s.nbytes(), s_check.nbytes())
    self.assertEqual(s.size(), s_check.size())
    self.assertEqual(s.data_ptr(), 0)

    # Element access on a meta storage must fail.
    with self.assertRaisesRegex(NotImplementedError, r'Not available'):
        s[0]

    if not isinstance(s, torch.TypedStorage):
        return
    self.assertEqual(s.dtype, s_check.dtype)
    self._check_storage_meta(s.untyped(), s_check.untyped())
@onlyNativeDeviceTypes
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_typed_storage_meta(self, device, dtype):
    # Build the same TypedStorage on the real device and on 'meta' from
    # several constructor argument lists and compare their metadata.
    constructor_args = (
        [],
        [0],
        [100],
        [[1, 2, 3, 4, 5, 6]],
    )
    for ctor_args in constructor_args:
        real = torch.TypedStorage(*ctor_args, dtype=dtype, device=device)
        meta = torch.TypedStorage(*ctor_args, dtype=dtype, device='meta')
        self._check_storage_meta(meta, real)
@onlyNativeDeviceTypes
def test_untyped_storage_meta(self, device):
    # Build the same UntypedStorage on the real device and on 'meta' from
    # several constructor argument lists and compare their metadata.
    constructor_args = (
        [],
        [0],
        [100],
        [[1, 2, 3, 4, 5, 6]],
    )
    for ctor_args in constructor_args:
        real = torch.UntypedStorage(*ctor_args, device=device)
        meta = torch.UntypedStorage(*ctor_args, device='meta')
        self._check_storage_meta(meta, real)
@onlyNativeDeviceTypes
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_storage_meta_from_tensor(self, device, dtype):
    # The storage of a tensor moved to 'meta' must carry the same metadata
    # as the storage of the real tensor it was created from.
    real_tensor = make_tensor((4, 5, 3), dtype=dtype, device=device, low=-9, high=9)
    meta_tensor = real_tensor.to('meta')

    real_storage = real_tensor.storage()
    meta_storage = meta_tensor.storage()
    self._check_storage_meta(meta_storage, real_storage)
# Every operation below is attempted on a meta-device TypedStorage and is
# expected to raise with the given message pattern.
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_storage_meta_errors(self, device, dtype):
s0 = torch.TypedStorage([1, 2, 3, 4], device='meta', dtype=dtype)
# Copying out of, or sharing, a meta storage is rejected.
with self.assertRaisesRegex(NotImplementedError, r'Cannot copy out'):
s0.cpu()
with self.assertRaisesRegex(RuntimeError, r'only available on CPU'):
s0._share_fd_cpu_()
with self.assertRaisesRegex(RuntimeError, r'only available on CPU'):
s0._share_filename_cpu_()
# NOTE(review): with the indentation lost it is not visible whether the
# `_share_npu_` check below is also guarded by this availability test — confirm.
if torch_npu.npu.is_available():
with self.assertRaisesRegex(NotImplementedError, r'Cannot copy out'):
s0.npu()
with self.assertRaisesRegex(RuntimeError, r'only available on NPU'):
s0._share_npu_()
with self.assertRaisesRegex(TypeError, r"cannot pin 'torch.storage.UntypedStorage' only CPU memory can be pinned"):
s0.pin_memory()
with self.assertRaisesRegex(RuntimeError, r'only available on CPU'):
s0.share_memory_()
with self.assertRaisesRegex(NotImplementedError, r'Not available'):
s0.tolist()
# Writing the storage to a file is rejected as well.
with tempfile.NamedTemporaryFile() as f:
with self.assertRaisesRegex(NotImplementedError, r'Cannot copy out'):
s0._write_file(f, True, True, s0.element_size())
# Copying from the meta storage into a real one must fail on each target
# device; note the loop variable rebinds the `device` parameter.
for device in ['cpu', 'npu'] if torch_npu.npu.is_available() else ['cpu']:
s1 = torch.TypedStorage([1, 2, 3, 4], device=device, dtype=dtype)
with self.assertRaisesRegex(NotImplementedError, r'Cannot copy out'):
s1.copy_(s0)
@onlyCPU
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_storage_meta_ok(self, device, dtype):
    # Resizing a meta storage must complete without raising; there is no
    # further assertion.
    meta_storage = torch.TypedStorage([1, 2, 3, 4], device='meta', dtype=dtype)
    meta_storage.resize_(10)
@onlyPRIVATEUSE1
def test_module_share_memory(self):
    # Smoke test: calling share_memory() on a module after .to('npu') has
    # been applied to it must not raise.
    linear = torch.nn.Linear(3, 1)
    linear_npu = linear.to('npu')
    linear.share_memory()
@dtypes(torch.float32, torch.complex64)
def test_deepcopy(self, device, dtype):
    # deepcopy of a structure holding tensors, their storages and a view
    # must copy the values and keep the aliasing between the copies.
    from copy import deepcopy
    a = torch.randn(5, 5, dtype=dtype, device=device)
    b = torch.randn(5, 5, dtype=dtype, device=device)
    c = a.view(25)
    q = [a, [a.storage(), b.storage()], b, c]
    w = deepcopy(q)

    # Values must match exactly right after the copy.
    exact = {'atol': 0, 'rtol': 0}
    copied_vs_original = (
        (w[0], q[0]),
        (w[1][0], q[1][0]),
        (w[1][1], q[1][1]),
        (w[1], q[1]),
        (w[2], q[2]),
    )
    for copied, original in copied_vs_original:
        self.assertEqual(copied, original, **exact)

    # Mutating the copied `a` must show up in its copied storage and in the
    # copied view, while the originals stay untouched.
    w[0].add_(1)
    copied_a_storage, original_a_storage = w[1][0], q[1][0]
    for i in range(a.numel()):
        self.assertEqual(copied_a_storage[i], original_a_storage[i] + 1)
    self.assertEqual(w[3], c + 1)

    # Same for the copied `b` and its copied storage.
    w[2].sub_(1)
    copied_b_storage, original_b_storage = w[1][1], q[1][1]
    for i in range(a.numel()):
        self.assertEqual(copied_b_storage[i], original_b_storage[i] - 1)

    # Python attributes set on the tensor survive a deepcopy.
    a.foo = 3
    self.assertEqual(deepcopy(a).foo, 3)
@dtypes(torch.float32, torch.complex64)
def test_deepcopy_scalar(self, device, dtype):
    # A 0-d tensor keeps both its size and its value through deepcopy.
    from copy import deepcopy
    scalar = torch.tensor(5, dtype=dtype, device=device)
    self.assertEqual(scalar.size(), deepcopy(scalar).size())
    self.assertEqual(scalar, deepcopy(scalar))
def check_internal_mem_overlap(self, inplace_op, num_inputs,
                               dtype, device,
                               expected_failure=False):
    """Check that ``inplace_op`` rejects a self-overlapping first argument.

    The first input is a single element expanded to ``(3, 3)``, so all of
    its entries alias one memory location; the remaining
    ``num_inputs - 1`` inputs are random tensors of the same shape.

    Args:
        inplace_op: Callable, or the name of a ``torch.Tensor`` method.
        num_inputs: Total number of tensor arguments passed to the op.
        dtype: dtype of the generated tensors.
        device: Device of the generated tensors.
        expected_failure: When true, the op is expected NOT to raise the
            'single memory location' error, i.e. the inner check must fail.
    """
    op = getattr(torch.Tensor, inplace_op) if isinstance(inplace_op, str) else inplace_op

    data = torch.randn(1, dtype=dtype, device=device).expand(3, 3)
    inputs = [data]
    inputs.extend(torch.randn_like(data) for _ in range(num_inputs - 1))

    def run_expecting_overlap_error():
        with self.assertRaisesRegex(RuntimeError, 'single memory location'):
            op(*inputs)

    if expected_failure:
        # The inner assertion itself is expected to fail.
        with self.assertRaises(AssertionError):
            run_expecting_overlap_error()
    else:
        run_expecting_overlap_error()
def unary_check_input_output_mem_overlap(self, data, sz, op,
                                         expected_failure=False):
    """Check how ``op(input, out=...)`` handles input/output overlap.

    Fully aliased and fully disjoint input/output pairs must produce the
    same result as writing into a fresh output. Partially overlapping pairs
    (a window shifted by one element, and a square view versus its
    transpose) must raise an 'unsupported operation' error.

    Args:
        data: 1-D tensor the slices are taken from; the slices used reach up
            to index ``2 * sz``.
        sz: Length of each slice.
        op: Callable invoked as ``op(input, out=output)``.
        expected_failure: When true, the overlap error is expected NOT to be
            raised, i.e. the inner check must fail.
    """

    def _test(op, output, input_dt):
        output_exp = torch.empty_like(output)
        op(input_dt, out=output_exp)
        self.assertEqual(op(input_dt, out=output), output_exp, msg=op.__name__)

    def _check_rejected(output, input_dt):
        def attempt():
            with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
                _test(op, output, input_dt)

        if expected_failure:
            # The inner assertion itself is expected to fail.
            with self.assertRaises(AssertionError):
                attempt()
        else:
            attempt()

    # output is identical to input:
    _test(op, output=data[0:sz], input_dt=data[0:sz])
    # output and input are independent:
    _test(op, output=data[0:sz], input_dt=data[sz:2 * sz])
    # output partially overlaps with input:
    _check_rejected(data[0:sz], data[1:sz + 1])
    # output is the transpose of input:
    length = int(math.sqrt(sz))
    input_ = data[:length**2].view([length, length])
    out = input_.t()
    _check_rejected(out, input_)
def ternary_check_input_output_mem_overlap(self, op, device,
                                           expected_failure=False):
    """Run the unary overlap check on each argument slot of a ternary op.

    For every one of the three positions, the tensor under test is placed in
    that position and the other two positions are filled with independent
    random tensors viewed to the same shape.

    Args:
        op: Callable invoked as ``op(a, b, c, out=out)``.
        device: Device on which the tensors are created.
        expected_failure: Forwarded to the unary check.
    """
    sz = 9
    data = torch.randn(2 * sz, device=device)
    other1 = torch.randn(sz, device=device)
    other2 = torch.randn(sz, device=device)

    def call_with_input_at(slot, input_, out):
        operands = [other1.view(input_.shape), other2.view(input_.shape)]
        operands.insert(slot, input_)
        return op(*operands, out=out)

    for slot in range(3):
        self.unary_check_input_output_mem_overlap(
            data, sz,
            lambda input_, out, slot=slot: call_with_input_at(slot, input_, out),
            expected_failure=expected_failure)
def _select_broadcastable_dims(self, dims_full=None):
if dims_full is None:
dims_full = []
ndims = random.randint(1, 4)
dims_full = [random.randint(1, 8) for _ in range(ndims)]
else:
ndims = len(dims_full)
smaller_ndims = random.randint(1, ndims)
dims_small = []
dims_large = []
for i in range(ndims - 1, -1, -1):
j = random.randint(1, 3)
if j == 1:
ds = dims_full[i]
dl = dims_full[i]
elif j == 2:
ds = dims_full[i]
dl = 1 if len(dims_small) < smaller_ndims else dims_full[i]
elif j == 3:
ds = 1
dl = dims_full[i]
dims_large = [dl] + dims_large
if len(dims_small) < smaller_ndims:
dims_small = [ds] + dims_small
return (dims_small, dims_large, dims_full)
# Shape checks for 0-d ("scalar") versus 1-element 1-d tensors across many
# ops: each assertion pins the shape of the result.
def test_scalar_check(self, device):
zero_d = torch.randn((), device=device)
one_d = torch.randn((1,), device=device)
# remainder: result is 0-d only when no 1-d operand takes part
self.assertEqual((), torch.remainder(zero_d, zero_d).shape)
self.assertEqual((), torch.remainder(zero_d, 2).shape)
self.assertEqual((1,), torch.remainder(zero_d, one_d).shape)
self.assertEqual((1,), torch.remainder(one_d, zero_d).shape)
# fmod: same expectations as remainder
self.assertEqual((), torch.fmod(zero_d, zero_d).shape)
self.assertEqual((), torch.fmod(zero_d, 2).shape)
self.assertEqual((1,), torch.fmod(zero_d, one_d).shape)
self.assertEqual((1,), torch.fmod(one_d, zero_d).shape)
# elementwise unary ops keep a 0-d input 0-d ...
self.assertEqual((), torch.exp(zero_d).shape)
self.assertEqual((), torch.cos(zero_d).shape)
self.assertEqual((), torch.cosh(zero_d).shape)
self.assertEqual((), torch.tan(zero_d).shape)
self.assertEqual((), torch.atan(zero_d).shape)
self.assertEqual((), torch.acosh(zero_d).shape)
self.assertEqual((), torch.asinh(zero_d).shape)
self.assertEqual((), torch.atanh(zero_d).shape)
self.assertEqual((), torch.tanh(zero_d).shape)
self.assertEqual((), torch.erf(zero_d).shape)
self.assertEqual((), torch.erfc(zero_d).shape)
self.assertEqual((), torch.reciprocal(zero_d).shape)
# ... and a 1-d input 1-d
self.assertEqual((1,), torch.exp(one_d).shape)
self.assertEqual((1,), torch.cos(one_d).shape)
self.assertEqual((1,), torch.cosh(one_d).shape)
self.assertEqual((1,), torch.tan(one_d).shape)
self.assertEqual((1,), torch.atan(one_d).shape)
self.assertEqual((1,), torch.acosh(one_d).shape)
self.assertEqual((1,), torch.asinh(one_d).shape)
self.assertEqual((1,), torch.atanh(one_d).shape)
self.assertEqual((1,), torch.tanh(one_d).shape)
self.assertEqual((1,), torch.erf(one_d).shape)
self.assertEqual((1,), torch.erfc(one_d).shape)
self.assertEqual((1,), torch.reciprocal(one_d).shape)
# clamp with min, max, or both
self.assertEqual((), torch.clamp(zero_d, min=0, max=1).shape)
self.assertEqual((), torch.clamp(zero_d, min=0).shape)
self.assertEqual((), torch.clamp(zero_d, max=1).shape)
self.assertEqual((1,), torch.clamp(one_d, min=0, max=1).shape)
self.assertEqual((1,), torch.clamp(one_d, min=0).shape)
self.assertEqual((1,), torch.clamp(one_d, max=1).shape)
# cumulative ops on a 0-d tensor (dim=0 is accepted) stay 0-d
self.assertEqual((), torch.logcumsumexp(zero_d, 0).shape)
self.assertEqual((), torch.cumsum(zero_d, 0).shape)
self.assertEqual((), torch.cumprod(zero_d, 0).shape)
self.assertEqual((), torch.cummax(zero_d, 0)[0].shape)
self.assertEqual((), torch.cummin(zero_d, 0)[0].shape)
# sort / topk on a 0-d tensor: both returned tensors are 0-d
self.assertEqual([(), ()], [x.shape for x in torch.sort(zero_d, 0, False)])
self.assertEqual([(), ()], [x.shape for x in torch.sort(zero_d, 0, True)])
self.assertEqual([(), ()], [x.shape for x in torch.topk(zero_d, 1, 0, False)])
self.assertEqual([(), ()], [x.shape for x in torch.topk(zero_d, 1, 0, True)])
# binary max / min between 0-d and 1-d operands
self.assertEqual((), torch.max(zero_d, zero_d).shape)
self.assertEqual((1,), torch.max(one_d, zero_d).shape)
self.assertEqual((1,), torch.max(zero_d, one_d).shape)
self.assertEqual((), torch.min(zero_d, zero_d).shape)
self.assertEqual((1,), torch.min(one_d, zero_d).shape)
self.assertEqual((1,), torch.min(zero_d, one_d).shape)
# shifts and bitwise ops on integer tensors
zero_d_int = torch.tensor(1, device=device)
one_d_int = torch.tensor([1], device=device)
self.assertEqual((), (zero_d_int >> zero_d_int).shape)
self.assertEqual((), (zero_d_int >> 1).shape)
self.assertEqual((1,), (one_d_int >> zero_d_int).shape)
self.assertEqual((1,), (zero_d_int >> one_d_int).shape)
self.assertEqual((1,), (one_d_int >> 1).shape)
self.assertEqual((), (zero_d_int << zero_d_int).shape)
self.assertEqual((), (zero_d_int << 1).shape)
self.assertEqual((1,), (one_d_int << zero_d_int).shape)
self.assertEqual((1,), (zero_d_int << one_d_int).shape)
self.assertEqual((1,), (one_d_int << 1).shape)
self.assertEqual((), (zero_d_int | zero_d_int).shape)
self.assertEqual((), (zero_d_int | 1).shape)
self.assertEqual((1,), (one_d_int | zero_d_int).shape)
self.assertEqual((1,), (zero_d_int | one_d_int).shape)
self.assertEqual((1,), (one_d_int | 1).shape)
self.assertEqual((), (zero_d_int & zero_d_int).shape)
self.assertEqual((), (zero_d_int & 1).shape)
self.assertEqual((1,), (one_d_int & zero_d_int).shape)
self.assertEqual((1,), (zero_d_int & one_d_int).shape)
self.assertEqual((1,), (one_d_int & 1).shape)
# clone
self.assertEqual((), zero_d.clone().shape)
# masked_select yields a 1-d result even when both inputs are 0-d
zero_d_bool = torch.tensor(True, device=device)
one_d_bool = torch.tensor([True], device=device)
self.assertEqual((1,), torch.masked_select(zero_d_bool, zero_d_bool).shape)
self.assertEqual((1,), torch.masked_select(zero_d_bool, one_d_bool).shape)
self.assertEqual((1,), torch.masked_select(one_d_bool, zero_d_bool).shape)
# NOTE(review): the two uint8 tensors below are created and immediately
# discarded; nothing in this method reads them.
torch.tensor(1, dtype=torch.uint8, device=device)
torch.tensor([1], dtype=torch.uint8, device=device)
# mode / max / amax / min / amin along dim 0: a 1-d input stays 1-d only
# with keepdim=True; a 0-d input stays 0-d either way
self.assertEqual([(), ()], [x.shape for x in torch.mode(zero_d, dim=0, keepdim=True)])
self.assertEqual([(), ()], [x.shape for x in torch.mode(zero_d, dim=0, keepdim=False)])
self.assertEqual([(1,), (1,)], [x.shape for x in torch.mode(one_d, dim=0, keepdim=True)])
self.assertEqual([(), ()], [x.shape for x in torch.mode(one_d, dim=0, keepdim=False)])
self.assertEqual([(), ()], [x.shape for x in torch.max(zero_d, dim=0, keepdim=True)])
self.assertEqual([(), ()], [x.shape for x in torch.max(zero_d, dim=0, keepdim=False)])
self.assertEqual([(1,), (1,)], [x.shape for x in torch.max(one_d, dim=0, keepdim=True)])
self.assertEqual([(), ()], [x.shape for x in torch.max(one_d, dim=0, keepdim=False)])
self.assertEqual((), torch.amax(zero_d, dim=0, keepdim=True).shape)
self.assertEqual((), torch.amax(zero_d, dim=0, keepdim=False).shape)
self.assertEqual((1,), torch.amax(one_d, dim=0, keepdim=True).shape)
self.assertEqual((), torch.amax(one_d, dim=0, keepdim=False).shape)
self.assertEqual([(), ()], [x.shape for x in torch.min(zero_d, dim=0, keepdim=True)])
self.assertEqual([(), ()], [x.shape for x in torch.min(zero_d, dim=0, keepdim=False)])
self.assertEqual([(1,), (1,)], [x.shape for x in torch.min(one_d, dim=0, keepdim=True)])
self.assertEqual([(), ()], [x.shape for x in torch.min(one_d, dim=0, keepdim=False)])
self.assertEqual((), torch.amin(zero_d, dim=0, keepdim=True).shape)
self.assertEqual((), torch.amin(zero_d, dim=0, keepdim=False).shape)
self.assertEqual((1,), torch.amin(one_d, dim=0, keepdim=True).shape)
self.assertEqual((), torch.amin(one_d, dim=0, keepdim=False).shape)
# set_ with an explicit size/stride: the result takes the requested size.
# The same two clones are re-set in place by consecutive statements.
zero_d_clone = zero_d.clone()
one_d_clone = one_d.clone()
self.assertEqual((), zero_d_clone.set_(one_d.storage(), 0, (), ()).shape)
self.assertEqual((1,), zero_d_clone.set_(one_d.storage(), 0, (1,), (1,)).shape)
self.assertEqual((), one_d_clone.set_(one_d.storage(), 0, (), ()).shape)
self.assertEqual((1,), one_d_clone.set_(one_d.storage(), 0, (1,), (1,)).shape)
# set_ from another tensor: the result takes the source tensor's shape
self.assertEqual((), zero_d.clone().set_(zero_d).shape)
self.assertEqual((), one_d.clone().set_(zero_d).shape)
self.assertEqual((1,), zero_d.clone().set_(one_d).shape)
self.assertEqual((1,), one_d.clone().set_(one_d).shape)
# take: the result has the shape of the index tensor
self.assertEqual((), torch.randn((2, 3), device=device).take(zero_d_int).shape)
self.assertEqual((1,), torch.randn((2, 3), device=device).take(one_d_int).shape)
# gather: the result has the shape of the index tensor
self.assertEqual((), torch.gather(zero_d, 0, torch.zeros((), dtype=torch.int64, device=device)).shape)
self.assertEqual((1,), torch.gather(zero_d, 0, torch.zeros((1,), dtype=torch.int64, device=device)).shape)
self.assertEqual((), torch.gather(one_d, 0, torch.zeros((), dtype=torch.int64, device=device)).shape)
self.assertEqual((1,), torch.gather(one_d, 0, torch.zeros((1,), dtype=torch.int64, device=device)).shape)
# normal: torch.rand supplies a non-negative value for the std argument
zero_d_ge_0 = torch.rand((), device=device)
self.assertEqual((), torch.normal(zero_d, zero_d_ge_0).shape)
self.assertEqual((1,), torch.normal(one_d, zero_d_ge_0).shape)
self.assertEqual((), torch.normal(1, zero_d_ge_0).shape)
self.assertEqual((), torch.normal(zero_d, 1).shape)
self.assertEqual((1,), torch.normal(one_d, 1).shape)
# conv2d must reject a 0-d input
w = torch.randn(2, 1, 3, 3, device=device).div_(2).requires_grad_()
self.assertRaises(RuntimeError, lambda: torch.nn.functional.conv2d(zero_d, w, groups=1))
self.assertRaises(RuntimeError, lambda: torch.nn.functional.conv2d(zero_d, w, groups=2))
# nll_loss must reject a 0-d input; with 'mean' / 'sum' reduction on valid
# inputs the loss is 0-d
self.assertRaises(ValueError, lambda: torch.nn.functional.nll_loss(zero_d, zero_d, reduction='none'))
self.assertRaises(ValueError, lambda: torch.nn.functional.nll_loss(zero_d, one_d, reduction='none'))
for (input_, target) in ((torch.randn(1, 1, device=device), torch.tensor([0], device=device)),
(torch.randn(1, 1, 1, 1, device=device), torch.tensor([[[0]]], device=device))):
self.assertEqual((), torch.nn.functional.nll_loss(input_, target, reduction='mean').shape)
self.assertEqual((), torch.nn.functional.nll_loss(input_, target, reduction='sum').shape)
# Exercises torch._check_tensor_all: argument validation, pass/fail on
# all-true / all-false / one-false tensors, and custom failure messages.
def test_check_tensor_all(self, device):
default_message = 'Expected cond to be True'
check_fn = torch._check_tensor_all
expected_error = RuntimeError
# cond must be a tensor
with self.assertRaisesRegex(TypeError, 'cond must be a tensor'):
check_fn(True)
# cond must have dtype bool (torch.ones defaults to a non-bool dtype)
with self.assertRaisesRegex(TypeError, 'cond tensor must have dtype torch.bool'):
check_fn(torch.ones(1, device=device))
test_sizes = [
(),
(1,),
(10,),
(1, 1),
(1, 10),
(10, 1),
(10, 10),
(1, 1, 1),
(10, 1, 1),
(1, 10, 1),
(10, 10, 10),
]
for size in test_sizes:
t_all_true = torch.ones(size, dtype=torch.bool, device=device)
t_all_false = torch.zeros(size, dtype=torch.bool, device=device)
# an all-true tensor must pass without raising
check_fn(t_all_true)
with self.assertRaisesRegex(expected_error, default_message):
check_fn(t_all_false)
if t_all_true.numel() > 1:
t_all_true_but_one = t_all_true.clone()
# pick one random position (one index per dimension) and clear it
idx = (random.choice(range(dim_size)) for dim_size in size)
t_all_true_but_one[(..., *idx)] = False
with self.assertRaisesRegex(expected_error, default_message):
check_fn(t_all_true_but_one)
# custom message supplied as a callable returning a plain string
message = 'message'
with self.assertRaisesRegex(expected_error, message):
check_fn(t_all_false, lambda: message)
# `message` is rebound to a function returning a tensor; the error text is
# matched against str() of that return value
def message():
return torch.arange(4)
with self.assertRaisesRegex(expected_error, re.escape(str(message()))):
check_fn(t_all_false, message)
# `message` is rebound again, now returning a formatted string
def message():
return f"{'test'} {[1, 2, 'a', True]} {True} {100} {torch.arange(4)}"
with self.assertRaisesRegex(expected_error, re.escape(str(message()))):
check_fn(t_all_false, message)
def test_check_tensor_internal(self, device):
    # torch._test_check_tensor must accept an all-true bool tensor and raise
    # the fixed test message for an all-false or a one-false tensor.
    failure_pattern = "Test message for TORCH_CHECK_TENSOR_ALL"
    test_sizes = [
        (),
        (1,),
        (10,),
        (1, 1),
        (1, 10),
        (10, 1),
        (10, 10),
        (1, 1, 1),
        (10, 1, 1),
        (1, 10, 1),
        (10, 10, 10),
    ]
    for size in test_sizes:
        all_true = torch.ones(size, dtype=torch.bool, device=device)
        all_false = torch.zeros(size, dtype=torch.bool, device=device)

        # Must not raise.
        torch._test_check_tensor(all_true)

        with self.assertRaisesRegex(RuntimeError, failure_pattern):
            torch._test_check_tensor(all_false)

        if all_true.numel() <= 1:
            continue

        # Clear one randomly chosen element (one index per dimension).
        one_false = all_true.clone()
        idx = tuple(random.choice(range(dim_size)) for dim_size in size)
        one_false[(..., *idx)] = False
        with self.assertRaisesRegex(RuntimeError, failure_pattern):
            torch._test_check_tensor(one_false)
@skipIfTorchDynamo("Not a suitable test for TorchDynamo")
@unittest.skipIf(TEST_WITH_CROSSREF, "crossref perturbs line numbering")
def test_cpp_warnings_have_python_context(self, device):
    # Warnings raised from C++ (eager and TorchScript) and from scripted
    # Python must carry the expected message and, where checked, the Python
    # line number of the statement that triggered them.
    #
    # The expected line numbers are derived from the functions' own code
    # objects rather than from a fixed offset to an inspect.currentframe()
    # call, so adding or removing lines elsewhere in this test cannot
    # invalidate them.
    s = ".+Triggered internally at.+RangeFactories.+"
    warnings.filterwarnings("ignore", "torch::jit::fuser::npu", UserWarning)

    # Keep this layout fixed: the warning is attributed to the torch.arange
    # call, which must stay exactly two lines below the `def` line.
    def cpp_warn_fn():
        out = torch.empty((5,))
        torch.arange(0, 3, out=out)
        return out

    cpp_warn_lineno = cpp_warn_fn.__code__.co_firstlineno + 2

    def assert_has_cpp_context(warning):
        escaped_warning_message = str(warning.message).encode('unicode_escape')
        self.assertTrue(re.search(s, repr(escaped_warning_message), re.IGNORECASE) is not None)

    # Eager-mode C++ warning: message context and Python line number.
    with warnings.catch_warnings(record=True) as w:
        cpp_warn_fn()
        warning = w[0]
        assert_has_cpp_context(warning)
        self.assertEqual(cpp_warn_lineno, warning.lineno)
        self.assertEqual(len(w), 1)

    # Scripted C++ warning: only the message context is checked.
    with warnings.catch_warnings(record=True) as w:
        scripted_cpp_warn_fn = torch.jit.script(cpp_warn_fn)
        scripted_cpp_warn_fn()
        warning = w[0]
        assert_has_cpp_context(warning)
        self.assertEqual(len(w), 1)

    # Keep this layout fixed as well: warnings.warn must stay exactly one
    # line below the `def` line.
    def warn_fn():
        warnings.warn("Warning!")

    warn_lineno = warn_fn.__code__.co_firstlineno + 1

    # Scripted Python warning: message and Python line number.
    with warnings.catch_warnings(record=True) as w:
        scripted_warn_fn = torch.jit.script(warn_fn)
        scripted_warn_fn()
        warning = w[0]
        self.assertTrue(re.search('Warning!', str(warning.message)) is not None)
        self.assertEqual(warn_lineno, warning.lineno)
        self.assertEqual(len(w), 1)
@onlyCPU
def test_warn_always_caught(self, device):
    # assertWarnsOnceRegex must catch the non-writable-array warning from
    # torch.from_numpy every time, not only the first time it is emitted.
    pattern = '.*non-writable.*'
    read_only = np.arange(10)
    read_only.flags.writeable = False

    # Two independent contexts, one triggering call each.
    for _ in range(2):
        with self.assertWarnsOnceRegex(UserWarning, pattern):
            torch.from_numpy(read_only)

    # Two triggering calls inside a single context must pass as well.
    with self.assertWarnsOnceRegex(UserWarning, pattern):
        torch.from_numpy(read_only)
        torch.from_numpy(read_only)
@onlyNativeDeviceTypes
def test_complex_half_experimental_warning(self, device):
    # Every way of producing a complex-half tensor exercised here must emit
    # the "experimental" warning.
    msg = 'ComplexHalf support is experimental'

    # The first factory call also provides the tensor reused further down.
    with self.assertWarnsOnceRegex(UserWarning, msg):
        t = torch.randn(3, dtype=torch.chalf, device=device)

    # Remaining factory functions.
    for factory in (torch.rand, torch.empty, torch.ones, torch.zeros):
        with self.assertWarnsOnceRegex(UserWarning, msg):
            factory(3, dtype=torch.chalf, device=device)

    # *_like variants applied to the complex-half tensor.
    like_factories = (torch.randn_like, torch.rand_like, torch.empty_like,
                      torch.ones_like, torch.zeros_like)
    for like_factory in like_factories:
        with self.assertWarnsOnceRegex(UserWarning, msg):
            like_factory(t)

    # Arithmetic on the complex-half tensor.
    with self.assertWarnsOnceRegex(UserWarning, msg):
        t + 1
@onlyPRIVATEUSE1
def test_dtypetensor_warnings(self, device):
    # The legacy typed-tensor constructors on torch_npu.npu must warn that
    # they are no longer recommended.
    msg = 'The torch_npu.npu.*DtypeTensor constructors are no longer recommended'
    for constructor_name in ('FloatTensor', 'DoubleTensor'):
        with self.assertWarnsOnceRegex(UserWarning, msg):
            getattr(torch_npu.npu, constructor_name)([0])
def test_set_default_tensor_type_warnings(self, device):
    # torch.set_default_tensor_type must emit its deprecation warning for the
    # CPU tensor type and, when an NPU is available, for the NPU one. The
    # default type in effect beforehand is restored in all cases.
    msg = '.*is deprecated as of PyTorch 2.1, please use torch.set_default_dtype().*'
    original_type = torch.tensor([]).type()
    try:
        with self.assertWarnsOnceRegex(UserWarning, msg):
            torch.set_default_tensor_type(torch.FloatTensor)

        if not torch_npu.npu.is_available():
            return

        with self.assertWarnsOnceRegex(UserWarning, msg):
            torch.set_default_tensor_type(torch_npu.npu.FloatTensor)
    finally:
        torch.set_default_tensor_type(original_type)
def test_conv_transposed_backward_agnostic_to_memory_format(self, device):
in_channels = 64
out_channels = 128
scale_factor = 8
batch_size = 8
length = 16
conv = torch.nn.ConvTranspose1d(
in_channels, out_channels, kernel_size=scale_factor * 2, stride=scale_factor).to(device)
layer_norm = torch.nn.LayerNorm(out_channels).to(device)
input_ = torch.randn(batch_size, in_channels, length).to(device).contiguous()
input_ = conv(input_).contiguous()
input_ = layer_norm(input_.transpose(1, 2).contiguous()).contiguous()
input_.sum().backward()
conv = torch.nn.ConvTranspose3d(3, 3, kernel_size=3).to(device)
input_ = torch.randn(batch_size, 3, length, length, length, device=device)
out = conv(input_)
out.backward(torch.ones_like(out).transpose(-2, -1))
@onlyPRIVATEUSE1
@largeTensorTest('12GB')
def test_conv_transposed_large(self, device):
    """Run a ConvTranspose3d forward on a large input; passes if it does not raise."""
    conv_kwargs = dict(kernel_size=5, stride=2, padding=2, output_padding=1)
    conv = torch.nn.ConvTranspose3d(64, 128, **conv_kwargs).to(device)
    # The input is created on the default device first, then moved.
    x = torch.rand([1, 64, 8, 128, 172]).to(device)
    conv(x)
def test_is_set_to(self, device):
t1 = torch.empty(3, 4, 9, 10, device=device)
t2 = torch.empty(3, 4, 9, 10, device=device)
t3 = torch.tensor([], device=device).set_(t1)
t4 = t3.clone().resize_(12, 90)
self.assertFalse(t1.is_set_to(t2))
self.assertTrue(t1.is_set_to(t3))
self.assertTrue(t3.is_set_to(t1), "is_set_to should be symmetric")
self.assertFalse(t1.is_set_to(t4))
self.assertFalse(torch.tensor([]).is_set_to(torch.tensor([])),
"Tensors with no storages should not appear to be set "
"to each other")
t1 = torch.tensor([True, True], dtype=torch.bool, device=device)
t2 = torch.tensor([0], dtype=torch.bool, device=device).set_(t1)
self.assertTrue(t1.is_set_to(t2))
t1 = torch.empty([2, 3, 4], device=device)
t2 = t1.view(4, 3, 2)
self.assertFalse(t1.is_set_to(t2))
self.assertFalse(t2.is_set_to(t1))
t1 = torch.empty([2, 5, 0], device=device)
t2 = t1.view([0])
self.assertFalse(t1.is_set_to(t2))
self.assertFalse(t2.is_set_to(t1))
@skipIfMPS
@skipMeta
@parametrize(
    "fn",
    [
        "dist", "atan2", "pow", "lerp", "add", "sub", "mul", "div", "fmod", "remainder", "eq", "ge", "gt", "le",
        "lt", "max", "min", "ne", "addcdiv", "addcmul", "masked_scatter", "masked_select", "masked_fill", "map",
        "map2", "copy",
    ],
)
def test_broadcast(self, fn, device):
    """Compare broadcasting against explicitly expanded operands for ``fn``.

    For the op named by ``fn`` the test checks, where the corresponding
    attribute exists, the Tensor method, the ``torch.<fn>`` function and the
    in-place ``<fn>_`` method: results computed on broadcastable inputs must
    equal results computed on the same inputs expanded to the full shape.
    """
    # functions that take three tensor arguments
    fns_3_args = {"map2"}
    # functions that take two tensor arguments plus a ``value=`` keyword
    fns_value_kwarg = {"addcdiv", "addcmul"}
    (dims_small, dims_large, dims_full) = self._select_broadcastable_dims()
    full1d = torch.randn(*dims_full, device=device).flatten().float()
    small = torch.randn(*dims_small, device=device).float()
    large = torch.randn(*dims_large, device=device).float()
    small_expanded = small.expand(*dims_full)
    large_expanded = large.expand(*dims_full)
    small2 = None
    small2_expanded = None
    if fn in fns_3_args or fn in fns_value_kwarg:
        # these ops need a second small operand
        (dims_small2, _, _) = self._select_broadcastable_dims(dims_full)
        small2 = torch.randn(*dims_small2, device=device).float()
        small2_expanded = small2.expand(*dims_full)
    if small.is_npu and fn in ['map', 'map2']:
        # map / map2 are skipped entirely for NPU tensors
        return
    if hasattr(large_expanded, fn):
        # Tensor-method variant. ``expanded`` maps each operand to its fully
        # expanded counterpart (the None key maps to None when small2 is unused).
        expanded = {large: large_expanded, small: small_expanded, small2: small2_expanded}

        def tensorfn(myfn, t1, t2):
            # Call the bound method ``myfn`` with the argument layout ``fn`` needs.
            if fn == "lerp":
                return myfn(t1, 0.5)
            elif fn == "masked_select":
                return myfn(t1 < 0)
            elif fn == "masked_scatter":
                return myfn(t1 < 0.5, full1d)
            elif fn == "masked_fill":
                return myfn(t1 < 0.5, 1.0)
            elif fn in fns_3_args:
                return myfn(1, t1, t2)
            elif fn in fns_value_kwarg:
                return myfn(t1, t2, value=1)
            else:
                return myfn(t1)

        # try several operand orders
        for first, second, third in [(large, small, small2), (small, large, small2),
                                     (small2, small, large), (small2, large, small)]:
            if first is None:
                # small2 is None: the remaining orderings do not apply
                break
            method_expanded = getattr(expanded[first], fn)
            method = getattr(first, fn)
            r1 = tensorfn(method_expanded, expanded[second], expanded[third])
            r2 = tensorfn(method, second, third)
            self.assertEqual(r1, r2)
    if hasattr(torch, fn):
        # ``torch.<fn>`` function variant
        fntorch = getattr(torch, fn)
        expanded = {large: large_expanded, small: small_expanded, small2: small2_expanded}

        def torchfn(t1, t2, t3):
            # Call ``torch.<fn>`` with the argument layout ``fn`` needs.
            if fn == "lerp":
                return fntorch(t1, t2, 0.5)
            elif fn == "masked_select":
                return fntorch(t1, t2 < 0)
            elif fn == "masked_scatter":
                return fntorch(t1, t2 < 0.5, full1d)
            elif fn == "masked_fill":
                return fntorch(t1, t2 < 0.5, 1.0)
            elif fn in fns_3_args:
                return fntorch(t1, 1.0, t2, t3)
            elif fn in fns_value_kwarg:
                return fntorch(t1, t2, t3, value=1.0)
            else:
                return fntorch(t1, t2)

        # try several operand orders
        for first, second, third in [(large, small, small2), (small, large, small2),
                                     (small2, small, large), (small2, large, small)]:
            if first is None:
                # small2 is None: the remaining orderings do not apply
                break
            r1 = torchfn(expanded[first], expanded[second], expanded[third])
            r2 = torchfn(first, second, third)
            self.assertEqual(r1, r2)
    # In-place variant: nothing more to do when ``<fn>_`` does not exist.
    if not hasattr(large_expanded, fn + "_"):
        return
    # Clone so the in-place op can be run twice from the same starting values.
    large_expanded_clone = large_expanded.clone()

    def tensorfn_inplace(t0, t1, t2=None):
        # Call ``t0.<fn>_`` with the argument layout ``fn`` needs.
        t0_fn = getattr(t0, fn + "_")
        if fn == "lerp":
            return t0_fn(t1, 0.5)
        elif fn == "masked_scatter":
            return t0_fn(t1 < 0.5, full1d)
        elif fn == "masked_fill":
            return t0_fn(t1 < 0.5, 1.0)
        elif fn == "map":
            return t0_fn(t1, lambda x, y: x + y)
        elif fn == "map2":
            return t0_fn(t1, t2, lambda x, y, z: x + y + z)
        elif fn in fns_3_args:
            # NOTE(review): fns_3_args only contains "map2", which is handled
            # by the branch above, so this branch looks unreachable.
            return t0_fn(1.0, t1, t2)
        elif fn in fns_value_kwarg:
            return t0_fn(t1, t2, value=1.0)
        else:
            return t0_fn(t1)

    # Only compare when neither in-place target has a zero stride.
    if (0 not in large_expanded.stride() and 0 not in large_expanded_clone.stride()):
        r1 = tensorfn_inplace(large_expanded, small_expanded, small2_expanded)
        r2 = tensorfn_inplace(large_expanded_clone, small, small2)
        self.assertEqual(r1, r2)

    def broadcastable(t0, t1, t2=None):
        # True when t1 (and t2, if given) can be expanded to t0's shape.
        try:
            t1.expand_as(t0)
            if t2 is not None:
                t2.expand_as(t0)
        except RuntimeError:
            return False
        return True

    def _test_in_place_broadcastable(t0, t1, t2=None):
        # Non-broadcastable operands with differing element counts must raise
        # (not asserted under TorchInductor); broadcastable ones must succeed.
        if not broadcastable(t0, t1, t2):
            same_size = t0.numel() == t1.numel() and (t0.numel() == t2.numel() if t2 is not None else True)
            if not same_size:
                if not TEST_WITH_TORCHINDUCTOR:
                    self.assertRaises(RuntimeError, lambda: tensorfn_inplace(t0, t1, t2))
        else:
            tensorfn_inplace(t0, t1, t2)

    if fn not in fns_3_args and fn not in fns_value_kwarg:
        _test_in_place_broadcastable(small, large_expanded)
        _test_in_place_broadcastable(small, large)
    else:
        _test_in_place_broadcastable(small2, small_expanded, large_expanded)
        _test_in_place_broadcastable(small2, small, large)
@onlyCPU
@skipIfTorchInductor("pytorch issues 113707")
@dtypes(*get_all_qint_dtypes())
def test_nondeterministic_resize_quantized(self, device, dtype):
    """Check that resizing a quantized tensor triggers the nondeterministic alert."""
    values = torch.tensor([-1, 0, 1, 2, 3], dtype=torch.float, device=device)
    quantized = torch.quantize_per_tensor(values, 0.1, 10, dtype)

    def grow():
        return quantized.resize_((10,))

    self.check_nondeterministic_alert(grow, 'quantized_resize_cpu_')
@skipXLA
@skipIfTorchInductor("pytorch issues 113707")
@dtypes(*(all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16, torch.uint16, torch.uint32,
                                    torch.uint64) if not device_is_910A else all_types_and(torch.half, torch.bool)))
def test_deterministic_resize(self, device, dtype):
    """Check the fill pattern of storage grown by ``resize_`` in deterministic mode.

    With ``fill_uninitialized_memory=True`` the newly added part of the
    storage must be NaN for floating/complex dtypes and the dtype's maximum
    (``True`` for bool) otherwise; the pre-existing part must be unchanged.
    """
    # (size, stride or None for contiguous, size passed to resize_)
    test_cases = [
        ((10,), (1,), (5,)),
        ((10,), (0,), (10,)),
        ((10,), (1,), (20,)),
        ((2, 3, 4), None, (2, 3, 4)),
        ((2, 3, 4), None, (6, 3, 4)),
        ((2, 3, 4), None, (2, 5, 4)),
        ((2, 3, 4), None, (2, 3, 6)),
        ((2, 3, 4), None, (3, 4, 5)),
        ((2, 3, 4), (1, 4, 12), (2, 3, 4)),
        ((2, 3, 4), (1, 4, 12), (4, 3, 4)),
        ((2, 3, 4), (1, 4, 12), (2, 4, 4)),
        ((2, 3, 4), (1, 4, 12), (2, 3, 5)),
        ((2, 3, 4), (1, 4, 12), (3, 4, 5)),
        ((2, 3, 4), (1, 0, 1), (2, 4, 5)),
    ]
    expects_nan_fill = dtype.is_floating_point or dtype.is_complex

    def make_zeroed(size, stride):
        if stride is None:
            return torch.zeros(size, dtype=dtype, device=device)
        return torch.empty_strided(size, stride, dtype=dtype, device=device).fill_(0)

    for size, stride, resize_size in test_cases:
        a = make_zeroed(size, stride)
        storage_before = a.untyped_storage().clone()
        with DeterministicGuard(True, fill_uninitialized_memory=True):
            a.resize_(resize_size)
        storage_after = a.untyped_storage()

        before = torch.tensor(storage_before, dtype=dtype)
        n_before = before.numel()
        after = torch.tensor(storage_after, dtype=dtype)
        n_after = after.numel()

        if n_after <= n_before:
            # Storage did not grow: contents must be identical.
            self.assertEqual(before, after)
            continue

        # Storage grew: the old prefix is preserved and the tail is filled.
        self.assertEqual(after[:n_before], before)
        tail = after[n_before:]
        if expects_nan_fill:
            self.assertTrue(tail.isnan().all())
        else:
            max_val = True if dtype == torch.bool else torch.iinfo(dtype).max
            self.assertTrue(tail.eq(max_val).all())
@skipXLA
@skipIfTorchInductor("pytorch issues 113707")
@dtypes(*(all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16, torch.uint16, torch.uint32,
                                    torch.uint64) if not device_is_910A else all_types_and(torch.half, torch.bool)))
def test_deterministic_empty(self, device, dtype):
    """Check that ``empty``-style factories return filled memory in deterministic mode.

    With ``fill_uninitialized_memory=True`` every element must be NaN for
    floating/complex dtypes and the dtype's maximum (``True`` for bool)
    otherwise.
    """
    gen_fns = [
        lambda: torch.empty(10, 9, device=device, dtype=dtype),
        lambda: torch.empty(10, 9, out=torch.zeros(1, device=device, dtype=dtype)),
        lambda: torch.empty_like(torch.zeros(10, 9, device=device, dtype=dtype)),
        lambda: torch.empty_like(torch.zeros(10, 9, device=device, dtype=dtype), memory_format=torch.contiguous_format),
        lambda: torch.empty_strided((10, 9), (1, 5), device=device, dtype=dtype),
        lambda: torch.empty_permuted((2, 3, 5), (1, 0, 2), device=device, dtype=dtype),
    ]
    expects_nan_fill = dtype.is_floating_point or dtype.is_complex

    for make_tensor_fn in gen_fns:
        with DeterministicGuard(True, fill_uninitialized_memory=True):
            res = make_tensor_fn()

        if expects_nan_fill:
            self.assertTrue(res.isnan().all())
            continue
        max_val = True if dtype == torch.bool else torch.iinfo(dtype).max
        self.assertTrue(res.eq(max_val).all())
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_AvgPool3d(self, device):
    """Check the nondeterministic alert for AvgPool3d backward (expected on npu only)."""
    pool = torch.nn.AvgPool3d(3)
    x = torch.randn(2, 3, 3, 3, requires_grad=True, device=device)
    out = pool(x)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out, retain_graph=True)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'avg_pool3d_backward_npu', expect_alert)
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_AdaptiveAvgPool2d(self, device):
    """Check the nondeterministic alert for AdaptiveAvgPool2d backward (expected on npu only)."""
    pool = torch.nn.AdaptiveAvgPool2d(3)
    x = torch.randn(2, 3, 3, requires_grad=True, device=device)
    out = pool(x)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out, retain_graph=True)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'adaptive_avg_pool2d_backward_npu', expect_alert)
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_AdaptiveAvgPool3d(self, device):
    """Check the nondeterministic alert for AdaptiveAvgPool3d backward (expected on npu only)."""
    pool = torch.nn.AdaptiveAvgPool3d(3)
    x = torch.randn(2, 3, 3, 3, requires_grad=True, device=device)
    out = pool(x)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out, retain_graph=True)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'adaptive_avg_pool3d_backward_npu', expect_alert)
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_MaxPool3d(self, device):
    """Check the nondeterministic alert for MaxPool3d backward (expected on npu only)."""
    pool = torch.nn.MaxPool3d(3)
    x = torch.randn(2, 3, 3, 3, requires_grad=True, device=device)
    out = pool(x)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out, retain_graph=True)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'max_pool3d_with_indices_backward_npu', expect_alert)
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_AdaptiveMaxPool2d(self, device):
    """Check the nondeterministic alert for AdaptiveMaxPool2d backward (expected on npu only)."""
    pool = torch.nn.AdaptiveMaxPool2d(3)
    x = torch.randn(2, 3, 3, requires_grad=True, device=device)
    out = pool(x)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out, retain_graph=True)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'adaptive_max_pool2d_backward_npu', expect_alert)
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_FractionalMaxPool2d(self, device):
    """Check the nondeterministic alert for FractionalMaxPool2d backward (expected on npu only)."""
    pool = torch.nn.FractionalMaxPool2d(2, output_ratio=0.5)
    x = torch.randn(2, 3, 3, 3, requires_grad=True, device=device)
    out = pool(x)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out, retain_graph=True)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'fractional_max_pool2d_backward_npu', expect_alert)
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_FractionalMaxPool3d(self, device):
    """Check the nondeterministic alert for FractionalMaxPool3d backward (expected on npu only)."""
    pool = torch.nn.FractionalMaxPool3d(2, output_ratio=0.5)
    x = torch.randn(2, 3, 3, 3, 3, requires_grad=True, device=device)
    out = pool(x)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out, retain_graph=True)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'fractional_max_pool3d_backward_npu', expect_alert)
@dtypes(*floating_types_and(torch.half))
@onlyNativeDeviceTypes
def test_nondeterministic_alert_MaxUnpool1d(self, device, dtype):
    """Check the nondeterministic alert for the MaxUnpool1d forward pass."""
    on_cpu = torch.device(device).type == 'cpu'
    if on_cpu and dtype == torch.half:
        self.skipTest('float16 not implemented on CPU')

    unpool = torch.nn.MaxUnpool1d(3, 1)
    x = torch.randn(1, 1, 7, dtype=dtype, device=device)
    # All-zero indices, same shape as the input.
    indices = torch.zeros_like(x, dtype=torch.long, device=device)

    def run_forward():
        return unpool(x, indices)

    # The expected message names the 2d kernel, as in the original test.
    self.check_nondeterministic_alert(run_forward, 'max_unpooling2d_forward_out')
@dtypes(*floating_types_and(torch.half))
@onlyNativeDeviceTypes
def test_nondeterministic_alert_MaxUnpool2d(self, device, dtype):
    """Check the nondeterministic alert for the MaxUnpool2d forward pass."""
    on_cpu = torch.device(device).type == 'cpu'
    if on_cpu and dtype == torch.half:
        self.skipTest('float16 not implemented on CPU')

    unpool = torch.nn.MaxUnpool2d(3, 1)
    x = torch.randn(1, 1, 7, 7, dtype=dtype, device=device)
    # All-zero indices, same shape as the input.
    indices = torch.zeros_like(x, dtype=torch.long, device=device)

    def run_forward():
        return unpool(x, indices)

    self.check_nondeterministic_alert(run_forward, 'max_unpooling2d_forward_out')
@dtypes(*floating_types_and(torch.half))
@onlyNativeDeviceTypes
def test_nondeterministic_alert_MaxUnpool3d(self, device, dtype):
    """Check the nondeterministic alert for the MaxUnpool3d forward pass."""
    on_cpu = torch.device(device).type == 'cpu'
    if on_cpu and dtype == torch.half:
        self.skipTest('float16 not implemented on CPU')

    unpool = torch.nn.MaxUnpool3d(3, 1)
    x = torch.randn(1, 1, 7, 7, 7, dtype=dtype, device=device)
    # All-zero indices, same shape as the input.
    indices = torch.zeros_like(x, dtype=torch.long, device=device)

    def run_forward():
        return unpool(x, indices)

    self.check_nondeterministic_alert(run_forward, 'max_unpooling3d_forward_out')
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_interpolate_linear(self, device):
    """Check the nondeterministic alert for linear interpolate backward (expected on npu only)."""
    x = torch.randn(1, 2, 4, device=device, requires_grad=True)
    out = torch.nn.functional.interpolate(
        x, size=12, mode='linear', align_corners=False)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'upsample_linear1d_backward_out_npu', expect_alert)
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_interpolate_bilinear(self, device):
    """Check the nondeterministic alert for bilinear interpolate backward (expected on npu only)."""
    x = torch.randn(1, 2, 4, 4, device=device, requires_grad=True)
    out = torch.nn.functional.interpolate(
        x, size=12, mode='bilinear', align_corners=False)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'upsample_bilinear2d_backward_out_npu', expect_alert)
def test_no_nondeterministic_alert_interpolate_bilinear(self, device):
    """Check that bilinear interpolate forward+backward raises no nondeterministic alert.

    The whole forward/backward sequence runs inside the checked callable and
    ``should_alert`` is passed as ``False``.
    """
    source = torch.randn(1, 2, 4, 4, device=device, requires_grad=True)

    def forward_and_backward():
        upsampled = torch.nn.functional.interpolate(
            source, size=12, mode='bilinear', align_corners=False)
        return upsampled.backward(torch.ones_like(upsampled))

    self.check_nondeterministic_alert(
        forward_and_backward,
        'upsample_bilinear2d_backward_out_cuda',
        False)
def test_no_nondeterministic_alert_interpolate_trilinear(self, device):
    """Check that trilinear interpolate forward+backward raises no nondeterministic alert.

    The whole forward/backward sequence runs inside the checked callable and
    ``should_alert`` is passed as ``False``.
    """
    source = torch.randn(1, 2, 4, 4, 4, device=device, requires_grad=True)

    def forward_and_backward():
        upsampled = torch.nn.functional.interpolate(
            source, size=12, mode='trilinear', align_corners=False)
        return upsampled.backward(torch.ones_like(upsampled))

    self.check_nondeterministic_alert(
        forward_and_backward,
        'upsample_trilinear3d_backward_out_cuda',
        False)
@skipIfTorchInductor("aot-autograd issue")
def test_deterministic_replication_pad2d(self, device):
    """Run replicate-padding forward/backward in deterministic mode for several shapes."""
    # (input size, padding)
    test_cases = [
        [(1, 2, 4, 4), (0, 0, 0, 0)],
        [(1, 2, 4, 4), (3, 4, 5, 6)],
        [(3, 8, 7), (0, 0, 0, 0)],
        [(3, 8, 7), (4, 3, 2, 7)],
    ]
    # Negative paddings are added for every device type except xla.
    if torch.device(device).type != 'xla':
        test_cases += [
            [(4, 3, 5, 10), (-9, 4, 5, 6)],
            [(3, 8, 7), (-4, -2, -2, -3)],
        ]

    for size, padding in test_cases:
        x = torch.randn(*size, device=device, requires_grad=True)
        # NOTE(review): the reference gradient is reset for every case and
        # only one backward runs per case, so the comparison branch below is
        # never taken — confirm whether repeated runs were intended.
        reference_grad = None
        with DeterministicGuard(True):
            padded = torch.nn.functional.pad(x, padding, mode='replicate')
            padded.backward(torch.ones_like(padded))
            if reference_grad is not None:
                self.assertEqual(reference_grad, x.grad, atol=0, rtol=0)
            else:
                reference_grad = x.grad
            x.grad = None
@skipIfTorchInductor("pytorch issues 113707")
def test_deterministic_interpolate_bilinear(self, device):
    """Check that bilinear interpolate backward is bitwise repeatable in deterministic mode.

    The gradient from the first of five runs is the reference; each later
    run must match it exactly (``atol=0, rtol=0``).
    """
    x = torch.randn(1, 2, 4, 4, device=device, requires_grad=True)
    reference_grad = None
    with DeterministicGuard(True):
        for _ in range(5):
            upsampled = torch.nn.functional.interpolate(
                x, size=12, mode='bilinear', align_corners=False)
            upsampled.backward(torch.ones_like(upsampled))
            if reference_grad is not None:
                self.assertEqual(reference_grad, x.grad, atol=0, rtol=0)
            else:
                reference_grad = x.grad
            # Drop the accumulated gradient so each run starts fresh.
            x.grad = None
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_interpolate_bicubic(self, device):
    """Check the nondeterministic alert for bicubic interpolate backward (expected on npu only)."""
    x = torch.randn(1, 2, 4, 4, device=device, requires_grad=True)
    out = torch.nn.functional.interpolate(
        x, size=12, mode='bicubic', align_corners=False)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'upsample_bicubic2d_backward_out_npu', expect_alert)
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_interpolate_trilinear(self, device):
    """Check the nondeterministic alert for trilinear interpolate backward (expected on npu only)."""
    x = torch.randn(1, 2, 4, 4, 4, device=device, requires_grad=True)
    out = torch.nn.functional.interpolate(
        x, size=12, mode='trilinear', align_corners=False)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'upsample_trilinear3d_backward_out_npu', expect_alert)
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_ReflectionPad1d(self, device):
    """Check the nondeterministic alert for ReflectionPad1d backward (expected on npu only)."""
    pad = torch.nn.ReflectionPad1d((1, 2))
    x = torch.randn(2, 3, 8, device=device, requires_grad=True)
    out = pad(x)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out, retain_graph=True)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'reflection_pad1d_backward_out_npu', expect_alert)
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_ReflectionPad3d(self, device):
    """Check the nondeterministic alert for ReflectionPad3d backward (expected on npu only)."""
    pad = torch.nn.ReflectionPad3d((1, 2, 3, 4, 5, 6))
    x = torch.randn(2, 3, 8, 8, 8, device=device, requires_grad=True)
    out = pad(x)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out, retain_graph=True)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'reflection_pad3d_backward_out_npu', expect_alert)
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_ReplicationPad1d(self, device):
    """Check the nondeterministic alert for ReplicationPad1d backward (expected on npu only)."""
    pad = torch.nn.ReplicationPad1d((1, 2))
    x = torch.randn(2, 3, 4, device=device, requires_grad=True)
    out = pad(x)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out, retain_graph=True)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'replication_pad1d_backward_npu', expect_alert)
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_ReplicationPad2d(self, device):
    """Check the nondeterministic alert for ReplicationPad2d backward.

    Two scenarios are covered:

    1. Forward run outside deterministic mode: backward is expected to alert
       on npu devices only.
    2. Forward run under ``DeterministicGuard(True)``: backward must not
       alert on any device.
    """
    module = torch.nn.ReplicationPad2d((1, 2, 3, 4))
    input_ = torch.randn(2, 3, 4, 4, device=device, requires_grad=True)
    res = module(input_)
    grad = torch.ones_like(res)

    self.check_nondeterministic_alert(
        lambda: res.backward(grad, retain_graph=True),
        'replication_pad2d_backward_npu',
        torch.device(device).type == 'npu')

    with DeterministicGuard(True):
        # Fix: this previously called module(input), passing the Python
        # builtin `input` instead of the tensor, so the forward call raised
        # before the deterministic-path check could run.
        res = module(input_)

    grad = torch.ones_like(res)

    self.check_nondeterministic_alert(
        lambda: res.backward(grad, retain_graph=True),
        'replication_pad2d_backward_npu',
        False)
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_ReplicationPad3d(self, device):
    """Check the nondeterministic alert for ReplicationPad3d backward (expected on npu only)."""
    pad = torch.nn.ReplicationPad3d((1, 2, 3, 4, 5, 6))
    x = torch.randn(2, 3, 4, 4, 4, device=device, requires_grad=True)
    out = pad(x)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out, retain_graph=True)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'replication_pad3d_backward_npu', expect_alert)
@skipIfTorchDynamo("Warning is not raised.")
def test_nondeterministic_alert_NLLLoss(self, device):
    """Check the nondeterministic alert for the NLLLoss forward pass (expected on npu only)."""
    loss_fn = torch.nn.NLLLoss()
    scores = torch.randn(2, 3, 5, 5, device=device)
    # Integer class targets obtained by scaling uniform samples by 3 and flooring.
    target = torch.rand(2, 5, 5, device=device).mul(3).floor().long()

    def run_forward():
        return loss_fn(scores, target)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_forward, 'nll_loss2d_forward_out_npu_template', expect_alert)
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_CTCLoss(self, device):
    """Check the nondeterministic alert for CTCLoss backward (expected on npu only)."""
    loss_fn = torch.nn.CTCLoss()
    log_probs = torch.randn(50, 3, 15, device=device, requires_grad=True)
    target = torch.randint(0, 14, (3, 30), device=device)
    input_lengths = [50, 50, 50]
    target_lengths = [30, 25, 20]
    loss = loss_fn(log_probs, target, input_lengths, target_lengths)
    grad_out = torch.ones_like(loss)

    def run_backward():
        return loss.backward(grad_out, retain_graph=True)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'ctc_loss_backward_gpu', expect_alert)
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_EmbeddingBag_max(self, device):
    """Check the nondeterministic alert for 'max'-mode EmbeddingBag backward (expected on npu only)."""
    weight = torch.randn(4, 3, device=device, requires_grad=True)
    bag = torch.nn.EmbeddingBag(4, 3, None, 2., False, 'max', _weight=weight)
    indices = torch.randint(0, 3, (4, 3), device=device)
    out = bag(indices)
    grad_out = torch.ones_like(out)

    def run_backward():
        return out.backward(grad_out, retain_graph=True)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'embedding_bag_backward_npu_max', expect_alert)
@skipIfRocmArch(MI300_ARCH)
@skipIfTorchInductor("pytorch/issues/113707")
@onlyPRIVATEUSE1
def test_deterministic_cumsum(self, device):
    """Check that ``cumsum`` is bitwise repeatable in deterministic mode.

    For each input, the first result is compared exactly against three
    further runs, then loosely against the CPU result. A complex128 input
    is checked the same way after the real-valued cases.
    """
    # (size, dim)
    test_cases = [
        [(1025,), 0],
        [(8193,), 0],
        [(8191,), 0],
        [(128256,), 0],
        [(1282560,), 0],
        [(12825600,), 0],
    ]

    def check_repeatable(values, dim):
        with DeterministicGuard(True):
            first = values.cumsum(dim)
            for _ in range(3):
                again = values.cumsum(dim)
                self.assertEqual(first, again, atol=0, rtol=0)
        cpu_result = values.cpu().cumsum(dim)
        self.assertEqual(first, cpu_result, atol=1e-3, rtol=1e-2)

    for size, dim in test_cases:
        check_repeatable(100 * torch.randn(*size, device=device), dim)

    # Complex input of length num_sm * elems_per_cta. The original reused the
    # loop variable `dim` here, which is 0 after the loop above.
    num_sm = 132
    elems_per_cta = 256 * 16
    N = num_sm * elems_per_cta
    complex_values = torch.rand(N, dtype=torch.complex128, device=device)
    check_repeatable(complex_values, 0)
@onlyPRIVATEUSE1
@largeTensorTest('49GB')
def test_cumsum_64bit_indexing(self, device):
    """Compare a whole-tensor ``cumsum`` with chunked in-place ``cumsum_`` on a large tensor.

    Only the first and last rows are compared.
    """
    b = torch.ones(2 * 4096 * 8, 100000, dtype=torch.float, device='npu')
    b /= 100000
    d = b.cumsum(dim=-1)

    n_rows, n_cols = b.shape[0], b.shape[-1]
    # Rows per chunk, chosen so a chunk holds at most 2**30 elements.
    chunk = 2**30 // n_cols
    for start in range(0, n_rows, chunk):
        stop = min(start + chunk, n_rows)
        b[start:stop, :].cumsum_(dim=-1)

    for row in (0, -1):
        self.assertEqual(b[row, :], d[row, :], atol=3e-5, rtol=3e-5)
@expectedFailureMeta
@onlyNativeDeviceTypes
def test_nondeterministic_alert_put(self, device):
    """Check the nondeterministic alert for ``put``/``put_`` with ``accumulate=False``."""
    a = torch.randn(10, device=device)
    # The index 0 appears twice.
    indices = torch.tensor([0, 0], device=device)
    values = torch.tensor([0., 1.], device=device)

    for op_call in (torch.Tensor.put, torch.Tensor.put_):
        def run_put(op=op_call):
            return op(a, indices, values, accumulate=False)

        self.check_nondeterministic_alert(run_put, 'put_')
@skipIfTorchInductor("warning is logged from the FallbackKernel: torch.ops.aten.put_.default when warn_only=True")
def test_nondeterministic_alert_put_accumulate(self, device):
    """Check the nondeterministic alert for ``put``/``put_`` with ``accumulate=True`` (expected on npu only)."""
    a = torch.randn(10, device=device)
    # The index 0 appears twice.
    indices = torch.tensor([0, 0], device=device)
    values = torch.tensor([0., 1.], device=device)

    for op_call in (torch.Tensor.put, torch.Tensor.put_):
        def run_put(op=op_call):
            return op(a, indices, values, accumulate=True)

        self.check_nondeterministic_alert(
            run_put,
            'put_',
            torch.device(device).type == 'npu')
@dtypes(torch.float32)
@dtypesIfPRIVATEUSE1(torch.float32, torch.int32)
@skipIfMPS
def test_nondeterministic_alert_histc(self, device, dtype):
    """Run ``histc`` on an empty tensor through the nondeterministic-alert checker."""
    a = torch.tensor([], device=device, dtype=dtype)

    for op_call in (torch.histc, torch.Tensor.histc):
        def run_histc(op=op_call):
            return op(a, min=0, max=3)

        # NOTE(review): the expected message names npu but the condition
        # checks for a 'cuda' device, so no alert is expected on npu runs —
        # confirm this is intentional.
        expect_alert = torch.device(device).type == 'cuda' and dtype.is_floating_point
        self.check_nondeterministic_alert(
            run_histc,
            '_histc_npu with floating point input',
            expect_alert)
@skipIfMPS
def test_nondeterministic_alert_bincount(self, device):
    """Check the nondeterministic alert for ``bincount``.

    With weights an alert is expected on npu only; without weights no alert
    is expected.
    """
    a = torch.tensor([], device=device, dtype=torch.long)
    weights = torch.tensor([], device=device)

    for op_call in (torch.bincount, torch.Tensor.bincount):
        def weighted(op=op_call):
            return op(a, weights)

        def unweighted(op=op_call):
            return op(a)

        self.check_nondeterministic_alert(
            weighted,
            '_bincount_npu',
            torch.device(device).type == 'npu')
        self.check_nondeterministic_alert(
            unweighted,
            '_bincount_npu',
            False)
@dtypes(torch.double)
def test_nondeterministic_alert_kthvalue(self, device, dtype):
    """Check the nondeterministic alert for every ``kthvalue`` call style.

    The function, method and ``out=`` variants are each run through
    ``check_nondeterministic_alert``; an alert is expected on npu only.
    """
    def test_func(call_type):
        S = 10
        k = 5
        a = torch.randn(S, device=device)
        if call_type == 'function':
            torch.kthvalue(a, k)
        elif call_type == 'method':
            a.kthvalue(k)
        elif call_type == 'out':
            values = torch.empty_like(a)
            indices = torch.empty((), device=device, dtype=torch.long)
            torch.kthvalue(a, k, out=(values, indices))
        else:
            self.fail(f"'{call_type}' is not a valid call type")

    for call_type in ['function', 'method', 'out']:
        # Fix: the callable used to hard-code 'function', so the 'method' and
        # 'out' variants were never exercised. The loop value is bound as a
        # default argument to avoid late binding.
        self.check_nondeterministic_alert(
            lambda call_type=call_type: test_func(call_type),
            'kthvalue NPU',
            torch.device(device).type == 'npu')
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_grid_sample_2d(self, device):
    """Check the nondeterministic alert for 2d ``grid_sample`` backward (expected on npu only)."""
    image = torch.empty(1, 1, 2, 2, device=device, requires_grad=True)
    grid = torch.empty(1, 1, 1, 2, device=device)
    sampled = torch.nn.functional.grid_sample(image, grid, align_corners=False)
    grad_out = torch.ones_like(sampled)

    def run_backward():
        return sampled.backward(grad_out, retain_graph=True)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'grid_sampler_2d_backward_npu', expect_alert)
@skipIfMPS
@skipIfTorchInductor("pytorch issues 113707")
def test_nondeterministic_alert_grid_sample_3d(self, device):
    """Check the nondeterministic alert for 3d ``grid_sample`` backward (expected on npu only)."""
    volume = torch.empty(1, 1, 2, 2, 2, device=device, requires_grad=True)
    grid = torch.empty(1, 1, 1, 2, 3, device=device)
    sampled = torch.nn.functional.grid_sample(volume, grid, align_corners=False)
    grad_out = torch.ones_like(sampled)

    def run_backward():
        return sampled.backward(grad_out, retain_graph=True)

    expect_alert = torch.device(device).type == 'npu'
    self.check_nondeterministic_alert(
        run_backward, 'grid_sampler_3d_backward_npu', expect_alert)
def test_invalid_shapes_grid_sampler(self, device):
    """Check that the grid-sampler entry points reject the listed input/grid shape pairs.

    Each call must raise ``RuntimeError`` whose message matches
    "but got input with".
    """
    make_arg = partial(
        make_tensor, device=device, dtype=torch.float64, requires_grad=True)

    # (input shape, grid shape)
    shape_pairs = (
        ((5, 5, 5, 5, 5,), (1, 1, 1, 4, 4,)),
        ((5, 5, 5, 5,), (1, 1, 4, 4,)),
    )

    interpolation_mode = 0
    padding_mode = 0
    align_corners = True
    err = "but got input with"

    samplers = (
        torch.grid_sampler,
        torch.grid_sampler_2d,
        torch.grid_sampler_3d,
        torch._grid_sampler_2d_cpu_fallback,
    )

    for input_shape, grid_shape in shape_pairs:
        input_ = make_arg(input_shape)
        grid = make_arg(grid_shape, low=-1, high=1)

        for sampler in samplers:
            with self.assertRaisesRegex(RuntimeError, err):
                sampler(
                    input_, grid, interpolation_mode, padding_mode,
                    align_corners)

        # cudnn path: only off-CPU, with cuDNN under test and not on ROCm.
        if device != 'cpu' and TEST_CUDNN and not TEST_WITH_ROCM:
            with self.assertRaisesRegex(RuntimeError, err):
                torch.cudnn_grid_sampler(input_, grid)
def test_dist(self, device):
def run_test(x, y):
for p in [0, 1, 2, 3, 4, inf, -inf]:
dist_xy = torch.dist(x, y, p)
dist_xy_norm = torch.norm(x - y, p)
self.assertEqual(dist_xy, dist_xy_norm)
run_test(torch.randn(5, device=device), torch.randn(5, device=device))
x = torch.zeros(3, device=device)
y = torch.zeros(3, device=device)
y[1] = 1.
run_test(x, y)
@dtypes(torch.double)
def test_nondeterministic_alert_median(self, device, dtype):
    """Check the nondeterministic alert for the ``median`` call styles.

    Variants that return indices are expected to alert on npu; the plain
    reductions are never expected to alert.
    """
    def test_func(call_type):
        S = 10
        a = torch.randn(S, device=device)
        if call_type == 'function':
            torch.median(a)
        elif call_type == 'function with indices':
            torch.median(a, 0)
        elif call_type == 'method':
            a.median()
        elif call_type == 'method with indices':
            a.median(0)
        elif call_type == 'out with indices':
            result = torch.empty_like(a)
            indices = torch.empty((), dtype=torch.long, device=device)
            torch.median(a, 0, out=(result, indices))
        else:
            self.fail(f"'{call_type}' is not a valid call type")

    is_npu = torch.device(device).type == 'npu'
    # (call type, whether an alert is expected)
    expectations = (
        ('function', False),
        ('function with indices', is_npu),
        ('method', False),
        ('method with indices', is_npu),
        ('out with indices', is_npu),
    )
    for call_type, should_error in expectations:
        self.check_nondeterministic_alert(
            lambda call_type=call_type: test_func(call_type),
            'median NPU with indices output',
            should_error)
def _test_gather_backward_one_dim(self, device, deterministic: bool = False) -> None:
    """Check the gradient of a 1-D ``torch.gather`` with randomly sized inputs.

    On npu the backward is repeated twice and must reproduce the first
    gradient exactly. On other devices the gradient is compared exactly with
    an element-by-element accumulation of the upstream weights.
    """
    with DeterministicGuard(deterministic):
        m = random.randint(2000, 3000)
        elems = random.randint(10 * m, 20 * m)
        dim = 0
        src = torch.randn(m, device=device, requires_grad=True)
        idx = torch.randint(m, (elems,), device=device)
        gathered = torch.gather(src, dim, idx)
        weight = torch.rand_like(gathered, device=device) * 10 ** 6
        gathered.backward(weight)
        assert src.grad is not None
        first_grad = src.grad.detach().clone()

        if torch.device(device).type != 'npu':
            # Reference: accumulate each weight into the slot it was gathered from.
            expected = torch.zeros_like(src, device=device)
            for pos in range(elems):
                expected[idx[pos]] += weight[pos]
            self.assertEqual(first_grad, expected, atol=0, rtol=0)
        else:
            for _ in range(2):
                src.grad.data.zero_()
                gathered = torch.gather(src, dim, idx)
                gathered.backward(weight)
                self.assertEqual(src.grad, first_grad, atol=0, rtol=0)
@onlyNativeDeviceTypes
def test_gather_backward_deterministic_path(self, device) -> None:
    """Run the 1-D gather backward check with deterministic mode enabled."""
    self._test_gather_backward_one_dim(device, deterministic=True)
@onlyCPU
def test_gather_backward_one_dim(self, device) -> None:
    """Run the 1-D gather backward check with deterministic mode disabled."""
    self._test_gather_backward_one_dim(device, deterministic=False)
@onlyNativeDeviceTypes
def test_scatter_add_one_dim_deterministic(self, device) -> None:
    """Check that 1-D ``scatter_add`` is repeatable and correct in deterministic mode.

    Five repeated calls must match the first result exactly; the final
    result is then compared, with tolerance, against an element-by-element
    accumulation.
    """
    with DeterministicGuard(True):
        m = random.randint(20, 30)
        elems = random.randint(2000 * m, 3000 * m)
        dim = 0
        src = torch.randn(elems, device=device)
        idx = torch.randint(m, (elems,), device=device)
        base = torch.zeros(m, device=device)

        current = base.scatter_add(dim, idx, src)
        for _ in range(5):
            repeated = base.scatter_add(dim, idx, src)
            self.assertEqual(current, repeated, atol=0, rtol=0)
            current = repeated

        # Reference: add each source element to its target slot one by one.
        expected = torch.zeros(m, device=device)
        for pos in range(elems):
            expected[idx[pos]] += src[pos]
        self.assertEqual(current, expected, atol=1e-4, rtol=1e-5)
@onlyNativeDeviceTypes
def test_scatter_zero_size_index(self, device) -> None:
    """Check that ``scatter`` with a zero-row index leaves the tensor unchanged."""
    # NOTE(review): `device` is not passed to any tensor below, so they are
    # all created on the default device — confirm whether that is intended.
    empty_shape = (0, 4)
    null_index = torch.zeros(empty_shape, dtype=torch.int64)
    null_arr = torch.zeros(empty_shape)
    original = torch.arange(4, dtype=torch.float32)

    scattered = original.scatter(0, null_index, null_arr)
    self.assertEqual(scattered, original, atol=0, rtol=0)
@onlyPRIVATEUSE1
@skipIfTorchInductor("FIXME")
def test_sync_warning(self, device):
    """Check which operations are reported as synchronizing under ``CudaSyncGuard``.

    Operations in ``expect_no_sync`` must run cleanly at guard levels 1 and
    2. Operations in ``expect_sync`` must warn at level 1 and raise
    ``RuntimeError`` at level 2.
    """
    sync_pattern = "called a synchronizing "

    def _expect_sync_reported(f, level):
        with CudaSyncGuard(level):
            if level == 1:
                checker = self.assertWarnsRegex(UserWarning, sync_pattern)
            elif level == 2:
                checker = self.assertRaisesRegex(RuntimeError, sync_pattern)
            else:
                # Other levels: nothing is run, as in the if/elif form.
                return
            with checker:
                f()

    def _expect_clean_run(f, level):
        with CudaSyncGuard(level):
            f()

    def _index_put(x, ind, val):
        x[ind] = val
        return x

    def _index_get(x, ind):
        return x[ind]

    def _branch_on(x):
        # Uses the tensor's truth value to pick a branch.
        return x if x else 2 * x

    size = 4
    x = torch.rand(size, device=device)
    y = torch.rand((), device=device)
    ind = torch.randint(size, (3,), device=device)
    ind_cpu = ind.cpu()
    repeats = torch.full((1,), 2, device=device)
    mask = torch.randint(2, (size,), device=device, dtype=bool)
    mask_cpu = mask.cpu()

    expect_no_sync = (
        lambda: _index_put(x, mask, 1.),
        lambda: _index_put(x, mask_cpu, y),
        lambda: _index_put(x, ind, y),
        lambda: _index_get(x, mask_cpu),
        lambda: _index_get(x, ind),
        lambda: torch.nn.functional.one_hot(ind, num_classes=size),
        lambda: torch.randperm(20000, device=device),
        lambda: torch.repeat_interleave(x, 2, output_size=2 * size),
        lambda: torch.repeat_interleave(x, repeats, output_size=2 * size),
        lambda: torch.any(y),
    )
    expect_sync = (
        lambda: _index_put(x, mask, y),
        lambda: _index_put(x, ind_cpu, y),
        lambda: _index_get(x, mask),
        lambda: _index_get(x, ind_cpu),
        lambda: x.nonzero(),
        lambda: _branch_on(y),
        lambda: torch.nn.functional.one_hot(ind),
        lambda: torch.repeat_interleave(x, repeats),
    )

    guard_levels = (1, 2)
    for f, level in product(expect_no_sync, guard_levels):
        _expect_clean_run(f, level)
    for f, level in product(expect_sync, guard_levels):
        _expect_sync_reported(f, level)
@dtypes(*floating_types_and(torch.half, torch.bfloat16))
@skipIfMPS
def test_log_normal(self, device, dtype):
a = torch.tensor([10], dtype=dtype, device=device).log_normal_()
self.assertEqual(a.dtype, dtype)
self.assertEqual(a.size(), torch.Size([1]))
@dtypes(*all_types_and(torch.half, torch.bfloat16))
@skipIfMPS
def test_geometric(self, device, dtype):
a = torch.tensor([10], dtype=dtype, device=device).geometric_(0.5)
self.assertEqual(a.dtype, dtype)
self.assertEqual(a.size(), torch.Size([1]))
@skipIfMPS
def test_repeat_interleave(self, device):
y = torch.tensor([[1, 2], [3, 4]], device=device)
temp = y.repeat_interleave(2)
self.assertEqual(torch.Size([8]), temp.size())
for dtype in [torch.int, torch.long]:
lengths = torch.tensor([1, 2], dtype=dtype, device=device)
output_size = torch.sum(lengths)
a = torch.repeat_interleave(
y,
lengths,
dim=0,
)
self.assertEqual(a.dtype, y.dtype)
self.assertEqual(a.size(), torch.Size([3, 2]))
a_with_output = torch.repeat_interleave(
y,
lengths,
dim=0,
output_size=output_size,
)
self.assertEqual(a_with_output.dtype, y.dtype)
self.assertEqual(a_with_output.size(), torch.Size([3, 2]))
@dtypes(*floating_types())
@dtypesIfCPU(*floating_types_and(torch.bfloat16, torch.half))
@dtypesIfPRIVATEUSE1(*floating_types_and(torch.half))
def test_bernoulli_p(self, device, dtype):
for trivial_p in ([0, 1], [1, 0, 1, 1, 0, 1]):
x = torch.tensor(trivial_p, dtype=dtype, device=device)
self.assertEqual(x.bernoulli().tolist(), trivial_p)
def isBinary(t):
return torch.ne(t, 0).mul_(torch.ne(t, 1)).sum().item() == 0
p = torch.rand(5, 5, dtype=dtype, device=device)
self.assertTrue(isBinary(p.bernoulli()))
p = torch.rand(5, dtype=dtype, device=device).expand(5, 5)
self.assertTrue(isBinary(p.bernoulli()))
p = torch.rand(5, 5, dtype=dtype, device=device)
torch.bernoulli(torch.rand_like(p), out=p)
self.assertTrue(isBinary(p))
@dtypes(*floating_types())
@dtypesIfCPU(*all_types_and(torch.bool, torch.half))
@dtypesIfPRIVATEUSE1(*all_types_and(torch.bool, torch.half))
def test_bernoulli_self(self, device, dtype):
def isBinary(t):
return torch.ne(t, 0).mul_(torch.ne(t, 1)).sum().item() == 0
t = torch.empty(10, 10, dtype=dtype, device=device)
t.fill_(2)
t.bernoulli_(0.5)
self.assertTrue(isBinary(t))
for p_dtype in floating_types_and(*[torch.half] if device.startswith('npu') else []):
p = torch.rand(10, dtype=p_dtype, device=device).expand(10, 10)
t.fill_(2)
t.bernoulli_(p)
self.assertTrue(isBinary(t))
t.fill_(2)
torch.bernoulli(torch.rand_like(t, dtype=p_dtype), out=t)
self.assertTrue(isBinary(t))
t.fill_(2)
t.bernoulli_(torch.rand_like(t, dtype=p_dtype))
self.assertTrue(isBinary(t))
@slowTest
@dtypes(*floating_types_and(torch.half))
@dtypesIfPRIVATEUSE1(*floating_types_and(torch.half))
def test_bernoulli_edge_cases(self, device, dtype):
    # With probability exactly 0 no sample may be 1, and with probability
    # exactly 1 no sample may be 0, across a 10000 x 10000 tensor.
    for make_probs, forbidden in ((torch.zeros, 1), (torch.ones, 0)):
        probs = make_probs(10000, 10000, dtype=dtype, device=device)
        violations = (torch.bernoulli(probs) == forbidden).sum()
        self.assertEqual(violations, 0)
@dtypes(*(floating_types_and(torch.half, torch.bfloat16) if not device_is_910A else floating_types_and(torch.half)))
@skipIfMPS
def test_exponential(self, device, dtype):
    # In-place exponential sampling keeps dtype and shape.
    sample = torch.tensor([10], dtype=dtype, device=device)
    sample = sample.exponential_(0.5)
    self.assertEqual(sample.dtype, dtype)
    self.assertEqual(sample.size(), torch.Size([1]))

    # An infinite rate must produce exactly zero.
    degenerate = torch.empty((1,), device=device, dtype=dtype)
    degenerate = degenerate.exponential_(float('inf'))
    self.assertTrue(degenerate.item() == 0)

    # A negative rate is rejected with a RuntimeError.
    with self.assertRaises(RuntimeError):
        torch.empty((1,), device=device, dtype=dtype).exponential_(-0.5)
@onlyPRIVATEUSE1
@dtypes(torch.half, torch.float)
def test_exponential_no_zero(self, device, dtype):
x = torch.empty(50000000, device=device, dtype=dtype).exponential_()
self.assertTrue(x.min() > 0)
def _generate_correlation_tensors(self, device, dtype):
yield make_tensor((0, 0), dtype=dtype, device=device)
yield make_tensor((1, 0), dtype=dtype, device=device)
yield make_tensor((0, 1), dtype=dtype, device=device)
yield make_tensor((2,), dtype=dtype, device=device)
yield make_tensor((2, 1), dtype=dtype, device=device)
yield make_tensor((2, 2), dtype=dtype, device=device)
yield make_tensor((2, 3), dtype=dtype, device=device)
yield make_tensor((5, 10), dtype=dtype, device=device)
yield make_tensor((5, 10), dtype=dtype, device=device, noncontiguous=True)
if dtype != torch.int:
yield torch.tensor([0, -2, nan, 10.2, inf], dtype=dtype, device=device)
@onlyNativeDeviceTypes
@dtypes(torch.int, torch.float, torch.cfloat)
def test_corrcoef(self, device, dtype):
    # torch.corrcoef must agree with np.corrcoef on every generated input.
    for sample in self._generate_correlation_tensors(device, dtype):
        computed = torch.corrcoef(sample)
        reference = np.corrcoef(sample.cpu().numpy())
        self.assertEqual(computed, reference, atol=1e-04, rtol=1e-03, exact_dtype=False)
@skipRocmIfTorchInductor
@dtypes(torch.int, torch.float, torch.cfloat)
def test_cov(self, device, dtype):
    # torch.cov must agree with np.cov for every generated input, for each
    # correction in {0, 1, 2} and for every combination of present/absent
    # frequency and observation weights.

    def check(t, correction=1, fweights=None, aweights=None):
        # Compare one torch.cov call against np.cov (ddof == correction).
        res = torch.cov(t, correction=correction, fweights=fweights, aweights=aweights)
        t = t.cpu().numpy()
        fweights = fweights.cpu().numpy() if fweights is not None else None
        aweights = aweights.cpu().numpy() if aweights is not None else None
        ref = np.cov(t, ddof=correction, fweights=fweights, aweights=aweights)
        self.assertEqual(res, ref, atol=1e-05, rtol=1e-05, exact_dtype=False)

    for x in self._generate_correlation_tensors(device, dtype):
        check(x)
        # 1-D inputs are a single variable; otherwise columns are observations.
        num_observations = x.numel() if x.ndim < 2 else x.size(1)
        if num_observations > 0:
            fweights = torch.randint(1, 10, (num_observations,), device=device)
            aweights = make_tensor((num_observations,), dtype=torch.float, device=device, low=1)
            for correction, fw, aw in product([0, 1, 2], [None, fweights], [None, aweights]):
                # Pass the loop's own selection (fw, aw). Passing the full
                # tensors here would repeat the both-weights case for every
                # combination and never test the None variants.
                check(x, correction, fw, aw)
@skipIfNoSciPy
@dtypes(*(floating_types_and(torch.half, torch.bfloat16) if not device_is_910A else floating_types_and(torch.half)))
def test_uniform_kstest(self, device, dtype):
    # Kolmogorov-Smirnov check of uniform_ against scipy's 'uniform'
    # distribution for every valid (from_, to_) pair.
    from scipy import stats
    size = 1000
    for from_, to_ in product([-42, 0, 4.2], [-4.2, 0, 42]):
        if not to_ > from_:
            # Only intervals with a positive width are sampled.
            continue
        sample = torch.empty(size, dtype=dtype, device=device).uniform_(from_, to_)
        width = to_ - from_
        res = stats.kstest(sample.cpu().to(torch.double), 'uniform', args=(from_, width))
        self.assertTrue(res.statistic < 0.1)
@skipIfNoSciPy
@dtypes(*floating_types_and(torch.half))
@dtypesIfPRIVATEUSE1(*(floating_types_and(torch.half, torch.bfloat16)
                       if not device_is_910A else floating_types_and(torch.half)))
def test_normal_kstest(self, device, dtype):
    # Kolmogorov-Smirnov check of normal_ against scipy's 'norm'
    # distribution over a grid of means and standard deviations.
    from scipy import stats
    size = 1000
    for mean, std in product([-10, 0, 50], [1, 5, 10]):
        sample = torch.empty(size, dtype=dtype, device=device).normal_(mean=mean, std=std)
        res = stats.kstest(sample.cpu().to(torch.double), 'norm', args=(mean, std))
        self.assertTrue(res.statistic < 0.1)
@skipIfMPS
@skipIfNoSciPy
@skipRocmIfTorchInductor
@dtypes(*floating_types_and(torch.half, torch.bfloat16))
def test_lognormal_kstest(self, device, dtype):
    # Kolmogorov-Smirnov check of log_normal_ against scipy's 'lognorm'
    # distribution; torch.half gets a looser bound (0.3 instead of 0.1).
    from scipy import stats
    size = 1000
    bound = 0.3 if dtype == torch.half else 0.1
    for mean, std in product([-3, 0, 7], [1, 5, 7]):
        sample = torch.empty(size, dtype=dtype, device=device).log_normal_(mean=mean, std=std)
        res = stats.kstest(sample.cpu().to(torch.double), 'lognorm', args=(std, 0, math.exp(mean)))
        self.assertTrue(res.statistic < bound)
@skipIfMPS
@skipIfNoSciPy
@dtypes(*(floating_types_and(torch.half, torch.bfloat16) if not device_is_910A else floating_types_and(torch.half)))
def test_exponential_kstest(self, device, dtype):
    # Kolmogorov-Smirnov check of exponential_ against scipy's 'expon'
    # distribution (location 0, scale 1 / lambd).
    from scipy import stats
    size = 1000
    for lambd in (0.5, 1.0, 5.0):
        sample = torch.empty(size, dtype=dtype, device=device)
        sample = sample.exponential_(lambd=lambd)
        as_double = sample.cpu().to(torch.double)
        res = stats.kstest(as_double, 'expon', args=(0, 1 / lambd,))
        self.assertTrue(res.statistic < 0.1)
@skipIfMPS
@skipIfNoSciPy
@skipRocmIfTorchInductor
@dtypes(*floating_types_and(torch.half, torch.bfloat16))
def test_cauchy_kstest(self, device, dtype):
    # Kolmogorov-Smirnov check of cauchy_ against scipy's 'cauchy'
    # distribution over a grid of medians and scales.
    from scipy import stats
    size = 1000
    for median, sigma in product([-10, 0, 50], [0.5, 1.0, 10.0]):
        sample = torch.empty(size, dtype=dtype, device=device).cauchy_(median=median, sigma=sigma)
        res = stats.kstest(sample.cpu().to(torch.double), 'cauchy', args=(median, sigma))
        self.assertTrue(res.statistic < 0.1)
@slowTest
@onlyPRIVATEUSE1
@dtypes(torch.bfloat16, torch.float32)
def test_cauchy_no_inf(self, device, dtype):
for _ in range((2**16) * 2):
x = torch.empty((2**16), dtype=dtype, device=device)
x.cauchy_()
self.assertFalse(x.isinf().sum())
@dtypes(*floating_types_and(torch.half, torch.bfloat16))
def test_cauchy(self, device, dtype):
a = torch.tensor([10], dtype=dtype, device=device).cauchy_(0.0, 0.5)
self.assertEqual(a.dtype, dtype)
self.assertEqual(a.size(), torch.Size([1]))
t = torch.empty((1,), device=device, dtype=dtype).cauchy_(float('inf'), 0.5)
self.assertTrue(t.item() == float('inf'))
with self.assertRaises(RuntimeError):
torch.empty((1,), device=device, dtype=dtype).cauchy_(0.0, 0.0)
@skipIfMPS
@skipIfNoSciPy
@skipRocmIfTorchInductor
@dtypes(*all_types_and(torch.half, torch.bfloat16))
def test_geometric_kstest(self, device, dtype):
    # Chi-square goodness-of-fit of geometric_ samples against scipy's
    # geometric pmf, using unit-width bins over [1, 99].
    from scipy import stats
    size = 1000
    bin_edges = np.arange(1, 100)
    support = np.arange(1, 99)
    for p in (0.2, 0.5, 0.8):
        sample = torch.empty(size, dtype=dtype, device=device).geometric_(p=p)
        observed = np.histogram(sample.cpu().to(torch.double), bin_edges)[0]
        predicted = stats.geom(p).pmf(support) * size
        res = stats.chisquare(observed, predicted)
        self.assertEqual(res.pvalue, 1.0, atol=0.1, rtol=0)
def test_pairwise_distance_empty(self, device):
shape = (2, 0)
x = torch.randn(shape, device=device)
y = torch.randn(shape, device=device)
self.assertEqual(torch.zeros(2, device=device), torch.pairwise_distance(x, y))
self.assertEqual(torch.zeros((2, 1), device=device), torch.pairwise_distance(x, y, keepdim=True))
shape = (0, 2)
x = torch.randn(shape, device=device)
y = torch.randn(shape, device=device)
self.assertEqual(torch.zeros(0, device=device), torch.pairwise_distance(x, y))
self.assertEqual(torch.zeros((0, 1), device=device), torch.pairwise_distance(x, y, keepdim=True))
def test_pdist_empty(self, device):
shape = (0, 2)
x = torch.randn(shape, device=device)
self.assertEqual(torch.empty(0, device=device), torch.pdist(x))
shape = (1, 2)
x = torch.randn(shape, device=device)
self.assertEqual(torch.empty(0, device=device), torch.pdist(x))
shape = (3, 0)
x = torch.randn(shape, device=device)
self.assertEqual(torch.zeros(3, device=device), torch.pdist(x))
def test_cdist_empty(self, device):
x = torch.randn((0, 5), device=device)
y = torch.randn((4, 5), device=device)
self.assertEqual(torch.empty(0, 4, device=device), torch.cdist(x, y))
x = torch.randn((2, 5), device=device)
y = torch.randn((0, 5), device=device)
self.assertEqual(torch.empty(2, 0, device=device), torch.cdist(x, y))
x = torch.randn((2, 0), device=device)
y = torch.randn((3, 0), device=device)
self.assertEqual(torch.zeros(2, 3, device=device), torch.cdist(x, y))
x = torch.randn((2, 0), device=device)
y = torch.randn((0, 0), device=device)
self.assertEqual(torch.empty(2, 0, device=device), torch.cdist(x, y))
def _brute_cdist(self, x, y, p=2):
r1 = x.shape[-2]
r2 = y.shape[-2]
if r1 == 0 or r2 == 0:
return torch.empty(r1, r2, device=x.device)
return torch.norm(x[..., None, :] - y[..., None, :, :], p=p, dim=-1)
@skipIfMPS
def test_cdist_norm(self, device):
    # cdist must match the brute-force reference for a grid of row counts,
    # feature sizes and norms. p=2 is checked for both Euclidean compute
    # modes with an absolute tolerance of 0.02.
    euclid_modes = ('use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist')
    norms = [0, 1, 2, 3, 1.5, 2.5, float('inf')]
    for r1, m, r2, p in product([3, 4, 5, 6], [2, 3, 4, 10], [4, 6, 7, 8], norms):
        x = torch.randn(r1, m, device=device)
        y = torch.randn(r2, m, device=device)
        if p != 2:
            actual = torch.cdist(x, y, p=p)
            expected = self._brute_cdist(x, y, p=p)
            self.assertEqual(expected, actual)
            continue
        for cm in euclid_modes:
            actual = torch.cdist(x, y, p=2, compute_mode=cm)
            expected = self._brute_cdist(x, y, p=2)
            self.assertEqual(expected, actual, rtol=0, atol=0.02)
@skipIfMPS
def test_cdist_norm_batch(self, device):
    # Batched variant of the cdist-vs-reference comparison: inputs carry
    # leading batch dimensions (2, 3, 6).
    euclid_modes = ('use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist')
    norms = [0, 1, 2, 3, 1.5, 2.5, float('inf')]
    for r1, m, r2, p in product([3, 4, 5, 6], [2, 3, 4, 10], [4, 6, 7, 8], norms):
        x = torch.randn(2, 3, 6, r1, m, device=device)
        y = torch.randn(2, 3, 6, r2, m, device=device)
        if p != 2:
            actual = torch.cdist(x, y, p=p)
            expected = self._brute_cdist(x, y, p=p)
            self.assertEqual(expected, actual)
            continue
        for cm in euclid_modes:
            actual = torch.cdist(x, y, p=2, compute_mode=cm)
            expected = self._brute_cdist(x, y, p=2)
            self.assertEqual(expected, actual, rtol=0, atol=0.02)
@onlyPRIVATEUSE1
def test_cdist_NPU_backward(self, device):
    # Gradients of torch.cdist must match gradients of the brute-force
    # reference for several sequence lengths and norms.
    #
    # x1/y1 feed torch.cdist; x2/y2 are detached clones feeding the
    # reference, so the two gradient sets are independent.
    for l1 in [1, 511, 513]:
        for l2 in [1, 511, 513]:
            for p in [0, 1, 2, 3, 1.5, 2.5, float('inf')]:
                x1 = torch.randn(4, l1, 32, device=device, requires_grad=True)
                x2 = x1.clone().detach_().requires_grad_()
                y1 = torch.randn(4, l2, 32, device=device, requires_grad=True)
                y2 = y1.clone().detach_().requires_grad_()
                if p == 2:
                    # Gradients accumulate over both compute modes on both
                    # sides, so the comparison stays like-for-like.
                    for cm in ['use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist']:
                        z1 = torch.cdist(x1, y1, p=2, compute_mode=cm).mean()
                        z2 = self._brute_cdist(x2, y2, p=2).mean()
                        z1.backward()
                        z2.backward()
                        self.assertEqual(x1.grad, x2.grad, rtol=0, atol=0.001)
                        self.assertEqual(y1.grad, y2.grad, rtol=0, atol=0.001)
                else:
                    z1 = torch.cdist(x1, y1, p=p).mean()
                    z2 = self._brute_cdist(x2, y2, p=p).mean()
                    # Run backward before comparing: without it every .grad
                    # is None and the assertions pass without checking
                    # anything.
                    z1.backward()
                    z2.backward()
                    self.assertEqual(x1.grad, x2.grad, rtol=0, atol=0.001)
                    self.assertEqual(y1.grad, y2.grad, rtol=0, atol=0.001)
@tf32_on_and_off(0.05 if TEST_WITH_ROCM else 0.005)
@reduced_f32_on_and_off(0.08)
def test_cdist_large(self, device):
    # Euclidean cdist on 1000 x 10 inputs must match the brute-force
    # reference in every compute mode.
    modes = ('use_mm_for_euclid_dist_if_necessary',
             'use_mm_for_euclid_dist',
             'donot_use_mm_for_euclid_dist')
    for cm in modes:
        x = torch.randn(1000, 10, device=device)
        y = torch.randn(1000, 10, device=device)
        computed = torch.cdist(x, y, p=2, compute_mode=cm)
        reference = self._brute_cdist(x, y, p=2)
        self.assertEqual(reference, computed)
@slowTest
@tf32_on_and_off(0.01)
@reduced_f32_on_and_off(0.08)
def test_cdist_large_batch(self, device):
    # Batched (4, 3, 1000, 10) Euclidean cdist must match the brute-force
    # reference in every compute mode.
    modes = ('use_mm_for_euclid_dist_if_necessary',
             'use_mm_for_euclid_dist',
             'donot_use_mm_for_euclid_dist')
    for cm in modes:
        x = torch.randn(4, 3, 1000, 10, device=device)
        y = torch.randn(4, 3, 1000, 10, device=device)
        computed = torch.cdist(x, y, p=2, compute_mode=cm)
        reference = self._brute_cdist(x, y, p=2)
        self.assertEqual(reference, computed)
@tf32_on_and_off(0.005)
@reduced_f32_on_and_off(0.04)
def test_cdist_non_contiguous(self, device):
    # Euclidean cdist must match the reference when either or both inputs
    # are transposed (non-contiguous) views.

    def verify(x, y, cm, x_contiguous, y_contiguous):
        # Compare against the reference first, then confirm the inputs
        # really have the intended memory layout.
        actual = torch.cdist(x, y, p=2, compute_mode=cm)
        expected = self._brute_cdist(x, y, p=2)
        (self.assertTrue if x_contiguous else self.assertFalse)(x.is_contiguous())
        (self.assertTrue if y_contiguous else self.assertFalse)(y.is_contiguous())
        self.assertEqual(expected, actual)

    for cm in ('use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist'):
        # Both inputs transposed.
        x = torch.randn(5, 7, device=device).mT
        y = torch.randn(5, 3, device=device).mT
        verify(x, y, cm, False, False)

        # Only y transposed.
        x = torch.randn(7, 5, device=device)
        y = torch.randn(5, 3, device=device).t()
        verify(x, y, cm, True, False)

        # Only x transposed.
        x = torch.randn(5, 7, device=device).t()
        y = torch.randn(3, 5, device=device)
        verify(x, y, cm, False, True)
@tf32_on_and_off(0.005)
@reduced_f32_on_and_off(0.04)
def test_cdist_non_contiguous_batch(self, device):
    # Batched variant: Euclidean cdist must match the reference when either
    # or both batched inputs are matrix-transposed views.

    def verify(x, y, cm, x_contiguous, y_contiguous):
        actual = torch.cdist(x, y, p=2, compute_mode=cm)
        expected = self._brute_cdist(x, y, p=2)
        (self.assertTrue if x_contiguous else self.assertFalse)(x.is_contiguous())
        (self.assertTrue if y_contiguous else self.assertFalse)(y.is_contiguous())
        self.assertEqual(expected, actual)

    for cm in ('use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist'):
        # Both inputs transposed.
        x = torch.randn(4, 3, 2, 5, 7, device=device).mT
        y = torch.randn(4, 3, 2, 5, 3, device=device).mT
        verify(x, y, cm, False, False)

        # Only y transposed.
        x = torch.randn(7, 2, 7, 5, device=device)
        y = torch.randn(7, 2, 5, 3, device=device).mT
        verify(x, y, cm, True, False)

        # Only x transposed.
        x = torch.randn(4, 5, 7, device=device).mT
        y = torch.randn(4, 3, 5, device=device)
        verify(x, y, cm, False, True)
def test_cdist_euclidean_large(self, device):
def _test_euclidean_large_cdist(sizex, sizey=None):
if sizey is None:
sizey = sizex
x = torch.randn(sizex, device=device, dtype=torch.float)
y = torch.randn(sizey, device=device, dtype=torch.float)
eps = 1e-6
x = x - (((x - y) < eps).float() * 2 * eps)
x.requires_grad = True
y.requires_grad = True
dist = torch.cdist(x, y, p=2)
loss = dist.sum()
loss.backward()
_test_euclidean_large_cdist((2000, 5))
@skipIfMPS
def test_cdist_grad_p_lt_1_no_nan(self, device):
for p in [0.99, 0.7, 0.5, 0.1, 0.01]:
x = torch.randn(1, 2, device=device)
y = x.detach().clone() + torch.tensor([[1., 0.]], device=device)
x.requires_grad = True
y.requires_grad = True
result = torch.cdist(x, y, p=p)
result.backward(torch.ones_like(result))
self.assertFalse(torch.isnan(x.grad).any())
self.assertFalse(torch.isnan(y.grad).any())
def test_cdist_same_inputs(self, device):
sizex = (1, 27, 32)
for p in [0, 1, 2, 3, 1.5, 2.5, float('inf')]:
x = torch.randn(sizex, device=device, dtype=torch.float)
dist_grad = torch.randn((1, 27, 27), device=device, dtype=torch.float)
y = x.clone()
x.requires_grad = True
d = torch.cdist(x, y)
d.backward(dist_grad)
assert torch.isfinite(x.grad).all()
@skipIfMPS
def test_cumsum(self, device):
x = torch.rand(100, 100, device=device)
res1 = torch.cumsum(x, 1)
res2 = torch.tensor([]).to(device)
torch.cumsum(x, 1, out=res2)
self.assertEqual(res1, res2)
x.cumsum_(1)
self.assertEqual(res1, x)
a = torch.tensor([[True, False, True],
[False, False, False],
[True, True, True]], device=device)
b = a.byte()
aRes = torch.cumsum(a, 0)
bRes = torch.cumsum(b, 0)
self.assertEqual(aRes, bRes)
self.assertEqual(aRes, torch.tensor([[1, 0, 1],
[1, 0, 1],
[2, 1, 2]]))
aRes = torch.cumsum(a, 1)
bRes = torch.cumsum(b, 1)
self.assertEqual(aRes, bRes)
self.assertEqual(aRes, torch.tensor([[1, 1, 2],
[0, 0, 0],
[1, 2, 3]]))
shapes = [[2, 0], [2, 1, 4], [0, 2, 3], [1], [5]]
for shape in shapes:
for dim in range(len(shape)):
raw_tensor = torch.zeros(*shape, requires_grad=True)
integrated = raw_tensor.cumsum(dim=dim)
integrated.sum().backward()
self.assertEqual(raw_tensor.shape, raw_tensor.grad.shape)
raw_tensor = torch.tensor(3., requires_grad=True)
integrated = raw_tensor.cumsum(dim=-1)
self.assertEqual(raw_tensor, integrated)
integrated.sum().backward()
self.assertEqual(raw_tensor.shape, raw_tensor.grad.shape)
@skipIfMPS
def test_cumprod(self, device):
x = torch.rand(100, 100, device=device)
res1 = torch.cumprod(x, 1)
res2 = torch.tensor([]).to(device)
if not TEST_WITH_TORCHINDUCTOR:
torch.cumprod(x, 1, out=res2)
self.assertEqual(res1, res2)
x.cumprod_(1)
self.assertEqual(res1, x)
a = torch.tensor([[True, False, True],
[False, False, False],
[True, True, True]], dtype=torch.bool, device=device)
b = a.byte()
aRes = torch.cumprod(a, 0)
bRes = torch.cumprod(b, 0)
self.assertEqual(aRes, bRes)
self.assertEqual(aRes, torch.tensor([[1, 0, 1],
[0, 0, 0],
[0, 0, 0]]))
aRes = torch.cumprod(a, 1)
bRes = torch.cumprod(b, 1)
self.assertEqual(aRes, bRes)
self.assertEqual(aRes, torch.tensor([[1, 0, 0],
[0, 0, 0],
[1, 1, 1]]))
shapes = [[2, 0], [2, 1, 4], [0, 2, 3], [1], [5]]
for shape in shapes:
for dim in range(len(shape)):
raw_tensor = torch.zeros(*shape, requires_grad=True)
integrated = raw_tensor.cumprod(dim=dim)
integrated.sum().backward()
self.assertEqual(raw_tensor.shape, raw_tensor.grad.shape)
raw_tensor = torch.tensor(3., requires_grad=True)
integrated = raw_tensor.cumprod(dim=-1)
self.assertEqual(raw_tensor, integrated)
integrated.sum().backward()
self.assertEqual(raw_tensor.shape, raw_tensor.grad.shape)
@skipIfMPS
def test_cummax_cummin(self, device):
    # Runs the same battery of checks against torch.cummax and torch.cummin:
    # out= agreement, bool vs. uint8 input, inf/nan handling, error messages
    # for a mismatched out dtype and an invalid dim, and backward shapes.

    def test_ops(op, string_of_function_name, expected_output1, expected_output2):
        # op: the torch-level function under test.
        # string_of_function_name: Tensor method name, resolved via getattr
        #     for the backward checks below.
        # expected_output1: expected values for the bool matrix along dim 0.
        # expected_output2: expected values for the inf/nan vector.

        # Functional result vs. the out=(values, indices) variant.
        x = torch.rand(100, 100, device=device)
        out1 = op(x, 1)
        res2 = torch.empty(0, device=device)
        indices2 = torch.empty(0, dtype=torch.int64, device=device)
        op(x, 1, out=(res2, indices2))
        self.assertEqual(out1[0], res2)
        self.assertEqual(out1[1], indices2)

        # Bool input must give the same values as its uint8 conversion.
        a = torch.tensor([[True, False, True],
                          [False, False, False],
                          [True, True, True]], dtype=torch.bool, device=device)
        b = a.byte()
        aRes = op(a, 0)
        bRes = op(b, 0)
        self.assertEqual(aRes[0], bRes[0].bool())
        self.assertEqual(aRes[0], expected_output1.bool())

        # test inf and nan input (note: this tensor is created without
        # device=, i.e. on the default device)
        x = torch.tensor([4, inf, 1.5, -inf, 0, nan, 1])
        xRes = op(x, 0)[0]
        self.assertEqual(xRes, expected_output2)

        # op shouldn't support values, indices with a dtype, device type or layout
        # different from that of input tensor
        t = torch.randn(10)
        values = torch.empty(0, dtype=torch.int16)
        indices = torch.empty(0, dtype=torch.int64)
        with self.assertRaisesRegex(
                RuntimeError,
                'expected scalar_type Float but found Short'):
            op(t, 0, out=(values, indices))

        # Check that op over a zero-dim tensor rejects an out-of-range dim.
        x = torch.rand([])
        dim = 100
        with self.assertRaisesRegex(
                IndexError,
                'Expected reduction dim -1 or 0 for scalar but got 100'):
            op(x, dim)

        # Check that backward works for assorted shapes, including ones
        # with a zero-sized dimension.
        shapes = [[2, 0], [2, 1, 4], [0, 2, 3], [1], [5]]
        for shape in shapes:
            for dim in range(len(shape)):
                raw_tensor = torch.zeros(*shape, requires_grad=True)
                integrated = getattr(raw_tensor, string_of_function_name)(dim=dim)
                # Check that backward does not crash
                integrated[0].sum().backward()
                # Check that output maintained correct shape
                self.assertEqual(raw_tensor.shape, raw_tensor.grad.shape)

        # Check a scalar (zero-dim) example.
        raw_tensor = torch.tensor(3., requires_grad=True)
        integrated = getattr(raw_tensor, string_of_function_name)(dim=-1)
        # Check that backward does not crash
        integrated[0].sum().backward()
        # Check that output maintained correct shape
        self.assertEqual(raw_tensor.shape, raw_tensor.grad.shape)

    # cummax: expected bool-matrix result and expected inf/nan result.
    expected_out = torch.tensor([4, inf, inf, inf, inf, nan, nan])
    test_ops(torch.cummax, "cummax", torch.tensor([[1, 0, 1],
                                                   [1, 0, 1],
                                                   [1, 1, 1]]), expected_out)
    # cummin: expected bool-matrix result and expected inf/nan result.
    expected_out = torch.tensor([4, 4, 1.5, -inf, -inf, nan, nan])
    test_ops(torch.cummin, "cummin", torch.tensor([[1, 0, 1],
                                                   [0, 0, 0],
                                                   [0, 0, 0]]), expected_out)
@skipIfMPS
@unittest.skipIf(device_is_910A, "float('inf')/float('nan') is not supported on 910A")
def test_logcumsumexp(self, device):
    # logcumsumexp must agree with the naive log(cumsum(exp(x))) reference.

    def naive_logcumsumexp(a, axis):
        summed = torch.cumsum(a.exp(), axis=axis)
        return summed.log_()

    axis = -1

    # Random 2-D input: dtype, shape and values.
    data = torch.randn(100, 100, device=device)
    computed = data.logcumsumexp(axis)
    reference = naive_logcumsumexp(data, axis)
    self.assertEqual(data.dtype, computed.dtype)
    self.assertEqual(reference.shape, computed.shape)
    self.assertEqual(reference, computed)

    # Input containing -inf, +inf and nan, as a vector and as an expanded
    # 2-row view of it.
    special = torch.tensor([-float('inf'), -float('inf'), 1.0, 1.0, float('inf'),
                            float('inf'), float('nan'), 1.0, 1.0], device=device)
    special_2d = special.unsqueeze(0).expand(2, -1)
    for inp in (special, special_2d):
        computed = inp.logcumsumexp(axis)
        reference = naive_logcumsumexp(inp, axis)
        self.assertEqual(reference, computed)

    # out= variant writing into a pre-allocated tensor.
    small = torch.randn(5, 2, device=device)
    destination = torch.zeros(5, 2, device=device)
    reference = naive_logcumsumexp(small, axis)
    torch.logcumsumexp(small, axis=axis, out=destination)
    self.assertEqual(destination, reference)

    # An out tensor whose dtype differs from the input's must be rejected.
    small = torch.randn(5, 2, device=device, dtype=torch.float64)
    destination = torch.zeros(5, 2, device=device, dtype=torch.float32)
    with self.assertRaisesRegex(
            RuntimeError,
            'expected scalar_type Double but found Float'):
        torch.logcumsumexp(small, axis, out=destination)
def _test_diff_numpy(self, t, dims=None):
def to_np(t):
if t.dtype == torch.bfloat16:
return t.to(dtype=torch.float, device="cpu").numpy()
else:
return t.cpu().numpy()
for dim in dims if dims else range(t.dim()):
prepend = t.narrow(dim, 0, 1)
append = t.narrow(dim, 0, 1)
np_t = to_np(t)
for n in range(t.size(dim)):
actual = torch.diff(t, dim=dim, n=n)
expected = torch.from_numpy(np.diff(np_t, axis=dim, n=n))
self.assertEqual(actual, expected.to(t.dtype))
for n in range(1, t.size(dim) + 4):
actual = torch.diff(t, dim=dim, n=n, prepend=prepend, append=append)
expected = torch.from_numpy(np.diff(np_t, axis=dim, n=n, prepend=to_np(prepend), append=to_np(append)))
self.assertEqual(actual, expected.to(t.dtype))
for n in range(1, t.size(dim) * 3):
actual = torch.diff(t, dim=dim, n=n, prepend=t, append=t)
expected = torch.from_numpy(np.diff(np_t, axis=dim, n=n, prepend=np_t, append=np_t))
self.assertEqual(actual, expected.to(t.dtype))
@onlyNativeDeviceTypes
@dtypes(*all_types_and_complex_and(torch.half, torch.bool))
def test_diff_noncontig(self, device, dtype):
    # Runs the diff-vs-NumPy comparison on strided views carved out of a
    # larger buffer.
    for shape in ((1,), (1, 5), (3, 5), (1, 5, 1), (2, 3, 5)):
        values = make_tensor(shape, dtype=dtype, device=device, low=-9, high=9)

        # Two extra trailing dims of size 2 are indexed away, leaving a
        # strided view with the requested shape.
        backing = torch.empty(shape + (2, 2), device=device, dtype=dtype)
        strided = backing[..., 0].select(-1, -1)
        strided.copy_(values)

        # The (1,) case is exempt from the non-contiguity requirement.
        self.assertTrue(shape == (1,) or not strided.is_contiguous())

        self._test_diff_numpy(strided)
@dtypes(*all_types_and_complex_and(torch.bool))
@dtypesIfCPU(*all_types_and_complex_and(torch.half, torch.bool))
@dtypesIfPRIVATEUSE1(*all_types_and_complex_and(torch.half, torch.bool))
def test_diff(self, device, dtype):
    # Contiguous inputs: compare torch.diff with np.diff.
    for shape in ((1,), (1, 5), (3, 5), (1, 5, 1), (2, 3, 5)):
        self._test_diff_numpy(make_tensor(shape, dtype=dtype, device=device, low=-9, high=9))

    # Invalid arguments: each call must raise a RuntimeError carrying the
    # listed message. Tensors are built inside the assertion context.
    t = torch.ones(2, 3)
    failing_calls = (
        ('diff expects prepend or append to be the same dimension as input',
         lambda: t.diff(dim=0, prepend=torch.tensor([1, 2, 3], device=device, dtype=dtype))),
        ('diff expects the shape of tensor to prepend or append to match that of input',
         lambda: t.diff(dim=0, prepend=torch.tensor([[0, 1]], device=device, dtype=dtype))),
        ('diff expects input to be at least one-dimensional',
         lambda: torch.diff(torch.tensor(2, device=device, dtype=dtype))),
    )
    for message, call in failing_calls:
        with self.assertRaisesRegex(RuntimeError, message):
            call()
def _wrap_to_list(self, input_array):
return list(input_array) if isinstance(input_array, (list, tuple)) else [input_array]
def _inf_nan_preprocess(self, actual, expected):
for i in range(len(expected)):
expected[i] = np.nan_to_num(expected[i], nan=nan, posinf=nan, neginf=nan)
if actual[i].dtype == torch.complex64:
actual[i].real = torch.nan_to_num(actual[i].real, nan=nan, posinf=nan, neginf=nan)
actual[i].imag = torch.nan_to_num(actual[i].imag, nan=nan, posinf=nan, neginf=nan)
else:
actual[i] = torch.nan_to_num(actual[i], nan=nan, posinf=nan, neginf=nan)
return actual, expected
@onlyNativeDeviceTypes
@dtypes(torch.long, torch.float32, torch.complex64)
def test_gradient_all(self, device, dtype):
    # Compares torch.gradient with np.gradient across input shapes, selected
    # dims, memory layouts, edge orders and three kinds of spacing (scalar,
    # list of scalars, list of coordinate tensors).

    def create_scalar(shape):
        # `shape` is unused; it is accepted so that all spacing factories
        # share one call signature.
        return make_tensor((1,), device='cpu', dtype=dtype, low=1.).item()

    def create_list(shape):
        # One scalar spacing per selected dimension.
        return make_tensor((len(shape),), device='cpu', dtype=dtype, low=1.).tolist()

    def create_coordinate_tensors(shape):
        # One coordinate vector per selected dimension, on the test device.
        return [make_tensor((extent,), device=device, dtype=dtype) for extent in shape]

    def filter_shape(shape, dim):
        # Sizes of `shape` along the dimensions listed in `dim`.
        return [shape[d] for d in dim]

    # shape, dims format
    test_cases = (
        ((5,), (0,)),
        ((4, 4), (0, 1)),
        ((3, 3, 3), (-1, 0)),
        ((4, 4, 4), (2,)),
        ((4, 4, 4), (0, 1)),
        ((4, 4, 4, 3), (0, 2, 3)),
        ((4, 5, 3, 4, 3), (1, 2)),
        ((4, 3, 6, 5, 3), (2, 4)),
        ((4, 3, 3, 5, 3), (0, 1, 2, 3, 4)),
        ((1, 3, 3), (1, 2)),
        ((1, 5), (1,)),
    )

    for case, contig, edge_order, space_fn in product(test_cases, [True, False], [1, 2],
                                                      (create_scalar, create_list, create_coordinate_tensors)):
        shape, dims = case
        filtered_shape = filter_shape(shape, dims)

        spacing = space_fn(filtered_shape)
        t = make_tensor(shape, device=device, dtype=dtype, noncontiguous=not contig)
        t_np = t.cpu().numpy()

        actual = torch.gradient(t, spacing=spacing, dim=dims, edge_order=edge_order)
        if space_fn is create_coordinate_tensors:
            # Coordinate tensors are always handed to NumPy as arrays. The
            # previous guard also tested `spacing[0].device != 'cpu'`, but a
            # torch.device never compares equal to a str, so that condition
            # was always true; the conversion is now explicit.
            spacing = [space.cpu().detach().numpy() for space in spacing]
        expected = np.gradient(t_np, *self._wrap_to_list(spacing), axis=dims, edge_order=edge_order)
        # inf/nan are normalised to nan on both sides before comparing.
        actual, expected = self._inf_nan_preprocess(list(actual), self._wrap_to_list(expected))
        self.assertEqual(actual, expected, equal_nan=True, atol=1e-4, rtol=0, exact_dtype=False)
@onlyNativeDeviceTypes
@slowTestIf(TEST_WITH_TORCHINDUCTOR)
@dtypes(torch.long, torch.float32, torch.complex64)
def test_gradient_extreme_cases(self, device, dtype):
    # Behaviour on inf and nan values must match NumPy.
    special = [2, -2, inf, inf, -inf, -inf, inf, 3, -inf, 2, nan, nan, 3, inf, nan]
    actual = torch.gradient(torch.tensor(special))
    expected = np.gradient(np.array(special))
    self.assertEqual(actual, self._wrap_to_list(expected), exact_dtype=False)

    # A large input with random coordinates must match NumPy for both edge
    # orders.
    large_size = 100000
    t = make_tensor((large_size,), dtype=dtype, device=device)
    t_np = t.cpu().numpy()
    coordinates_np = np.random.randn(large_size)
    coordinates = [torch.tensor(coordinates_np, device=device)]
    for edge_order in (1, 2):
        actual = torch.gradient(t, spacing=coordinates, dim=0, edge_order=edge_order)
        expected = [np.gradient(t_np, coordinates_np, axis=0, edge_order=edge_order)]
        self.assertEqual(actual, expected, exact_dtype=False)
@onlyNativeDeviceTypes
def test_gradient_type_promotion(self, device):
    # torch.gradient must agree with np.gradient for every pairing of input
    # dtype (float32 / complex64 / int64) with spacing kind and dtype.
    inputs = (
        make_tensor((4, 4), device=device, dtype=torch.float32),
        make_tensor((4, 4), device=device, dtype=torch.complex64),
        make_tensor((4, 4), device=device, dtype=torch.int64),
    )

    spacing = (
        # Single scalar spacing.
        make_tensor((1,), device='cpu', dtype=torch.float32).item(),
        make_tensor((1,), device='cpu', dtype=torch.int64).item(),
        make_tensor((1,), device='cpu', dtype=torch.complex64).item(),
        # One scalar spacing per dimension.
        make_tensor((2,), device='cpu', dtype=torch.float32, low=0.1).tolist(),
        make_tensor((2,), device='cpu', dtype=torch.int64, low=1).tolist(),
        make_tensor((2,), device='cpu', dtype=torch.complex64).tolist(),
        # One coordinate tensor per dimension, on the test device.
        [make_tensor((4,), device=device, dtype=torch.float32),
         make_tensor((4,), device=device, dtype=torch.float32)],
        [make_tensor((4,), device=device, dtype=torch.int64),
         make_tensor((4,), device=device, dtype=torch.int64)],
        [make_tensor((4,), device=device, dtype=torch.complex64),
         make_tensor((4,), device=device, dtype=torch.complex64)],
    )

    for input_, spacing_or_coord, edge_order in product(inputs, spacing, [1, 2]):
        input_np = input_.cpu().numpy()
        actual = torch.gradient(input_, spacing=spacing_or_coord, dim=(0, 1), edge_order=edge_order)
        spacing_or_coord_wrapped = self._wrap_to_list(spacing_or_coord)
        first = spacing_or_coord_wrapped[0]
        # Non-CPU coordinate tensors are converted to NumPy arrays; scalars
        # and CPU tensors are handed to NumPy unchanged.
        if torch.is_tensor(first) and torch.device(first.device).type != 'cpu':
            spacing_or_coord_np = [
                coord.detach().clone().cpu().numpy() for coord in spacing_or_coord_wrapped
            ]
        else:
            spacing_or_coord_np = spacing_or_coord_wrapped
        expected = np.gradient(input_np, *spacing_or_coord_np, axis=(0, 1), edge_order=edge_order)
        if actual[0].dtype == torch.complex64 and input_.dtype != torch.complex64:
            # The torch result is complex although the input was not: its
            # real part must match NumPy and NumPy's imaginary part must be
            # zero.
            for got, want in zip(actual, expected):
                self.assertEqual(got.real, want.real, exact_dtype=False)
                self.assertEqual(want.imag, torch.zeros(got.shape), exact_dtype=False)
        else:
            actual, expected = self._inf_nan_preprocess(list(actual), list(expected))
            self.assertEqual(actual, expected, equal_nan=True, exact_dtype=False)
@onlyNativeDeviceTypes
@dtypes(torch.long, torch.float32, torch.complex64)
def test_gradient_spacing_list_length_error(self, device, dtype):
t = make_tensor((2, 2), device=device, dtype=dtype)
spacing = (make_tensor((2,), device=device, dtype=dtype),)
with self.assertRaisesRegex(RuntimeError, r'expected spacing to be'):
torch.gradient(t, spacing=spacing)
spacing = (make_tensor((2,), device=device, dtype=dtype),) * 2
torch.gradient(t, spacing=spacing)
spacing = (make_tensor((2,), device=device, dtype=dtype),) * 3
with self.assertRaisesRegex(RuntimeError, r'expected spacing to be'):
torch.gradient(t, spacing=spacing)
spacing = (2,)
with self.assertRaisesRegex(RuntimeError, r'expected spacing to be'):
torch.gradient(t, spacing=spacing)
spacing = (2, 2)
torch.gradient(t, spacing=spacing)
spacing = (2, 2, 2)
with self.assertRaisesRegex(RuntimeError, r'expected spacing to be'):
torch.gradient(t, spacing=spacing)
def _test_large_cum_fn_helper(self, x, fn):
expected = fn(x.cpu().float())
actual = fn(x).cpu().float()
torch.testing.assert_close(expected, actual)
@unittest.skipIf(IS_FBCODE and IS_REMOTE_GPU, "sandcastle OOM with current tpx gpu/re configuration")
@unittest.skipIf(IS_JETSON, "psutil issue for largeTensorTest. Too large for Jetson.")
@onlyPRIVATEUSE1
@dtypes(torch.half)
@largeTensorTest('25GB', device='cpu')
@largeTensorTest('4GB', device='npu')
def test_large_cumsum(self, device, dtype):
    # cumsum over 2**30 + 200 elements, compared against the CPU result.
    # The repeating pattern -3, 2, 1 sums to zero every three elements.
    values = torch.empty(2**30 + 200, device=device, dtype=dtype)
    for phase, fill in enumerate((-3, 2, 1)):
        values[phase::3] = fill
    self._test_large_cum_fn_helper(values, lambda t: torch.cumsum(t, 0))
@onlyPRIVATEUSE1
@dtypes(torch.half)
@largeTensorTest('25GB', device='cpu')
@largeTensorTest('4GB', device='npu')
@unittest.skipIf(IS_JETSON, "psutil issue for largeTensorTest. Too large for Jetson.")
def test_large_cumprod(self, device, dtype):
    # cumprod over 2**30 + 200 elements, compared against the CPU result.
    # The repeating factors 8, .25, .5 multiply to one every three elements.
    values = torch.empty(2**30 + 200, device=device, dtype=dtype)
    for phase, factor in enumerate((8, .25, .5)):
        values[phase::3] = factor
    self._test_large_cum_fn_helper(values, lambda t: torch.cumprod(t, 0))
@skipIfTorchDynamo("Torchdynamo fails with unknown reason")
@skipIfMPS
def test_discontiguous_out_cumsum(self, device):
    # cumsum must produce the right values when writing into a
    # non-contiguous ``out`` tensor, and must leave it non-contiguous.
    source = torch.randn(4, 8, device=device)
    # Every other column of a (4, 16) buffer gives a strided (4, 8) view.
    strided_out = torch.empty(4, 16, device=device)[:, ::2]
    reference = torch.cumsum(source, 0)
    torch.cumsum(source, 0, out=strided_out)
    self.assertFalse(strided_out.is_contiguous())
    self.assertEqual(reference, strided_out, atol=0., rtol=0.)
def _test_cumminmax_helper(self, x, fn, expected_val, expected_ind):
    # Calls ``fn(x, -1)`` (used with torch.cummax / torch.cummin) and checks
    # values and indices exactly, first via the functional form and then via
    # ``out=`` with output buffers built through .t().contiguous().t().
    values, indices = fn(x, -1)
    for got, want in ((values, expected_val), (indices, expected_ind)):
        self.assertEqual(got, want, atol=0, rtol=0)

    out_val = torch.empty_like(values).t().contiguous().t()
    out_ind = torch.empty_like(indices).t().contiguous().t()
    fn(x, -1, out=(out_val, out_ind))

    # The layout check is skipped when running under TorchInductor.
    if not TEST_WITH_TORCHINDUCTOR:
        for buffer in (out_val, out_ind):
            self.assertFalse(buffer.is_contiguous())

    for got, want in ((out_val, expected_val), (out_ind, expected_ind)):
        self.assertEqual(got, want, atol=0, rtol=0)
@skipIfMPS
def test_cummax_discontiguous(self, device):
    # cummax along the last dim of an input laid out via .t().contiguous().t().
    rows = [[0, 1, 2, 3, 2, 1], [4, 5, 6, 5, 6, 7]]
    data = torch.tensor(rows, device=device, dtype=torch.float).t().contiguous().t()
    running_max = torch.tensor([[0, 1, 2, 3, 3, 3], [4, 5, 6, 6, 6, 7]], device=device, dtype=torch.float)
    max_index = torch.tensor([[0, 1, 2, 3, 3, 3], [0, 1, 2, 2, 4, 5]], device=device, dtype=torch.long)
    self._test_cumminmax_helper(data, torch.cummax, running_max, max_index)
@skipIfMPS
def test_cummin_discontiguous(self, device):
    # cummin along the last dim of an input laid out via .t().contiguous().t().
    rows = [[3, 2, 1, 0, 1, 2], [7, 6, 5, 4, 5, 2]]
    data = torch.tensor(rows, device=device, dtype=torch.float).t().contiguous().t()
    running_min = torch.tensor([[3, 2, 1, 0, 0, 0], [7, 6, 5, 4, 4, 2]], device=device, dtype=torch.float)
    min_index = torch.tensor([[0, 1, 2, 3, 3, 3], [0, 1, 2, 3, 3, 5]], device=device, dtype=torch.long)
    self._test_cumminmax_helper(data, torch.cummin, running_min, min_index)
def test_bool_tensor_value_change(self, device):
    # Assigning through an integer index must flip single entries of a
    # bool tensor.
    flags = torch.tensor([True, False], dtype=torch.bool, device=device)
    flags[0], flags[1] = False, True
    flipped = torch.tensor([False, True], dtype=torch.bool, device=device)
    self.assertEqual(flags, flipped)
def test_copy_all_dtypes_and_devices(self, device):
    # copy.copy() of a tensor, for every dtype enabled on this device.
    # After filling the copy in place, the original must still compare equal
    # to it: for the non-bool dtypes that only holds if the fill is visible
    # through the original, i.e. copy() did not duplicate the data.
    from copy import copy
    # 910A devices run the reduced dtype list (no complex, no bfloat16).
    if not device_is_910A:
        dtypes_ = all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)
    else:
        dtypes_ = all_types_and(torch.half, torch.bool)
    for dt in dtypes_:
        x = torch.tensor([1, 2, 3, 4], dtype=dt, device=device)
        y = copy(x)
        y.fill_(1)
        self.assertEqual(x, y)
@onlyCPU
def test_bfloat16_neg_abs(self, device):
    # neg() and abs() computed directly in bfloat16 must match computing them
    # in float32 and converting afterwards, including for +/-nan and +/-inf.
    src = torch.randn(256)
    for slot, special in enumerate((torch.nan, -torch.nan, torch.inf, -torch.inf)):
        src[slot] = special
    src_bf16 = src.bfloat16()

    negated_then_cast = src.neg().bfloat16()
    self.assertEqual(negated_then_cast, src_bf16.neg())

    abs_then_cast = src.abs().bfloat16()
    self.assertEqual(abs_then_cast, src_bf16.abs())
@onlyCPU
@dtypes(torch.bfloat16, torch.half)
def test_reduced_type_float_copy(self, device, dtype):
    # float32 -> reduced precision -> float32 conversions: the first step is
    # compared with default tolerances, the widening step must be exact.
    shapes = [(20, 7), (249, 137), (1029, 917), (1, 7, 19, 17), (3, 77, 1091)]
    for shape in shapes:
        full = torch.randn(shape, dtype=torch.float, device=device)
        # Run the same checks on the full tensor and on a strided slice of it.
        for source in (full, full[..., ::2, :]):
            reduced = source.to(dtype=dtype)
            self.assertEqual(source, reduced, atol=None, rtol=None, exact_dtype=False)
            widened = reduced.to(torch.float)
            self.assertEqual(widened, reduced, atol=0, rtol=0, exact_dtype=False)
@onlyNativeDeviceTypes
def test_copy_math_view(self, device):
    # copy_ between tensors where the source and/or the destination is a
    # negative view (_neg_view) or a conjugate view (conj()), including
    # copies that also change dtype.
    neg_dtype_pairs = (
        (torch.float32, torch.float32),
        (torch.float64, torch.float32),
        (torch.int64, torch.int32),
        (torch.complex128, torch.complex64),
    )
    for dst_dtype, src_dtype in neg_dtype_pairs:
        source = make_tensor((100,), dtype=src_dtype, device=device)
        target = torch.empty(100, dtype=dst_dtype, device=device)

        # Plain copy.
        target.copy_(source)
        self.assertEqual(target, source, exact_dtype=False)

        # Negative view as the source only.
        target.copy_(source._neg_view())
        self.assertEqual(target, source.neg(), exact_dtype=False)

        # Negative view on both sides: the negations cancel.
        target._neg_view().copy_(torch._neg_view(source))
        self.assertEqual(target, source, exact_dtype=False)

        # Negative view as the destination only.
        target._neg_view().copy_(source)
        self.assertEqual(target, source.neg(), exact_dtype=False)

        # Destination is a negative view of the very tensor being copied;
        # target held -source before this call and must hold source after.
        target._neg_view().copy_(target)
        self.assertEqual(target, source, exact_dtype=False)

    conj_dtype_pairs = (
        (torch.complex64, torch.complex64),
        (torch.complex128, torch.complex64),
    )
    for dst_dtype, src_dtype in conj_dtype_pairs:
        source = make_tensor((100,), dtype=src_dtype, device=device)
        target = torch.empty(100, dtype=dst_dtype, device=device)

        # Conjugate view as the destination.
        target.conj().copy_(source)
        self.assertEqual(target, source.conj_physical(), exact_dtype=False)

        # Conjugate destination combined with a negative-view source.
        target.conj().copy_(source._neg_view())
        self.assertEqual(target, source.neg().conj_physical(), exact_dtype=False)
@onlyNativeDeviceTypes
@dtypes(torch.int64, torch.float32, torch.complex64)
def test_copy_transpose_math_view(self, device, dtype):
    # Same neg/conj view copies as above in spirit, but with a transposed
    # (100, 100) source.
    transposed = make_tensor((100, 100), dtype=dtype, device=device).transpose(0, 1)
    target = torch.empty((100, 100), dtype=dtype, device=device)

    target._neg_view().copy_(transposed)
    self.assertEqual(target, -transposed)

    target._neg_view().copy_(transposed._neg_view())
    self.assertEqual(target, transposed)

    target.copy_(transposed._neg_view())
    self.assertEqual(target, -transposed)

    # The conjugate-view checks only apply to complex dtypes.
    if not dtype.is_complex:
        return

    target.conj().copy_(transposed)
    self.assertEqual(target, transposed.conj_physical())

    target.conj().copy_(transposed.conj())
    self.assertEqual(target, transposed)

    target.copy_(transposed.conj())
    self.assertEqual(target, transposed.conj_physical())
def test_clone_all_dtypes_and_devices(self, device):
    # clone() must reproduce the tensor for every dtype enabled on this
    # device; 910A devices run the reduced dtype list.
    dtypes_ = (
        all_types_and(torch.half, torch.bool)
        if device_is_910A
        else all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)
    )
    for dt in dtypes_:
        original = torch.tensor((1, 1), dtype=dt, device=device)
        duplicate = original.clone()
        self.assertEqual(original, duplicate)
def test_clone_zero_stride_dim(self, device):
    # clone() of an as_strided view whose middle dim has stride 0 must
    # compare equal to the view itself.
    # NOTE(review): ``device`` is not passed to torch.randn, so the tensor is
    # created on the default device -- confirm that is intended.
    base = torch.randn(10)
    strided = base.as_strided([2, 1, 5], [1, 0, 2])
    self.assertEqual(strided, strided.clone())
def test_clone_not_memory_dense(self):
x = torch.randn(10, 8).t()[::2, ::2]
y = x.clone()
self.assertTrue(y.stride() == (1, 4))
@parametrize("use_cpu_scalar", [True, False])
@dtypesIfPRIVATEUSE1(*set(get_all_math_dtypes('npu')))
@dtypes(*set(get_all_math_dtypes('cpu')))
def test_addcmul(self, device, dtype, use_cpu_scalar):
    # torch.addcmul(a, b, c, value=alpha) must equal a + alpha * b * c, with
    # ``c`` either a regular device tensor or a 0-dim CPU tensor. Also checks
    # the deprecated positional-value overload and, for half on NPU, that an
    # overflowing intermediate product does not leak inf/nan into the result.
    real_float_dtypes = (torch.half, torch.float, torch.double, torch.bfloat16)
    complex_dtypes = (torch.cfloat, torch.cdouble)

    def _number(floating, integer, dtype):
        # Scalar of the kind matching ``dtype``: real, complex or integral.
        if dtype in real_float_dtypes:
            return floating
        if dtype in complex_dtypes:
            return floating * (1 + 1j)
        return integer

    def rand_tensor(size, dtype, device):
        # Uniform floats for float/complex dtypes; otherwise integers in
        # [1, 5) for uint8 and [-5, 5) for the remaining dtypes.
        if dtype.is_floating_point or dtype.is_complex:
            return torch.rand(size=size, dtype=dtype, device=device)
        lowest = 1 if dtype == torch.uint8 else -5
        return torch.randint(lowest, 5, size=size, dtype=dtype, device=device)

    a = rand_tensor((2, 2), dtype=dtype, device=device)
    b = rand_tensor((2, 2), dtype=dtype, device=device)
    c_size, c_device = ([], "cpu") if use_cpu_scalar else ((2, 2), device)
    c = rand_tensor(c_size, dtype=dtype, device=c_device)

    alpha = _number(0.5, 3, dtype)
    actual = torch.addcmul(a, b, c, value=alpha)
    expected = a + alpha * b * c
    self.assertEqual(expected, actual)

    # The overload taking ``value`` positionally must warn and still agree.
    with self.assertWarnsOnceRegex(
            UserWarning, "This overload of addcmul is deprecated"):
        self.assertEqual(actual, torch.addcmul(a, alpha, b, c))

    if self.device_type == 'npu' and dtype == torch.half:
        # 60000 * 2 is not representable in half, but the full expression
        # 60000 - 60000 * 2 = -60000 is.
        a = torch.tensor([60000.0], device=device, dtype=dtype)
        b = torch.tensor([60000.0], device=device, dtype=dtype)
        c = torch.tensor([2.0], device=device, dtype=dtype)
        out = torch.addcmul(a, b, c, value=-1)
        self.assertTrue(not out.isnan() and not out.isinf())
@onlyPRIVATEUSE1
def test_addcmul_cuda_errors_with_cpu_scalars(self, device):
    # A 0-dim CPU tensor must be rejected by addcmul when it is passed as
    # ``tensor1`` or as ``self`` alongside device tensors.
    alpha = 0.5
    a, b, c = (torch.rand((2, 2), device=device) for _ in range(3))
    cpu_scalar = torch.rand([], device="cpu")

    rejected_calls = (
        (r'CPU Scalar support for tensor1 argument', (a, cpu_scalar, c)),
        (r'CPU Scalar support for self argument', (cpu_scalar, b, c)),
    )
    for pattern, operands in rejected_calls:
        with self.assertRaisesRegex(RuntimeError, pattern):
            torch.addcmul(*operands, value=alpha)
def test_narrow_empty(self, device):
    # Narrowing any dim to length 0, starting at the end of that dim, must
    # give the input shape with that dim replaced by 0.
    x = torch.randn(2, 3, 4, device=device)
    for dim, extent in enumerate(x.size()):
        narrowed = x.narrow(dim, extent, 0)
        expected_size = list(x.size())
        expected_size[dim] = 0
        self.assertEqual(expected_size, narrowed.size())
def test_narrow_copy_non_contiguous(self, device):
    # narrow_copy on a non-contiguous (movedim'ed) input must match
    # narrow_copy on its contiguous copy.
    moved = torch.randn(10, 2, device=device).movedim(-1, 0)
    dim, start, length = 1, 0, 10
    expected = torch.narrow_copy(moved.contiguous(), dim, start, length)
    actual = torch.narrow_copy(moved, dim, start, length)
    self.assertEqual(expected, actual)
@dtypes(*(all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16) if not device_is_910A
          else all_types_and(torch.half)))
@slowTestIf(IS_WINDOWS)
def test_take(self, device, dtype):
    # torch.take checked against numpy.take over contiguous / non-contiguous
    # sources and indices, 1-D and reshaped indices, plus scalar-sized inputs.
    idx_size = (4,)
    make_arg = partial(make_tensor, device=device, dtype=dtype)
    make_idx = partial(make_tensor, low=0, device=device, dtype=torch.int64)

    def ref_take(src, idx):
        # NumPy reference; bfloat16 sources are converted to half before
        # the .numpy() call.
        if dtype == torch.bfloat16:
            src = src.half()
        taken = np.take(src.cpu().numpy(), idx.cpu().numpy())
        return torch.from_numpy(taken).to(device=device, dtype=dtype)

    layout_flags = product([True, False], repeat=3)
    for (src_contig, idx_contig, idx_reshape), src_size in product(layout_flags, ((5,), (4, 5))):
        src = make_arg(src_size, noncontiguous=not src_contig)
        idx = make_idx(idx_size, high=src.numel(), noncontiguous=not idx_contig)
        if idx_reshape:
            idx = idx.reshape(2, 2)
        self.assertEqual(torch.take(src, idx), ref_take(src, idx))

    # All four combinations of () and (1,) sizes for source and index.
    for size_s, size_i in product([(), (1,)], repeat=2):
        source = make_arg(size_s)
        idx = make_idx(size_i, high=1)
        taken = source.take(idx)
        self.assertEqual(taken.item(), source.item())
@dtypes(*(all_types_and_complex_and(torch.half, torch.bfloat16) if not device_is_910A
          else all_types_and(torch.half)))
def test_put(self, device, dtype):
    # torch.put / Tensor.put_ checked against a reference built from
    # index_add_ / index_copy_ on flattened copies, across contiguity
    # combinations, scalar-sized operands and empty index/source.
    src_size = (4,)
    make_arg = partial(make_tensor, device=device, dtype=dtype)
    make_idx = partial(make_tensor, low=0, device=device, dtype=torch.int64)

    def ref_put(dst, idx, src, accumulate):
        # Flatten everything, apply the 1-D index op, restore dst's shape.
        flat_dst = dst.clone(memory_format=torch.contiguous_format).view(-1)
        flat_idx = idx.contiguous().view(-1)
        flat_src = src.contiguous().view(-1)
        apply_indexed = flat_dst.index_add_ if accumulate else flat_dst.index_copy_
        return apply_indexed(0, flat_idx, flat_src).view_as(dst)

    flag_combos = product([True, False], repeat=5)
    for flags, dst_size in product(flag_combos, ((5,), (4, 5))):
        dst_contig, src_contig, idx_contig, idx_reshape, accumulate = flags
        dst = make_arg(dst_size, noncontiguous=not dst_contig)
        src = make_arg(src_size, noncontiguous=not src_contig)

        if accumulate:
            idx = make_idx(src_size, high=dst.numel())
        else:
            # Without accumulation the indices are the first entries of a
            # random permutation, hence distinct.
            idx = torch.randperm(dst.numel(), dtype=torch.int64, device=device)[:src_size[0]]
        if not idx_contig:
            idx = torch.repeat_interleave(idx, 2, dim=-1)[..., ::2]
        if idx_reshape:
            idx = idx.reshape(2, 2)

        # Out-of-place form first, while dst is still unmodified.
        out = torch.put(dst, idx, src, accumulate)
        reference = ref_put(dst, idx, src, accumulate)
        self.assertEqual(out, reference)

        # In-place form.
        dst.put_(idx, src, accumulate)
        self.assertEqual(dst, reference)

    # All eight combinations of () and (1,) sizes for target, index, source.
    scalars = ((make_arg(size_t),
                make_idx(size_i, high=1),
                make_arg(size_s))
               for size_t, size_i, size_s in product([(), (1,)], repeat=3))
    for (dest, idx, source), accumulate in product(scalars, [True, False]):
        dest_init = dest.clone()
        out_of_place = torch.put(dest, idx, source, accumulate=accumulate)
        in_place = dest.clone()
        in_place.put_(idx, source, accumulate=accumulate)
        for result in [out_of_place, in_place]:
            if accumulate:
                self.assertEqual(result.item(), (dest_init + source).item())
            else:
                self.assertEqual(result.item(), source.item())

    # Empty index and source: the destination must stay untouched.
    dest = make_arg((3, 2))
    reference = dest.clone()
    idx = make_idx((0,), high=1)
    source = make_arg((0,))
    for accumulate in [True, False]:
        out = torch.put(dest, idx, source, accumulate=accumulate)
        self.assertEqual(out, reference)
        dest.put_(idx, source, accumulate=accumulate)
        self.assertEqual(dest, reference)
@dtypes(*(all_types_and_complex_and(torch.half, torch.bfloat16) if not device_is_910A
          else all_types_and(torch.half)))
def test_put_accumulate(self, device, dtype):
    # put(..., accumulate=True) with every index equal to 0: the single
    # destination element must end up as its original value plus the sum of
    # the whole source.
    low_precision = dtype == torch.half or dtype == torch.bfloat16
    # ``sizes`` is a tuple of shape tuples. The low-precision case needs the
    # trailing comma: ``((100,))`` is just ``(100,)`` and would iterate over
    # the bare int 100 instead of the shape (100,).
    sizes = ((100,),) if low_precision else ((200,), (3002,))
    # Looser tolerances for half / bfloat16.
    rtol, atol = (1e-1, 1e-2) if low_precision else (1e-3, 1e-4)

    make_arg = partial(make_tensor, low=-2, high=3, device=device, dtype=dtype)
    # All-zero indices: every source element lands on position 0.
    make_idx = partial(torch.zeros, device=device, dtype=torch.int64)
    args = ((make_idx(size), make_arg(size)) for size in sizes)

    for idx, source in args:
        orig = make_arg((1,))
        out = orig.put(idx, source, accumulate=True)
        self.assertEqual(out, orig + source.sum(), rtol=rtol, atol=atol)
@skipIfMPS
def test_take_empty(self, device):
    # take() with an empty index tensor must return a result equal to that
    # (empty) index tensor, for empty and non-empty inputs alike.
    input_shapes = [(0,), (0, 1, 2, 0), (1, 2, 3)]
    indices_shapes = [(0,), (0, 1, 2, 0)]
    for input_shape, indices_shape in product(input_shapes, indices_shapes):
        input_ = torch.empty(input_shape, device=device)
        indices = torch.empty(indices_shape, dtype=torch.int64, device=device)
        self.assertEqual(indices, torch.take(input_, indices), exact_dtype=False)
def test_put_empty(self, device):
    # put_ with empty index/source must leave the destination as it was,
    # with and without accumulation.
    dst_shapes = [(0,), (0, 1, 2, 0), (1, 2, 3)]
    indices_shapes = [(0,), (0, 1, 2, 0)]
    for dst_shape, indices_shape, accumulate in product(dst_shapes, indices_shapes, [False, True]):
        dst = torch.randn(dst_shape, device=device)
        indices = torch.empty(indices_shape, dtype=torch.int64, device=device)
        src = torch.randn(indices_shape, device=device)
        self.assertEqual(dst, dst.put_(indices, src, accumulate=accumulate))
def scatter_allow_reduce(self, device, dtype, reduceop):
    # Tells the scatter-reduce tests whether a (device, dtype, reduceop)
    # combination should be run. Every combination is allowed off NPU; on
    # NPU only 'multiply' with a floating-point dtype is.
    if torch.device(device).type != 'npu':
        return True
    return reduceop == 'multiply' and dtype.is_floating_point
@dtypes(*floating_and_complex_types())
@dtypesIfCPU(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
@dtypesIfPRIVATEUSE1(*(all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16) if not device_is_910A
                       else all_types_and(torch.half, torch.bool)))
def test_scatter_reduce_operations_to_large_input(self, device, dtype):
    # scatter_ with reduce= into a 4x4 destination from a 2x2 source through
    # a 2x1 index: only column 0 of rows 1 and 2 may change.
    index = torch.tensor([[1], [2]], device=device, dtype=torch.long)
    as_tensor = partial(torch.tensor, device=device, dtype=dtype)

    add_case = (
        torch.zeros(4, 4, device=device, dtype=dtype),
        torch.ones(2, 2, device=device, dtype=dtype),
        as_tensor([[0, 0, 0, 0],
                   [1, 0, 0, 0],
                   [1, 0, 0, 0],
                   [0, 0, 0, 0]]),
        "add",
    )
    multiply_case = (
        as_tensor([2]).repeat(4, 4),
        as_tensor([6]).repeat(2, 2),
        as_tensor([[2, 2, 2, 2],
                   [12, 2, 2, 2],
                   [12, 2, 2, 2],
                   [2, 2, 2, 2]]),
        "multiply",
    )

    for destination, src, expected, reduce_op in (add_case, multiply_case):
        # Combinations the device does not support are skipped.
        if self.scatter_allow_reduce(device, dtype, reduce_op):
            destination.scatter_(0, index, src, reduce=reduce_op)
            self.assertEqual(destination, expected)
@dtypes(*floating_and_complex_types())
@dtypesIfCPU(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
@dtypesIfPRIVATEUSE1(*(all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16) if not device_is_910A
                       else all_types_and(torch.half, torch.bool)))
def test_scatter_reduce_scalar(self, device, dtype):
    # scatter_ with reduce= and a Python scalar as the source: only column 0
    # of rows 1 and 2 of the 4x4 destination may change.
    index = torch.tensor([[1], [2]], device=device, dtype=torch.long)
    as_tensor = partial(torch.tensor, device=device, dtype=dtype)

    add_case = (
        torch.zeros(4, 4, device=device, dtype=dtype),
        1,
        as_tensor([[0, 0, 0, 0],
                   [1, 0, 0, 0],
                   [1, 0, 0, 0],
                   [0, 0, 0, 0]]),
        "add",
    )
    multiply_case = (
        as_tensor([2]).repeat(4, 4),
        2,
        as_tensor([[2, 2, 2, 2],
                   [4, 2, 2, 2],
                   [4, 2, 2, 2],
                   [2, 2, 2, 2]]),
        "multiply",
    )

    for destination, scalar_src, expected, reduce_op in (add_case, multiply_case):
        # Combinations the device does not support are skipped.
        if self.scatter_allow_reduce(device, dtype, reduce_op):
            destination.scatter_(0, index, scalar_src, reduce=reduce_op)
            self.assertEqual(destination, expected)
def test_scatter_add_non_unique_index(self, device):
    # Every index points at row 0, so both source rows are added into row 0
    # (1 + 1 + 1 = 3) while row 1 keeps its initial value 1.
    shape = (2, 65536)
    width = shape[1]
    destination = torch.ones(*shape, device=device)
    index = torch.zeros(*shape, dtype=torch.long, device=device)
    src = torch.ones(*shape, device=device)

    destination.scatter_add_(0, index, src)

    expected = torch.tensor([[3], [1]], device=device, dtype=torch.float32).repeat(1, width)
    self.assertEqual(destination, expected)
@dtypes(*floating_and_complex_types())
@dtypesIfCPU(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
@dtypesIfPRIVATEUSE1(*(all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16) if not device_is_910A
                       else all_types_and(torch.half, torch.bool)))
def test_scatter_reduce_non_unique_index(self, device, dtype):
    # scatter_ with reduce= where every index points at row 0: both source
    # rows are reduced into row 0 and row 1 is left alone.
    height, width = 2, 2
    index = torch.zeros(height, width, dtype=torch.long, device=device)
    as_tensor = partial(torch.tensor, device=device, dtype=dtype)

    add_case = (
        torch.ones(height, width, device=device, dtype=dtype),
        torch.ones(height, width, device=device, dtype=dtype),
        as_tensor([[3], [1]]).repeat(1, width),
        "add",
    )
    multiply_case = (
        as_tensor([2]).repeat(height, width),
        as_tensor([2]).repeat(height, width),
        as_tensor([[8], [2]]).repeat(1, width),
        "multiply",
    )

    for destination, src, expected, reduce_op in (add_case, multiply_case):
        # Combinations the device does not support are skipped.
        if not self.scatter_allow_reduce(device, dtype, reduce_op):
            continue
        destination.scatter_(0, index, src, reduce=reduce_op)
        self.assertEqual(destination, expected,
                         msg=f"result: {expected} input: {destination} method: {str(reduce_op)}")
@onlyPRIVATEUSE1
@dtypes(*complex_types())
def test_scatter_reduce_multiply_unsupported_dtypes(self, device, dtype):
    # scatter_ with reduce="multiply" must raise for complex dtypes on this
    # device.
    shape = (2, 2)
    index = torch.zeros(*shape, dtype=torch.long, device=device)
    destination = torch.ones(*shape, device=device, dtype=dtype)
    src = torch.ones(*shape, device=device, dtype=dtype)
    with self.assertRaises(RuntimeError):
        destination.scatter_(0, index, src, reduce="multiply")
def test_scatter_to_large_input(self, device):
    # scatter_ from a 2x2 source through a 2x1 index into a 4x4 destination:
    # only column 0 of rows 1 and 2 receives a value.
    destination = torch.zeros(4, 4, device=device)
    src = torch.ones(2, 2, device=device)
    index = torch.tensor([[1], [2]], device=device, dtype=torch.long)

    destination.scatter_(0, index, src)

    expected = torch.tensor([[0, 0, 0, 0],
                             [1, 0, 0, 0],
                             [1, 0, 0, 0],
                             [0, 0, 0, 0]], device=device, dtype=torch.float32)
    self.assertEqual(destination, expected)
def test_scatter_add_to_large_input(self, device):
    # scatter_add_ from a 2x2 source through a 2x1 index into a 4x4
    # destination: only column 0 of rows 1 and 2 receives a contribution.
    destination = torch.zeros(4, 4, device=device)
    src = torch.ones(2, 2, device=device)
    index = torch.tensor([[1], [2]], device=device, dtype=torch.long)

    destination.scatter_add_(0, index, src)

    expected = torch.tensor([[0, 0, 0, 0],
                             [1, 0, 0, 0],
                             [1, 0, 0, 0],
                             [0, 0, 0, 0]], device=device, dtype=torch.float32)
    self.assertEqual(destination, expected)
def test_scatter_bool(self, device):
    # scatter_ of an all-True bool source along dim 0 with indices 0, 1, 2
    # per column must set exactly the diagonal.
    src = torch.tensor([[True, True, True], [True, True, True]], device=device)
    index = torch.tensor([[0, 1, 2], [0, 1, 2]], device=device)
    result = torch.zeros(3, 3, dtype=torch.bool, device=device)
    result = result.scatter_(0, index, src)
    expected = torch.tensor([[True, False, False],
                             [False, True, False],
                             [False, False, True]], device=device)
    self.assertEqual(result, expected)
def test_scatter_add_bool(self, device):
    # scatter_add_ of an all-True bool source: every (row, column) position
    # named by the index becomes True, the rest stay False.
    src = torch.tensor([[True, True, True, True, True], [True, True, True, True, True]], device=device)
    index = torch.tensor([[0, 1, 2, 0, 0], [2, 0, 0, 1, 2]], device=device)
    result = torch.zeros(3, 5, dtype=torch.bool, device=device)
    result = result.scatter_add_(0, index, src)
    expected = torch.tensor([[True, True, True, True, True],
                             [False, True, False, True, False],
                             [True, False, True, False, True]], device=device)
    self.assertEqual(result, expected)
@onlyNativeDeviceTypes
@dtypes(*(all_types_and_complex_and(torch.half, torch.bfloat16) if not device_is_910A
          else all_types_and(torch.half)))
def test_masked_scatter(self, device, dtype):
    # masked_scatter_ checked against an element-by-element Python reference,
    # plus a too-short source (skipped on NPU) and empty destinations.
    dt = dtype
    num_copy, num_dest = 3, 10
    dest = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dt, device=device)
    expected_zeros = dest.clone()
    dest_ones = dest.clone()
    expected_ones = dest.clone()
    src = torch.tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=dt, device=device)
    src_ones = torch.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=dt, device=device)
    mask = torch.tensor((0, 0, 0, 0, 1, 0, 1, 0, 1, 0), dtype=torch.bool, device=device)

    dest.masked_scatter_(mask, src)

    # Reference: the k-th True position of the mask takes the k-th source
    # element.
    consumed = 0
    for pos in range(num_dest):
        if not mask[pos]:
            continue
        expected_zeros[pos] = src[consumed]
        expected_ones[pos] = src_ones[consumed]
        consumed += 1
    self.assertEqual(dest, expected_zeros, atol=0, rtol=0)

    dest_ones.masked_scatter_(mask, src_ones)
    self.assertEqual(dest_ones, expected_ones, atol=0, rtol=0)

    # A source with fewer elements than the mask selects must raise; this
    # check is not run on NPU.
    if self.device_type != 'npu':
        src = torch.zeros(num_copy - 1, dtype=dt, device=device)
        with self.assertRaises(RuntimeError):
            dest.masked_scatter_(mask, src)

    # Empty destination with a same-shape mask.
    dest = torch.empty((5, 0, 5), dtype=dt, device=device)
    mask = torch.ones_like(dest, dtype=torch.bool, device=device)
    src = torch.empty((0,), dtype=dt, device=device)
    dest.masked_scatter_(mask, src)

    # Empty destination with a mask that needs broadcasting.
    dest = torch.empty((5, 0, 5), dtype=dt, device=device)
    mask = torch.ones((5, 1, 5), dtype=torch.bool, device=device)
    src = torch.empty((0,), dtype=dt, device=device)
    dest.masked_scatter_(mask, src)
@skipIfMPS
def test_masked_scatter_bool_tensor(self, device):
    # masked_scatter_ / masked_scatter with bool source and destination.
    src = torch.tensor([True, True, True], device=device)
    dst = torch.tensor([False, False, False], device=device)

    # In-place: only the middle element is selected.
    middle_only = torch.tensor([False, True, False], device=device)
    dst.masked_scatter_(middle_only, src)
    self.assertEqual(dst, torch.tensor([False, True, False], device=device))

    # Out-of-place: the two outer elements are filled in as well.
    outer_only = torch.tensor([True, False, True], device=device)
    dst = dst.masked_scatter(outer_only, src)
    self.assertEqual(dst, torch.tensor([True, True, True], device=device))
@onlyPRIVATEUSE1
@largeTensorTest('30GB')
def test_masked_scatter_large_tensor(self, device):
    # masked_scatter on a bool tensor with 2**31 + 1 elements must match the
    # CPU result; the same tensor serves as input, mask and source.
    cpu_tensor = torch.empty(2**31 + 1, dtype=torch.bool).random_()
    device_tensor = cpu_tensor.to(device)
    cpu_result = cpu_tensor.masked_scatter(cpu_tensor, cpu_tensor)
    device_result = device_tensor.masked_scatter(device_tensor, device_tensor)
    self.assertEqual(device_result, cpu_result)
@dtypes(*(all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16) if not device_is_910A
          else all_types_and(torch.half, torch.bool)))
def test_masked_select(self, device, dtype):
    # masked_select: non-bool masks must be rejected, bool masks are checked
    # against a Python reference (functional and out= forms), and masks are
    # broadcast against the input.
    num_src = 10
    for maskType in integral_types_and(torch.bool):
        src = torch.tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=dtype, device=device)
        mask = torch.randint(2, (num_src,), device=device, dtype=maskType)

        if maskType is not torch.bool:
            with self.assertRaisesRegex(RuntimeError, r'expected BoolTensor for mask'):
                src.masked_select(mask)
            continue

        selected = src.masked_select(mask)
        reference = [src[pos] for pos in range(num_src) if mask[pos]]
        self.assertEqual(selected, torch.tensor(reference), atol=0, rtol=0)

        # Same selection through out= with an initially empty tensor.
        out_buffer = torch.empty(0, device=device, dtype=dtype)
        torch.masked_select(src, mask, out=out_buffer)
        self.assertEqual(out_buffer, torch.tensor(reference, dtype=out_buffer.dtype), atol=0, rtol=0)

    # The broadcasting checks are not run for half on CPU.
    if dtype == torch.half and torch.device(device).type == 'cpu':
        return

    a = torch.rand(100, 100, device=device).mul(100).to(dtype)

    # A (100,) mask with only entry 0 set picks column 0 of every row.
    mask_first_el_each_row = torch.zeros(100, device=device, dtype=torch.bool)
    mask_first_el_each_row[0] = True
    self.assertEqual(a.masked_select(mask_first_el_each_row), a[:, 0])

    # A (100, 1) mask with only entry (0, 0) set picks the whole first row.
    mask_first_row = torch.zeros(100, 1, device=device, dtype=torch.bool)
    mask_first_row[0][0] = True
    self.assertEqual(a.masked_select(mask_first_row), a[0, :])

    # A (4, 1) mask with three True rows against a (100,) input yields the
    # input three times.
    a = torch.rand(100, device=device).mul(100).to(dtype)
    mask_copy_3_times = torch.tensor([[True], [True], [False], [True]], device=device)
    self.assertEqual(a.masked_select(mask_copy_3_times), a.unsqueeze(0).expand(3, 100).flatten())
def test_masked_select_discontiguous(self, device):
    # masked_select over every pairing of contiguous / transposed values and
    # masks, both functionally and into a non-contiguous out= tensor.
    for size in (10, 200):
        vals = torch.rand(size, size, device=device)
        mask = torch.full((size, size), False, dtype=torch.bool, device=device)
        mask[:, ::2] = True
        # Strided output buffer: every other element of a flat buffer.
        out_dc = torch.empty(size * size, device=device)[::2]

        for v, m in product((vals, vals.t()), (mask, mask.t())):
            # The original mask selects every other column; its transpose
            # selects every other row.
            picked = v[:, ::2] if m.is_contiguous() else v[::2]
            expected = picked.clone().reshape((-1, ))

            self.assertEqual(torch.masked_select(v, m), expected, atol=0, rtol=0)
            torch.masked_select(v, m, out=out_dc)
            self.assertEqual(out_dc, expected, atol=0, rtol=0)
@dtypes(*product((all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16) if not device_is_910A
                  else all_types_and(torch.half, torch.bool)), (torch.uint8, torch.bool)))
def test_masked_fill(self, device, dtypes):
    # masked_fill_ for each (value dtype, mask dtype) pair: uint8 masks must
    # be rejected, bool masks are checked against a Python loop and against
    # a contiguous copy of a permuted tensor.
    # NOTE(review): none of the tensors below is created with device=...,
    # so this runs on the default device -- confirm that is intended.
    dtype, mask_dtype = dtypes[0], dtypes[1]
    num_dest = 10
    filled = torch.zeros(num_dest, dtype=dtype)
    mask = torch.randint(2, (num_dest,), dtype=mask_dtype)
    val = random.random()
    reference = filled.clone()

    if mask_dtype is not torch.bool:
        with self.assertRaisesRegex(RuntimeError, 'only supports boolean masks'):
            filled.masked_fill_(mask, val)
        return

    filled.masked_fill_(mask, val)
    for pos in range(num_dest):
        if mask[pos]:
            reference[pos] = val
    self.assertEqual(filled, reference, atol=0, rtol=0)

    # Non-contiguous destination versus its contiguous copy.
    permuted = ((torch.randn(num_dest, num_dest, num_dest) * 10).to(dtype)).permute((2, 0, 1))
    dense = permuted.contiguous()
    # Complex values are masked on their magnitude instead of the value.
    mask = (permuted.abs() > 0) if dtype.is_complex else (permuted > 0)
    self.assertTrue(not permuted.is_contiguous())
    self.assertTrue(dense.is_contiguous())
    permuted.masked_fill_(mask.to(mask_dtype), val)
    dense.masked_fill_(mask.to(mask_dtype), val)
    self.assertEqual(permuted, dense, atol=0, rtol=0)
def test_masked_fill_bool_tensor(self, device):
    # masked_fill_ / masked_fill on a bool tensor with a bool fill value.
    middle_only = torch.tensor([False, True, False], device=device)
    dst = torch.tensor([True, False, True], device=device)

    # In-place: set the middle element.
    dst.masked_fill_(middle_only, True)
    self.assertEqual(dst, torch.tensor([True, True, True], device=device))

    # Out-of-place: clear it again.
    dst = dst.masked_fill(middle_only, False)
    self.assertEqual(dst, torch.tensor([True, False, True], device=device))
def test_tensor_shape_empty(self, device):
    # Result shapes of shape-manipulating ops applied to tensors that have
    # zero-sized dimensions.
    x = torch.randn((0, 1, 3, 0), device=device)

    def shapes_of(pieces):
        return [piece.shape for piece in pieces]

    # flatten / unsqueeze / squeeze / transpose
    self.assertEqual((0,), torch.flatten(x, 0, 3).shape)
    self.assertEqual((0, 0), torch.flatten(x, 0, 2).shape)
    self.assertEqual((0, 3, 0), torch.flatten(x, 1, 2).shape)
    self.assertEqual((0, 1, 1, 3, 0), torch.unsqueeze(x, 1).shape)
    self.assertEqual((0, 3, 0), torch.squeeze(x, 1).shape)
    self.assertEqual((0, 3, 0), torch.squeeze(x).shape)
    self.assertEqual((0, 0, 3, 1), torch.transpose(x, 1, 3).shape)
    two_dim = torch.randn((5, 0), device=device)
    self.assertEqual((0, 5), two_dim.t().shape)

    # select / repeat / permute
    self.assertEqual((0, 1, 0), torch.select(x, 2, 2).shape)
    self.assertEqual((9, 0, 5, 6, 0), x.repeat(9, 7, 5, 2, 3).shape)
    self.assertEqual((3, 0, 0, 1), x.permute(2, 3, 0, 1).shape)

    # diagonal of matrices with an empty dim, at offsets 0 and 1
    for offset in (0, 1):
        for size in ((5, 0), (0, 5)):
            matrix = torch.randn(size, device=device)
            self.assertEqual((0,), torch.diagonal(matrix, offset=offset).shape)
    # diagonal with an offset far outside the matrix
    for offset in (45252, -45252):
        batch = torch.randn((3, 4, 5, 6), device=device)
        self.assertEqual((5, 6, 0), torch.diagonal(batch, offset=offset).shape)

    # diagflat of empty 1-D and 2-D inputs
    for empty_data in ([], [[]]):
        self.assertEqual((0, 0), torch.diagflat(torch.tensor(empty_data, device=device)).shape)
        self.assertEqual(torch.zeros(1, 1), torch.diagflat(torch.tensor(empty_data, device=device), offset=1))

    # stack / chunk / split
    self.assertEqual((4, 0, 1, 3, 0), torch.stack((x, x, x, x)).shape)
    self.assertEqual([(0, 1, 3, 0)], shapes_of(torch.chunk(x, 1, dim=0)))
    self.assertEqual([(0, 1, 3, 0), ] * 3, shapes_of(torch.chunk(x, 3, dim=0)))
    self.assertEqual([(0, 1, 1, 0), ] * 3, shapes_of(torch.chunk(x, 3, dim=2)))
    self.assertEqual([(0, 1, 0, 0), (0, 1, 1, 0), (0, 1, 2, 0)],
                     shapes_of(torch.split(x, (0, 1, 2), dim=2)))
    # A split size of 0 is an error on a non-empty dim but allowed on an
    # empty one.
    self.assertRaises(RuntimeError, lambda: torch.split(x, 0, dim=1))
    self.assertEqual([(0, 1, 3, 0)], shapes_of(torch.split(x, 1, dim=0)))
    self.assertEqual([(0, 1, 3, 0)], shapes_of(torch.split(x, 0, dim=0)))
def test_dim_function_empty(self, device):
# Exercises functions that take a ``dim`` argument on tensors with
# zero-sized dimensions (mostly a (0, 1, 2, 0) input), checking either the
# result shape or that the result equals the empty input.
shape = (0, 1, 2, 0)
x = torch.randn(shape, device=device)
# size / stride queries
self.assertEqual(0, x.size(3))
self.assertEqual(2, x.size(2))
self.assertEqual(2, x.stride(0))
self.assertEqual(1, x.stride(2))
# glu / softmax / log_softmax
self.assertEqual(x, torch.nn.functional.glu(x, 0))
self.assertEqual((0, 1, 1, 0), torch.nn.functional.glu(x, 2).shape)
self.assertEqual(x, torch.nn.functional.softmax(x, 0))
self.assertEqual(x, torch.nn.functional.softmax(x, 2))
self.assertEqual(x, torch.nn.functional.softmax(x, 3))
self.assertEqual(x, torch.nn.functional.log_softmax(x, 0))
self.assertEqual(x, torch.nn.functional.log_softmax(x, 2))
self.assertEqual(x, torch.nn.functional.log_softmax(x, 3))
# cumulative ops: cumsum, cumprod, cummax, cummin, logcumsumexp
self.assertEqual(shape, torch.cumsum(x, 0).shape)
self.assertEqual(shape, torch.cumsum(x, 2).shape)
self.assertEqual(shape, torch.cumprod(x, 0).shape)
self.assertEqual(shape, torch.cumprod(x, 2).shape)
self.assertEqual(shape, torch.cummax(x, 0)[0].shape)
self.assertEqual(shape, torch.cummax(x, 2)[0].shape)
self.assertEqual(shape, torch.cummin(x, 0)[0].shape)
self.assertEqual(shape, torch.cummin(x, 2)[0].shape)
self.assertEqual(shape, torch.logcumsumexp(x, 0).shape)
self.assertEqual(shape, torch.logcumsumexp(x, 2).shape)
# flip / roll
self.assertEqual(x, x.flip(0))
self.assertEqual(x, x.flip(2))
self.assertEqual(x, x.roll(0, 1).roll(0, -1))
self.assertEqual(x, x.roll(1, x.size(1)))
self.assertEqual(x, x.roll(1))
self.assertEqual(x, x.roll((1, 1), (3, 1)))
# unbind: no pieces along an empty dim, two empty pieces along dim 2
self.assertEqual((), x.unbind(0))
self.assertEqual((torch.empty((0, 1, 0), device=device), torch.empty((0, 1, 0), device=device)),
x.unbind(2))
# cross
y = torch.randn((0, 1, 3, 0), device=device)
self.assertEqual(y.shape, torch.cross(y, y).shape)
# renorm
self.assertEqual(shape, torch.renorm(x, 1, 0, 5).shape)
self.assertEqual(shape, torch.renorm(x, 1, 2, 5).shape)
# sort / topk
self.assertEqual([shape, shape], [z.shape for z in torch.sort(x, dim=0)])
self.assertEqual([shape, shape], [z.shape for z in torch.sort(x, dim=2)])
self.assertEqual([shape, shape], [z.shape for z in torch.topk(x, 0, dim=0)])
self.assertEqual([(0, 1, 1, 0), (0, 1, 1, 0)], [z.shape for z in torch.topk(x, 1, dim=2)])
y = torch.randn((2, 3, 4), device=device)
self.assertEqual([(2, 3, 0), (2, 3, 0)], [z.shape for z in torch.topk(y, 0)])
# gather: the result takes the shape of the index tensor
self.assertEqual(shape, torch.gather(x, 0, torch.empty(shape, dtype=torch.int64, device=device)).shape)
self.assertEqual(shape, torch.gather(x, 2, torch.empty(shape, dtype=torch.int64, device=device)).shape)
larger_shape = torch.empty((0, 1, 3, 0), dtype=torch.int64, device=device)
self.assertEqual(larger_shape.shape, torch.gather(x, 2, larger_shape).shape)
smaller_shape = torch.empty((0, 1, 0, 0), dtype=torch.int64, device=device)
self.assertEqual(smaller_shape.shape, torch.gather(x, 2, smaller_shape).shape)
y = torch.randn((2, 3, 4), device=device)
self.assertEqual((0, 3, 4),
torch.gather(y, 0, torch.empty((0, 3, 4), dtype=torch.int64, device=device)).shape)
# scatter_ / scatter_add_ on empty tensors, then with an empty index on a
# non-empty tensor
for dim in [0, 2]:
y = torch.randn(shape, device=device)
y_src = torch.randn(shape, device=device)
ind = torch.empty(shape, dtype=torch.int64, device=device)
self.assertEqual(shape, y.scatter_(dim, ind, y_src).shape)
self.assertEqual(shape, y.scatter_add_(dim, ind, y_src).shape)
z = torch.randn((2, 3, 4), device=device)
z_src = torch.randn((2, 3, 4), device=device)
self.assertEqual(z, z.scatter_(2, torch.empty((2, 3, 0), dtype=torch.int64, device=device), z_src))
self.assertEqual(z, z.scatter_add_(2, torch.empty((2, 3, 0), dtype=torch.int64, device=device), z_src))
# index_fill_ / index_copy_ / index_add_ on the (0, 1, 2, 0) tensor
c = x.clone()
c_clone = c.clone()
ind_empty = torch.tensor([], dtype=torch.int64, device=device)
ind_01 = torch.tensor([0, 1], dtype=torch.int64, device=device)
self.assertEqual(c_clone, c.index_fill_(0, ind_empty, -1))
self.assertEqual(c_clone, c.index_fill_(2, ind_empty, -1))
self.assertEqual(c_clone, c.index_fill_(2, ind_01, -1))
self.assertEqual(c_clone, c.index_copy_(0, ind_empty, torch.empty((0, 1, 2, 0), device=device)))
self.assertEqual(c_clone, c.index_copy_(2, ind_empty, torch.empty((0, 1, 0, 0), device=device)))
self.assertEqual(c_clone, c.index_copy_(2, ind_01, torch.empty((0, 1, 2, 0), device=device)))
self.assertEqual(c_clone, c.index_add_(0, ind_empty, torch.empty((0, 1, 2, 0), device=device)))
self.assertEqual(c_clone, c.index_add_(2, ind_empty, torch.empty((0, 1, 0, 0), device=device)))
self.assertEqual(c_clone, c.index_add_(2, ind_01, torch.empty((0, 1, 2, 0), device=device)))
# the same three ops on a (0, 1, 2) tensor
c = torch.randn((0, 1, 2), device=device)
c_clone = c.clone()
self.assertEqual(c_clone, c.index_fill_(0, ind_empty, -1))
self.assertEqual(c_clone, c.index_copy_(0, ind_empty, torch.empty((0, 1, 2), device=device)))
self.assertEqual(c_clone, c.index_add_(0, ind_empty, torch.empty((0, 1, 2), device=device)))
# NOTE(review): the next three assertions repeat the three directly above
# verbatim -- confirm whether a different case was intended.
self.assertEqual(c_clone, c.index_fill_(0, ind_empty, -1))
self.assertEqual(c_clone, c.index_copy_(0, ind_empty, torch.empty((0, 1, 2), device=device)))
self.assertEqual(c_clone, c.index_add_(0, ind_empty, torch.empty((0, 1, 2), device=device)))
# empty index on a non-empty (2, 3, 4) tensor: the tensor is returned unchanged
z = torch.randn((2, 3, 4), device=device)
self.assertEqual(z, z.index_fill_(0, ind_empty, -1))
z = torch.randn((2, 3, 4), device=device)
self.assertEqual(z, z.index_copy_(0, ind_empty, torch.empty((0, 3, 4), device=device)))
z = torch.randn((2, 3, 4), device=device)
self.assertEqual(z, z.index_add_(0, ind_empty, torch.empty((0, 3, 4), device=device)))
# index_select
self.assertEqual(x, x.index_select(0, ind_empty))
self.assertEqual((0, 1, 0, 0), x.index_select(2, ind_empty).shape)
self.assertEqual(x, x.index_select(2, ind_01))
z = torch.randn((2, 3, 4), device=device)
self.assertEqual((0, 3, 4), z.index_select(0, ind_empty).shape)
c = torch.randn((0, 1, 2), device=device)
self.assertEqual(c, c.index_select(0, ind_empty))
# NOTE(review): the next two lines repeat the two directly above verbatim.
c = torch.randn((0, 1, 2), device=device)
self.assertEqual(c, c.index_select(0, ind_empty))
w = torch.randn((0, 3), device=device)
self.assertEqual((0, 2), w.index_select(1, ind_01).shape)
w = torch.randn((3, 0), device=device)
self.assertEqual((2, 0), w.index_select(0, ind_01).shape)
# int32 indices are accepted as well
ind_01_int32 = torch.tensor([0, 1], dtype=torch.int32, device=device)
self.assertEqual((2, 0), w.index_select(0, ind_01_int32).shape)
# index_select on a 0-dim tensor with a single index
s = torch.randn([], device=device)
ind_0 = torch.tensor([0], dtype=torch.int32, device=device)
self.assertEqual([], s.index_select(0, ind_0).shape)
# Error cases. NOTE(review): ``device`` is compared with the literal 'cpu';
# the original indentation is not visible here, so which of the checks
# below are CPU-only cannot be determined from this view.
if device == 'cpu':
w = torch.randn((0, 3), device=device)
with self.assertRaisesRegex(RuntimeError, "self indexing axis dim should be positive"):
torch.index_select(w, 0, ind_01)
ind_05 = torch.tensor([0, 5], dtype=torch.int64, device=device)
with self.assertRaisesRegex(RuntimeError, "INDICES element is out of DATA bounds"):
torch.index_select(w, 1, ind_05)
with self.assertRaisesRegex(RuntimeError, "Index to scalar can have only 1 value"):
torch.index_select(s, 0, ind_empty)
with self.assertRaisesRegex(RuntimeError, "Index to scalar can have only 1 value"):
torch.ones([]).index_select(0, torch.Tensor([0, 0]).int())
@unittest.skipIf(IS_FBCODE and IS_REMOTE_GPU, "sandcastle OOM with current tpx gpu/re configuration")
@skipIfRocm
@onlyPRIVATEUSE1
@largeTensorTest('32GB', device='cpu')
@largeTensorTest('5GB', device='npu')
def test_pdist_norm_large(self, device):
    """Compare ``torch.pdist`` on ``device`` with the CPU result for a 50000x1 input."""
    # The input is created without ``device=`` so the very same values feed
    # both the CPU reference and the device computation.
    cpu_input = torch.randn(50000, 1, dtype=torch.float32)
    reference = torch.pdist(cpu_input, p=2)
    device_input = cpu_input.to(device)
    from_device = torch.pdist(device_input, p=2).cpu()
    self.assertTrue(torch.allclose(reference, from_device))
@onlyNativeDeviceTypes
@dtypesIfPRIVATEUSE1(*set(get_all_math_dtypes('npu')))
@dtypes(*set(get_all_math_dtypes('cpu')))
def test_addcdiv(self, device, dtype):
    """Check ``torch.addcdiv`` against ``a + value * b / c`` for each math dtype.

    Floating-point and complex dtypes must match the reference expression and
    emit the deprecation warning for the positional-``value`` overload.  For
    every other dtype the whole check is expected to raise ``RuntimeError``.
    """
    real_float_dtypes = (torch.half, torch.float, torch.double, torch.bfloat16)
    complex_dtypes = (torch.cfloat, torch.cdouble)

    def _number(floating, integer, dtype):
        # Pick the scalar whose kind matches ``dtype``.
        if dtype in real_float_dtypes:
            return floating
        if dtype in complex_dtypes:
            return floating * (1 + 1j)
        return integer

    def non_zero_rand(size, dtype, device):
        # Random tensor in which every entry equal to zero is bumped by one.
        common = dict(size=size, dtype=dtype, device=device)
        if dtype.is_floating_point or dtype.is_complex:
            sample = torch.rand(**common)
        elif dtype == torch.uint8:
            sample = torch.randint(1, 5, **common)
        else:
            sample = torch.randint(-5, 5, **common)
        zero_mask = sample == 0
        return sample + zero_mask.to(dtype)

    def _test_addcdiv():
        base = non_zero_rand((2, 2), dtype=dtype, device=device)
        numer = non_zero_rand((2, 2), dtype=dtype, device=device)
        denom = non_zero_rand((2, 2), dtype=dtype, device=device)
        alpha = _number(0.5, 3, dtype)

        expected = base + (alpha * numer) / denom
        actual = torch.addcdiv(base, numer, denom, value=alpha)
        self.assertEqual(expected, actual)

        # The overload taking ``value`` as second positional argument must warn.
        with self.assertWarnsOnceRegex(
                UserWarning, "This overload of addcdiv is deprecated"):
            legacy = torch.addcdiv(base, alpha, numer, denom)
            self.assertEqual(actual, legacy)

    if dtype.is_floating_point or dtype.is_complex:
        _test_addcdiv()
    else:
        with self.assertRaises(RuntimeError):
            _test_addcdiv()

    if self.device_type == 'npu' and dtype == torch.half:
        # Large half-precision operands: the result must stay finite.
        # NOTE(review): this calls addcmul, not addcdiv, inside test_addcdiv;
        # possibly a copy-paste from the addcmul test -- confirm intent.
        big_a = torch.tensor([60000.0], device=device, dtype=dtype)
        big_b = torch.tensor([60000.0], device=device, dtype=dtype)
        unit = torch.tensor([1.0], device=device, dtype=dtype)
        out = torch.addcmul(big_a, big_b, unit, value=-2)
        self.assertTrue(not (out.isnan() or out.isinf()))
def test_nullary_op_mem_overlap(self, device):
    """Check that in-place random fills reject an internally overlapping tensor."""
    # Method name -> positional arguments for the call.
    sampler_args = {
        "random_": (),
        "uniform_": (),
        "cauchy_": (),
        "log_normal_": (),
        "exponential_": (),
        "geometric_": (0.5,),
        "normal_": (),
    }
    # Expanding a (1, 3) tensor to (3, 3) makes the rows share memory.
    # NOTE(review): ``device`` is not passed here, so the tensor is created on
    # the default device for every parametrization -- confirm this is intended.
    overlapping = torch.rand((1, 3)).expand((3, 3))
    for method_name, call_args in sampler_args.items():
        with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
            getattr(overlapping, method_name)(*call_args)
@xfailIfTorchDynamo
@skipIfTorchInductor("pytorch/issues/126474")
@dtypes(torch.double)
def test_ternary_op_mem_overlap(self, device, dtype):
    """Run the shared memory-overlap checks for addcmul, addcdiv and lerp."""
    if TEST_WITH_TORCHINDUCTOR and device == "cpu":
        self.skipTest("Failing on cpu")

    # Rows are (op name, has input/output overlap check,
    #           has internal overlap check, device the row applies to).
    ops = [
        (op_name, True, True, target)
        for op_name in ("addcmul", "addcdiv", "lerp")
        for target in ('cpu', 'npu')
    ]

    for op_name, checks_in_out, checks_internal, target in ops:
        # NOTE(review): this is an exact string comparison; if the harness
        # passes an indexed name such as 'npu:0', no 'npu' row matches -- confirm.
        if target == device:
            functional_op = getattr(torch, op_name)
            method_op = getattr(torch.Tensor, op_name + '_')
            self.check_internal_mem_overlap(
                method_op, 3, dtype, device,
                expected_failure=not checks_internal)
            self.ternary_check_input_output_mem_overlap(
                functional_op, target,
                expected_failure=not checks_in_out)
@expectedFailureMeta
@dtypes(torch.double)
@onlyNativeDeviceTypes
def test_copy_mem_overlap(self, device, dtype):
    """Run the shared internal and input/output overlap checks for ``copy_``."""
    def copy_into(input_, out):
        return out.copy_(input_)

    self.check_internal_mem_overlap(
        torch.Tensor.copy_, num_inputs=2, dtype=dtype, device=device)

    half_len = 9
    # Twice ``half_len`` elements are allocated and handed to the helper
    # together with ``half_len``.
    buffer = torch.randn(2 * half_len, dtype=dtype, device=device)
    self.unary_check_input_output_mem_overlap(buffer, half_len, copy_into)
@onlyNativeDeviceTypes
def test_index_add_mem_overlap(self, device):
    """Check that ``index_add_`` raises 'unsupported operation' on overlapping operands."""
    def overlap_error():
        return self.assertRaisesRegex(RuntimeError, 'unsupported operation')

    expanded = torch.rand((1,), device=device).expand((6,))
    dense = torch.rand((6,), device=device)
    index = torch.tensor([2, 1, 0], device=device)
    source = torch.rand((3,), device=device)

    # Destination is an expanded (self-overlapping) tensor.
    with overlap_error():
        expanded.index_add_(0, index, source)
    # Source is a slice of the destination.
    with overlap_error():
        dense.index_add_(0, index, dense[:3])
    # Destination is the index tensor itself.
    with overlap_error():
        index.index_add_(0, index, index.clone())
    # Source is the destination.
    with overlap_error():
        index.index_add_(0, index.clone(), index)
@onlyPRIVATEUSE1
@skipCUDAIfNotRocm
def test_index_add_large_inputs(self, device):
    """Compare ``index_add_`` on a large bfloat16 input with a row-by-row reference."""
    width = 6144
    target = torch.zeros([16384, width], device=device, dtype=torch.bfloat16)
    index = torch.randint(0, 16384, (1, 32, 16384), device=device, dtype=torch.int64)
    updates = torch.ones([1, 32, 16384, width], device=device, dtype=torch.bfloat16)

    # Reference: accumulate one update row at a time into a copy of ``target``.
    reference = target.clone()
    n_batches, n_positions = updates.shape[1], updates.shape[2]
    for b in range(n_batches):
        for pos in range(n_positions):
            row = index[0, b, pos].item()
            reference[row] += updates[0, b, pos]

    flat_index = index.view(-1)
    flat_updates = updates.view(-1, width)
    target.index_add_(0, flat_index, flat_updates)
    self.assertEqual(reference, target)
@onlyNativeDeviceTypes
def test_index_copy_mem_overlap(self, device):
    """Check that ``index_copy_`` raises 'unsupported operation' on overlapping operands."""
    def overlap_error():
        return self.assertRaisesRegex(RuntimeError, 'unsupported operation')

    expanded = torch.rand((1,), device=device).expand((6,))
    dense = torch.rand((6,), device=device)
    index = torch.tensor([2, 1, 0], device=device)
    source = torch.rand((3,), device=device)

    # Destination is an expanded (self-overlapping) tensor.
    with overlap_error():
        expanded.index_copy_(0, index, source)
    # Source is a slice of the destination.
    with overlap_error():
        dense.index_copy_(0, index, dense[:3])
    # Destination is the index tensor itself.
    with overlap_error():
        index.index_copy_(0, index, index.clone())
    # Source is the destination.
    with overlap_error():
        index.index_copy_(0, index.clone(), index)
@expectedFailureMeta
@onlyNativeDeviceTypes
def test_index_fill_mem_overlap(self, device):
    """Check the warning and the error ``index_fill_`` gives for overlapping memory."""
    expanded = torch.rand((1,), device=device).expand((6,))
    index = torch.tensor([2, 1, 0], device=device)

    # Filling an expanded tensor only warns.
    expect_warning = self.assertWarnsRegex(UserWarning, "index_fill_ on expanded tensors")
    with expect_warning:
        expanded.index_fill_(0, index, 1.0)

    # Using the destination as its own index is rejected.
    expect_error = self.assertRaisesRegex(RuntimeError, 'unsupported operation')
    with expect_error:
        index.index_fill_(0, index, 0)
@expectedFailureMeta
@onlyNativeDeviceTypes
def test_shift_mem_overlap(self, device):
    """Check that in-place shifts between overlapping slices raise 'unsupported operation'."""
    def overlap_error():
        return self.assertRaisesRegex(RuntimeError, 'unsupported operation')

    values = torch.rand(3, device=device)
    # ``values[:-1]`` and ``values[1:]`` are views that share elements.
    with overlap_error():
        values[:-1] <<= values[1:]
    with overlap_error():
        values[:-1] >>= values[1:]
@expectedFailureMeta
@onlyNativeDeviceTypes
def test_bernoulli_mem_overlap(self, device):
    """Check that ``bernoulli_`` on an expanded tensor raises 'unsupported operation'."""
    def overlap_error():
        return self.assertRaisesRegex(RuntimeError, 'unsupported operation')

    expanded = torch.rand((1,), device=device).expand((6,))

    # Default probability.
    with overlap_error():
        expanded.bernoulli_()
    # Scalar probability.
    with overlap_error():
        expanded.bernoulli_(p=0.1)
    # Tensor of probabilities.
    probs = torch.rand(6, device=device)
    with overlap_error():
        expanded.bernoulli_(p=probs)
@expectedFailureMeta
@onlyNativeDeviceTypes
def test_put_mem_overlap(self, device):
    """Check that ``put_`` raises 'unsupported operation' on overlapping operands."""
    def overlap_error():
        return self.assertRaisesRegex(RuntimeError, 'unsupported operation')

    expanded = torch.rand((1,), device=device).expand((6,))
    dense = torch.rand((6,), device=device)
    index = torch.tensor([2, 1, 0], device=device)
    source = torch.rand((3,), device=device)

    # Destination is an expanded (self-overlapping) tensor.
    with overlap_error():
        expanded.put_(index, source)
    # Source is a single element of the destination.
    with overlap_error():
        dense.put_(index[0], dense[0])
    # Destination, index and source are all the same tensor.
    with overlap_error():
        index.put_(index, index)
    # Source is a slice of the destination.
    with overlap_error():
        dense.put_(index, dense[:3])
    # Destination is the index tensor itself.
    with overlap_error():
        index.put_(index, index.clone())
    # Source is the destination.
    with overlap_error():
        index.put_(index.clone(), index)
@expectedFailureMeta
@onlyNativeDeviceTypes
def test_index_put_mem_overlap(self, device):
    """Check the warning and the errors ``index_put_`` gives for overlapping memory."""
    def overlap_error():
        return self.assertRaisesRegex(RuntimeError, 'unsupported operation')

    expanded = torch.rand((1,), device=device).expand((6,))
    dense = torch.rand((6,), device=device)
    index = torch.tensor([2, 1, 0], device=device)
    source = torch.rand((3,), device=device)

    # Writing into an expanded tensor only warns.
    with self.assertWarnsRegex(UserWarning, 'expanded tensors'):
        expanded.index_put_((index,), source)

    # Source is a single element of the destination.
    with overlap_error():
        dense.index_put_((index,), dense[0])
    # Destination, index and source are all the same tensor.
    with overlap_error():
        index.index_put_((index,), index)
    # Source is a slice of the destination.
    with overlap_error():
        dense.index_put_((index,), dense[:3])
    # Destination is the index tensor itself.
    with overlap_error():
        index.index_put_((index,), index.clone())
    # Source is the destination.
    with overlap_error():
        index.index_put_((index.clone(),), index)
@expectedFailureMeta
@onlyNativeDeviceTypes
def test_masked_fill_mem_overlap(self, device):
    """Check the warnings and the error ``masked_fill_`` gives for overlapping memory."""
    def expanded_warning():
        return self.assertWarnsRegex(UserWarning, 'expanded tensors')

    expanded = torch.rand((1,), device=device).expand((6,))
    mask = torch.tensor([True, False, True, True, False, False], device=device)

    # Python scalar fill value.
    with expanded_warning():
        expanded.masked_fill_(mask, 0.)

    # Tensor fill value.
    fill_val = torch.tensor(0., device=device)
    with expanded_warning():
        expanded.masked_fill_(mask, fill_val)

    # Destination and mask are shifted views of the same tensor.
    with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
        mask[1:].masked_fill_(mask[:-1], False)
@expectedFailureMeta
@onlyNativeDeviceTypes
def test_masked_scatter_mem_overlap(self, device):
    """Check that ``masked_scatter_`` into an expanded tensor raises 'unsupported operation'."""
    expanded = torch.rand((1,), device=device).expand((6,))
    source = torch.rand((3,), device=device)
    selection = torch.tensor([True, False, True, True, False, False], device=device)

    expect_error = self.assertRaisesRegex(RuntimeError, 'unsupported operation')
    with expect_error:
        expanded.masked_scatter_(selection, source)
@onlyNativeDeviceTypes
def test_scatter_mem_overlap(self, device):
    """Check that ``scatter_`` raises 'unsupported operation' on overlapping operands."""
    def overlap_error():
        return self.assertRaisesRegex(RuntimeError, 'unsupported operation')

    expanded = torch.rand((1,), device=device).expand((6,))
    source = torch.rand((3,), device=device)
    index = torch.tensor([2, 1, 0], device=device, dtype=torch.int64)

    # Destination is an expanded (self-overlapping) tensor.
    with overlap_error():
        expanded.scatter_(0, index, source)
    # Source is the destination.
    with overlap_error():
        source.scatter_(0, index, source)
    # Destination is the index tensor itself.
    with overlap_error():
        index.scatter_(0, index, index.clone())
@onlyPRIVATEUSE1
def test_multinomial_device_constrain(self, device):
    """Check that ``multinomial`` rejects a CPU input combined with an ``out`` on ``device``."""
    cpu_probs = torch.empty(3, device="cpu")
    device_out = torch.empty(3, device=device)
    with self.assertRaisesRegex(
            RuntimeError, "Expected all tensors to be on the same device"):
        torch.multinomial(cpu_probs, 2, out=device_out)
@deviceCountAtLeast(2)
@onlyPRIVATEUSE1
@skipIfTorchInductor("FIXME: error not thrown")
def test_multinomial_NPU_device_constrain(self, devices):
    """Check that ``multinomial`` rejects input and ``out`` living on two different devices."""
    first_dev, second_dev = devices[0], devices[1]
    probs = torch.empty(3, device=first_dev)
    out = torch.empty(3, device=second_dev, dtype=torch.long)
    with self.assertRaisesRegex(
            RuntimeError, "Expected all tensors to be on the same device"):
        torch.multinomial(probs, 2, out=out)
@deviceCountAtLeast(2)
@onlyPRIVATEUSE1
def test_device_guard(self, devices):
    """Smoke-test a list of tensor operations on tensors that live on ``devices[1]``.

    Nothing is asserted: the test passes when none of the calls below raises.
    NOTE(review): presumably the point is to exercise ops while the tensors
    are on a non-default device -- confirm against the upstream test.
    """
    # Every operand is allocated on the second device.
    x = torch.randn((1, 2, 3), device=devices[1])
    y = torch.zeros((1, 3, 2), device=devices[1])
    scalar = torch.tensor(5, device=devices[1])

    # Queries on dense tensors.
    torch.cudnn_is_acceptable(x)
    x.is_distributed()
    x.is_floating_point()
    x.is_complex()
    x.is_same_size(y)
    x.is_signed()
    x.size(0)
    x.stride(0)
    x.numel()
    x.is_set_to(y)
    x.data_ptr()
    scalar.is_nonzero()

    # Queries on a sparse tensor; one entry is set before the conversion.
    y[0][1] = 5
    y_sparse = y.to_sparse()
    y_sparse.sparse_dim()
    y_sparse._dimI()
    y_sparse.dense_dim()
    y_sparse._dimV()
    y_sparse._nnz()
    y_sparse.is_coalesced()
    y_sparse._indices()
    y_sparse._values()
    y_sparse.indices()
    y_sparse.values()

    # In-place ops: each call works on a fresh tensor so that ``x`` is untouched.
    def inplace():
        return torch.randn((1, 2, 3), device=devices[1])
    inplace().as_strided_(y.size(), y.stride())
    inplace().resize_(y.size())
    inplace().squeeze_()
    inplace().squeeze_(0)
    inplace().unsqueeze_(2)
    inplace().transpose_(1, 2)
    inplace().squeeze_().t_()
    inplace().set_(x.storage())
    inplace().set_(x.storage(), x.storage_offset(), x.size(), x.stride())
    inplace().set_(x)
    inplace().set_()
    y_sparse._coalesced_(True)

    # Shape-changing ops.
    x.as_strided(y.size(), y.stride())
    x.expand((5, 2, 3))
    x.expand_as(x)
    x.sum_to_size((1,))
    torch.broadcast_tensors(x, x)
    x.reshape((1, 3, 2))
    x.reshape_as(y)
    x.squeeze()
    x.squeeze(0)
    x.squeeze().t()
    x.transpose(1, 2)
    x.unsqueeze(2)
    x.view((1, 3, 2))
    x.view_as(y)

    # Splitting and slicing ops.
    x.chunk(2, dim=1)
    x.split(1, dim=2)
    x.split_with_sizes([1, 2], dim=2)
    x.unfold(dimension=2, size=1, step=1)
    x.narrow(1, 1, 1)
    x.select(1, 1)

    # Remaining ops: isnan, empty/empty_like and ``to`` with a tensor argument.
    torch.isnan(x)
    torch.empty((1, 3, 2), out=y)
    torch.empty_like(x)
    torch.empty_like(x, dtype=torch.int64)
    x.to(x)
    x.to(y)
    x.to(x, copy=True)
def test_is_signed(self, device):
    """Check ``is_signed()`` for several legacy tensor types after moving them to ``device``."""
    # (legacy constructor, element count, expected signedness)
    cases = (
        (torch.IntTensor, 5, True),
        (torch.ByteTensor, 5, False),
        (torch.CharTensor, 5, True),
        (torch.FloatTensor, 5, True),
        (torch.HalfTensor, 10, True),
    )
    for make_tensor_of, count, expected in cases:
        on_device = make_tensor_of(count).to(device)
        self.assertEqual(on_device.is_signed(), expected)
def test_tensor_type(self):
    """Check that ``is_npu`` / ``is_xpu`` on each legacy tensor class match its module name."""
    for tensor_cls in torch._tensor_classes:
        module_name = tensor_cls.__module__
        # A class reports a backend exactly when the backend name occurs in
        # the name of the module that defines it.
        expect_npu = 'npu' in module_name
        self.assertEqual(tensor_cls.is_npu, expect_npu)
        expect_xpu = 'xpu' in module_name
        self.assertEqual(tensor_cls.is_xpu, expect_xpu)
@deviceCountAtLeast(2)
@skipCUDAMemoryLeakCheckIf(True)
@onlyPRIVATEUSE1
def test_tensor_set_errors_multigpu(self, devices):
    """Check that ``set_`` refuses storage or tensors that live on another device."""
    local = torch.randn((2, 3), dtype=torch.float32, device=devices[0])
    remote = torch.randn((2, 3), dtype=torch.float32, device=devices[1])

    # Storage only.
    with self.assertRaises(RuntimeError):
        local.set_(remote.storage())
    # Storage with explicit offset, size and stride.
    with self.assertRaises(RuntimeError):
        local.set_(remote.storage(), 0, remote.size(), remote.stride())
    # Whole tensor.
    with self.assertRaises(RuntimeError):
        local.set_(remote)
@onlyPRIVATEUSE1
@deviceCountAtLeast(1)
def test_serialization(self, devices):
    """Round-trip NPU tensors through ``torch.save`` / ``torch.load`` and check their devices."""
    first_dev, last_dev = devices[0], devices[-1]

    def _test_serialization(filecontext_lambda):
        on_current = torch_npu.npu.FloatTensor(5).fill_(1)
        # The second tensor is created while the last device is current.
        with torch_npu.npu.device(last_dev):
            on_last = torch_npu.npu.FloatTensor(3).fill_(2)
        torch_npu.npu.set_device(first_dev)
        saved = (on_current, on_last)
        with filecontext_lambda() as stream:
            torch.save(saved, stream)
            stream.seek(0)
            loaded = torch.load(stream)
            self.assertEqual(saved, loaded, atol=0, rtol=0)
            loaded_first, loaded_last = loaded
            self.assertEqual(str(loaded_first.device), first_dev)
            self.assertEqual(str(loaded_last.device), last_dev)

    # Once through a real temporary file, once through an in-memory buffer.
    for make_stream in (tempfile.NamedTemporaryFile, BytesIOContext):
        _test_serialization(make_stream)
def test_memory_format_preserved_after_permute(self, device):
    """Check that applying the same swap-permute twice keeps the channels-last layout."""
    # (tensor shape, memory format, permutation swapping two trailing dims)
    cases = (
        ((4, 3, 8, 8), torch.channels_last, (0, 1, 3, 2)),
        ((4, 3, 8, 8, 8), torch.channels_last_3d, (0, 1, 4, 3, 2)),
    )
    for shape, fmt, swap in cases:
        base = torch.randn(*shape, device=device)
        in_format = base.contiguous(memory_format=fmt)
        round_trip = in_format.permute(*swap).permute(*swap)
        self.assertTrue(round_trip.is_contiguous(memory_format=fmt))
def test_memory_format_propagation_rules(self, device):
    """Check which memory format ``a + b`` produces for mixed-layout operands."""
    nchw = torch.channels_last
    plain = torch.contiguous_format

    contiguous = torch.rand(10, 3, 5, 5, device=device)
    cl = torch.rand(10, 3, 5, 5, device=device).contiguous(memory_format=nchw)
    ambiguous = torch.rand(10, 3, 1, 1, device=device).contiguous(memory_format=nchw)
    # The (10, 3, 1, 1) tensor is contiguous in both formats at once.
    self.assertTrue(ambiguous.is_contiguous(memory_format=nchw))
    self.assertTrue(ambiguous.is_contiguous(memory_format=plain))
    bias = torch.rand(1, 1, 1, 1, device=device).contiguous(memory_format=nchw)

    def _test_propagation_rules(self, contiguous, cl, ambiguous, bias):
        # Each row: (left operand, right operand, expected format of the sum).
        expectations = (
            (ambiguous, contiguous, plain),
            (ambiguous, cl, nchw),
            (contiguous, ambiguous, plain),
            (contiguous, cl, plain),
            (cl, ambiguous, nchw),
            (cl, contiguous, nchw),
            (bias, cl, nchw),
            (cl, bias, nchw),
        )
        for lhs, rhs, expected_format in expectations:
            total = lhs + rhs
            self.assertTrue(total.is_contiguous(memory_format=expected_format))

    _test_propagation_rules(self, contiguous, cl, ambiguous, bias)

    # Same expectations after converting with ``.to(memory_format=...)``.
    cl = cl.to(memory_format=nchw)
    ambiguous = ambiguous.to(memory_format=nchw)
    bias = bias.to(memory_format=nchw)
    _test_propagation_rules(self, contiguous, cl, ambiguous, bias)

    # The result must carry the strides of the ambiguous operand.
    for mf in (nchw, plain):
        ambiguous = torch.rand(10, 3, 1, 1, device=device).to(memory_format=mf)
        bias = torch.rand(3, 1, 1, device=device)
        wanted = ambiguous.stride
        result = ambiguous + bias
        self.assertEqual(wanted(), result.stride())
        result = bias + ambiguous
        self.assertEqual(wanted(), result.stride())
        result = ambiguous * 5
        self.assertEqual(wanted(), result.stride())
@skipIfMPS
def test_memory_format_empty_like(self, device):
    """Check the layout ``torch.empty_like`` produces for each ``memory_format`` choice."""
    def test_helper(x, memory_format):
        def assert_in_format(t):
            # In ``memory_format`` and therefore not default-contiguous.
            self.assertFalse(t.is_contiguous())
            self.assertTrue(t.is_contiguous(memory_format=memory_format))

        def assert_default(t):
            # Default-contiguous and therefore not in ``memory_format``.
            self.assertTrue(t.is_contiguous())
            self.assertFalse(t.is_contiguous(memory_format=memory_format))

        xc = x.contiguous(memory_format=memory_format)

        # preserve_format follows whatever layout the input has.
        assert_in_format(torch.empty_like(xc, memory_format=torch.preserve_format))
        assert_default(torch.empty_like(x, memory_format=torch.preserve_format))

        # An explicit format overrides the layout of the input.
        assert_in_format(torch.empty_like(x, memory_format=memory_format))
        assert_default(torch.empty_like(xc, memory_format=torch.contiguous_format))

        # Without the argument the layout of the input is kept.
        assert_in_format(torch.empty_like(xc))

        # preserve_format on a sparse input raises.
        sparse = x.to_sparse()
        with self.assertRaises(RuntimeError):
            torch.empty_like(sparse, memory_format=torch.preserve_format)

    test_helper(torch.randn(4, 3, 8, 8, device=device), torch.channels_last)
    test_helper(torch.randn(4, 3, 8, 8, 8, device=device), torch.channels_last_3d)
def test_memory_format_consistency(self, device):
    """Check that an ``as_strided`` copy of a view reports the same contiguity as the original."""
    original = torch.randn(10, 3, 1, 1, device=device)
    replica = original.as_strided(original.size(), original.stride())

    self.assertEqual(original.size(), replica.size())
    self.assertEqual(original.stride(), replica.stride())
    self.assertEqual(original.is_contiguous(), replica.is_contiguous())
    for fmt in (torch.channels_last, torch.channels_last_3d):
        self.assertEqual(
            original.is_contiguous(memory_format=fmt),
            replica.is_contiguous(memory_format=fmt))
def test_memory_format_operators(self, device):
    """Check that pointwise operators keep the memory format of their first operand.

    Each operator runs once on a channels-last input and once on its
    contiguous copy; the two results must be equal and the first one must be
    contiguous in the expected memory format.
    """
    def _chunk_op(x, y):
        # Adds the two halves of ``x`` split along dim 1; ``y`` is unused.
        x1, x2 = x.chunk(2, dim=1)
        return x1 + x2

    def _unsqueeze_op_add(x, y):
        # Takes the first slice of ``x``, restores the leading dim, adds 3.
        return x[0].unsqueeze(0) + 3

    def _unsqueeze_op_clone(x, y):
        # Takes the first slice of ``x``, restores the leading dim, clones it.
        return x[0].unsqueeze(0).clone()

    def _test_helper(x, y, bias, memory_format):
        # ``y`` comes first here; the result is expected in contiguous_format.
        return_contig_fns = [
            lambda x, y: y + x,
            lambda x, y: y * x,
            lambda x, y: y.addcdiv(x, y, value=2),
            lambda x, y: y.addcmul(x, y, value=2),
        ]
        # Addition with ``bias``; the result is expected in ``memory_format``.
        bias_fns = [
            lambda x, b: x + b,
            lambda x, b: b + x,
        ]
        # ``x`` comes first here; the result is expected in ``memory_format``.
        fns = [
            lambda x, y: x.clone(),
            lambda x, y: x + 3,
            lambda x, y: 3 * x,
            lambda x, y: x + y,
            lambda x, y: x * y,
            lambda x, y: abs(x),
            lambda x, y: x.abs(),
            lambda x, y: x.abs_(),
            lambda x, y: x.acos(),
            lambda x, y: x.acos_(),
            lambda x, y: x.add(y, alpha=3),
            lambda x, y: x.add_(y, alpha=3),
            lambda x, y: x.addcdiv(y, y, value=2),
            lambda x, y: x.addcdiv_(y, y, value=2),
            lambda x, y: x.addcmul(y, y, value=2),
            lambda x, y: x.addcmul_(y, y, value=2),
            lambda x, y: x.acosh(),
            lambda x, y: x.acosh_(),
            lambda x, y: x.asinh(),
            lambda x, y: x.asinh_(),
            lambda x, y: x.atanh(),
            lambda x, y: x.atanh_(),
            lambda x, y: x.asin(),
            lambda x, y: x.asin_(),
            lambda x, y: x.atan(),
            lambda x, y: x.atan2(y),
            lambda x, y: x.atan2_(y),
            lambda x, y: x.ceil(),
            lambda x, y: x.ceil_(),
            lambda x, y: x.clamp(-1, 1),
            lambda x, y: x.cos(),
            lambda x, y: x.cosh(),
            lambda x, y: x.div(0.5),
            lambda x, y: x.div_(0.5),
            lambda x, y: x.div(y),
            lambda x, y: x.div_(y),
            lambda x, y: x.digamma(),
            lambda x, y: x.digamma_(),
            lambda x, y: x.erf(),
            lambda x, y: x.erfc(),
            lambda x, y: x.erfinv(),
            lambda x, y: x.erfinv_(),
            lambda x, y: x.exp(),
            lambda x, y: x.expm1(),
            lambda x, y: x.expm1_(),
            lambda x, y: x.floor(),
            lambda x, y: x.floor_(),
            lambda x, y: x.fmod(2),
            lambda x, y: x.frac(),
            lambda x, y: x.hypot(y),
            lambda x, y: x.hypot_(y),
            lambda x, y: x.i0(),
            lambda x, y: x.i0_(),
            lambda x, y: x.lerp(y, 0.5),
            lambda x, y: x.log(),
            lambda x, y: x.log_(),
            lambda x, y: x.log10(),
            lambda x, y: x.log10_(),
            lambda x, y: x.log1p(),
            lambda x, y: x.log1p_(),
            lambda x, y: x.log2(),
            lambda x, y: x.log2_(),
            lambda x, y: x.mul(3),
            lambda x, y: x.mul_(3),
            lambda x, y: x.neg(),
            lambda x, y: x.neg_(),
            lambda x, y: x.pow(3),
            lambda x, y: x.pow_(3),
            lambda x, y: x.pow(0.0),
            lambda x, y: x.pow(1.0),
            lambda x, y: x.reciprocal(),
            lambda x, y: x.remainder(2),
            lambda x, y: x.round(),
            lambda x, y: x.round_(),
            lambda x, y: x.rsqrt(),
            lambda x, y: x.rsqrt_(),
            lambda x, y: x.sigmoid(),
            lambda x, y: x.sigmoid_(),
            lambda x, y: x.logit(),
            lambda x, y: x.logit_(),
            lambda x, y: x.logit(1e-6),
            lambda x, y: x.logit_(1e-6),
            lambda x, y: x.sign(),
            lambda x, y: x.sign_(),
            lambda x, y: x.sgn(),
            lambda x, y: x.sgn_(),
            lambda x, y: x.sin(),
            lambda x, y: x.sin_(),
            lambda x, y: x.sinh(),
            lambda x, y: x.sinh_(),
            lambda x, y: x.sqrt(),
            lambda x, y: x.sqrt_(),
            lambda x, y: x.tan(),
            lambda x, y: x.tanh(),
            lambda x, y: x.trunc(),
            lambda x, y: x.trunc_(),
            _chunk_op,
            _unsqueeze_op_add,
            _unsqueeze_op_clone,
        ]
        # Default-contiguous copies used to compute the reference results.
        x_c = x.contiguous()
        y_c = y.contiguous()
        b_c = bias.contiguous()
        for fn in fns:
            # In-place variants are detected from the source text of ``fn``.
            # NOTE(review): this is a purely textual check, so reformatting
            # the lambdas above may change what it matches.
            is_inplace = '_(' in inspect.getsource(fn)
            # In-place ops get fresh clones so later entries see unmodified inputs.
            x_clone = x.clone() if is_inplace else x
            x_c_clone = x_c.clone() if is_inplace else x_c
            result_c = fn(x_c_clone, y_c)
            result = fn(x_clone, y)
            self.assertEqual(result, result_c, f"Failed for '{inspect.getsource(fn).strip()}'")
            self.assertTrue(
                result.is_contiguous(memory_format=memory_format),
                f"result of the '{inspect.getsource(fn).strip()}' is not in '{memory_format}' format")
        for fn in bias_fns:
            result_c = fn(x_c, b_c)
            result = fn(x, bias)
            self.assertEqual(result, result_c, f"Failed for '{inspect.getsource(fn).strip()}'")
            self.assertTrue(
                result.is_contiguous(memory_format=memory_format),
                f"result of the '{inspect.getsource(fn).strip()}' is not in '{memory_format}' format")
        for fn in return_contig_fns:
            result_c = fn(x_c, y_c)
            result = fn(x, y)
            self.assertEqual(result, result_c, f"Failed for '{inspect.getsource(fn).strip()}'")
            self.assertTrue(
                result.is_contiguous(memory_format=torch.contiguous_format),
                f"result of the '{inspect.getsource(fn).strip()}' is not in '{torch.contiguous_format}' format")
    # 4-D case: ``x`` and ``bias`` are channels_last, ``y`` is abs(randn) + 1.
    _test_helper(
        torch.randn((4, 3, 8, 8), device=device).contiguous(memory_format=torch.channels_last),
        abs(torch.randn((4, 3, 8, 8), device=device)) + 1,
        torch.randn((1, 3, 1, 1), device=device).contiguous(memory_format=torch.channels_last),
        torch.channels_last)
    # 5-D case: the same set-up with channels_last_3d.
    _test_helper(
        torch.randn((4, 3, 8, 8, 8), device=device).contiguous(memory_format=torch.channels_last_3d),
        abs(torch.randn((4, 3, 8, 8, 8), device=device)) + 1,
        torch.randn((1, 3, 1, 1, 1), device=device).contiguous(memory_format=torch.channels_last_3d),
        torch.channels_last_3d)
def test_strides_propagation(self, device):
    """Check that ``eq``, ``add`` and ``exp`` results follow the strides of a permuted input."""
    def _test_helper(x, op, unary=False):
        def compare_strides(s1, s2, div):
            # The input strides, scaled down by ``div``, must equal the result strides.
            scaled = [stride // div for stride in s1]
            self.assertEqual(scaled, s2)

        def check_result(res, reference, div):
            compare_strides(reference.stride(), res.stride(), div)
            self.assertEqual(reference.size(), res.size())

        # Stride of the last dimension, used to scale the input strides.
        last_dim_stride = x.stride(-1)
        for order in permutations(range(x.dim())):
            xp = x.permute(order)
            if unary:
                argument_sets = ((xp,),)
            else:
                # 1-D partner operand matching the last dimension of ``xp``.
                y = torch.randn(xp.size(-1), device=x.device, dtype=x.dtype)
                argument_sets = ((xp, xp), (xp, y), (y, xp))
            for arguments in argument_sets:
                # Functional form.
                res = op(*arguments)
                check_result(res, xp, last_dim_stride)
                # ``out=`` form, starting from an empty tensor.
                out = torch.empty(0, device=xp.device, dtype=res.dtype)
                res = op(*arguments, out=out)
                check_result(res, xp, last_dim_stride)

    # Inputs: one dense tensor and two tensors strided along the last dimension.
    xs = (
        torch.randn(2, 3, 4, device=device),
        torch.randn(2, 3, 8, device=device)[:, :, ::2],
        torch.randn(1, 1, 4, 12, device=device)[:, :, :, ::2],
    )
    for op in (torch.eq, torch.add):
        for x in xs:
            _test_helper(x, op)
    for op in (torch.exp,):
        for x in xs:
            _test_helper(x, op, unary=True)
@onlyPRIVATEUSE1
@unittest.skipIf(PYTORCH_CUDA_MEMCHECK, "is_pinned uses failure to detect pointer property")
@skipIfTorchDynamo("NotImplementedError: PrimTorch does not support pinned memory")
def test_pin_memory_from_constructor(self, device):
    """Check that factory functions pin memory only when ``pin_memory=True`` is passed."""
    def _get_like(t, **kwargs):
        # One tensor from each ``*_like`` factory, all modelled on ``t``.
        like_factories = (
            torch.rand_like,
            torch.randn_like,
            torch.empty_like,
            lambda ref, **kw: torch.full_like(ref, 4, **kw),
            torch.zeros_like,
            torch.ones_like,
        )
        return [factory(t, **kwargs) for factory in like_factories]

    def _get_tensors(**kwargs):
        # One tensor from each plain factory: (factory, positional arguments).
        factory_calls = (
            (torch.tensor, ([10, 11],)),
            (torch.randn, (3, 5)),
            (torch.rand, (3,)),
            (torch.zeros, (3,)),
            (torch.randperm, (3,)),
            (torch.empty, (6,)),
            (torch.ones, (6,)),
            (torch.eye, (6,)),
            (torch.arange, (3, 5)),
        )
        return [factory(*args, **kwargs) for factory, args in factory_calls]

    # Requested explicitly: every result must be pinned.
    pinned_tensors = _get_tensors(pin_memory=True)
    pinned_tensors += _get_like(torch.empty(5, dtype=torch.float64), pin_memory=True)
    for candidate in pinned_tensors:
        self.assertTrue(candidate.is_pinned())

    # Not requested: results are unpinned, even when modelled on a pinned tensor.
    tensors = _get_tensors()
    tensors += _get_like(torch.empty(5, dtype=torch.float64, pin_memory=True))
    for candidate in tensors:
        self.assertFalse(candidate.is_pinned())
@deviceCountAtLeast(1)
@onlyPRIVATEUSE1
@parametrize("non_blocking", (True, False))
def test_storage_all_devices(self, devices, non_blocking):
    """Copy an untyped storage from every device to the CPU and check pinning and contents."""
    for device in devices:
        source = torch.randn(6, device=device)
        self.assertEqual(source.dtype, source.storage().dtype)

        cpu_storage = source.untyped_storage().to(device='cpu', non_blocking=non_blocking)
        if not non_blocking:
            self.assertFalse(cpu_storage.is_pinned())
        else:
            # Synchronize after the non-blocking copy before inspecting the result.
            torch.npu.synchronize()
            self.assertTrue(cpu_storage.is_pinned())

        rebuilt = torch.empty(()).set_(cpu_storage)
        self.assertEqual(source.cpu(), rebuilt)
@skipXLA
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_lazy_clone(self, device, dtype):
    """Check that ``_lazy_clone`` yields a COW tensor with new storage but the same data address."""
    is_cow = torch._C._is_cow_tensor
    storage_addr = torch._C._storage_address
    data_addr = torch._C._data_address

    source = torch.tensor([[0, 1], [2, 3]], device=device, dtype=dtype)
    source_storage = storage_addr(source)
    source_data = data_addr(source)
    lazy = source._lazy_clone()

    # Both tensors are copy-on-write after the lazy clone.
    self.assertTrue(is_cow(lazy))
    self.assertTrue(is_cow(source))

    # The storage objects differ ...
    self.assertTrue(storage_addr(source) == source_storage)
    self.assertTrue(storage_addr(lazy) != source_storage)

    # ... while the data address is shared.
    self.assertTrue(data_addr(source) == source_data)
    self.assertTrue(data_addr(lazy) == source_data)
@skipXLA
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_lazy_clone_view(self, device, dtype):
    """Check COW state and addresses of a tensor, a view of it and its lazy clone."""
    is_cow = torch._C._is_cow_tensor
    storage_addr = torch._C._storage_address
    data_addr = torch._C._data_address

    source = torch.tensor([[0, 1], [2, 3]], device=device, dtype=dtype)
    source_storage = storage_addr(source)
    source_data = data_addr(source)
    lazy = source._lazy_clone()
    flat_view = source.view([4])

    # All three tensors are copy-on-write.
    self.assertTrue(is_cow(source))
    self.assertTrue(is_cow(flat_view))
    self.assertTrue(is_cow(lazy))

    # The view shares the storage of the source; the clone has its own.
    self.assertTrue(storage_addr(source) == source_storage)
    self.assertTrue(storage_addr(flat_view) == source_storage)
    self.assertTrue(storage_addr(lazy) != source_storage)

    # All three share the same data address.
    self.assertTrue(data_addr(source) == source_data)
    self.assertTrue(data_addr(lazy) == source_data)
    self.assertTrue(data_addr(flat_view) == source_data)
@skipXLA
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_lazy_clone_view_materialize(self, device, dtype):
    """Check COW state and addresses after writing to a view and then to the lazy clone."""
    is_cow = torch._C._is_cow_tensor
    storage_addr = torch._C._storage_address
    data_addr = torch._C._data_address

    source = torch.tensor([[0, 1], [2, 3]], device=device, dtype=dtype)
    source_storage = storage_addr(source)
    source_data = data_addr(source)
    lazy = source._lazy_clone()
    flat_view = source.view([4])

    # Step 1: write through the view.
    flat_view += torch.ones(1, device=device, dtype=dtype)

    # Source and view are no longer COW; the clone still is.
    self.assertFalse(is_cow(source))
    self.assertFalse(is_cow(flat_view))
    self.assertTrue(is_cow(lazy))

    # Storage objects are unchanged by the write.
    self.assertTrue(storage_addr(source) == source_storage)
    self.assertTrue(storage_addr(flat_view) == source_storage)
    self.assertTrue(storage_addr(lazy) != source_storage)

    # Source and view moved to a new data address; the clone kept the old one.
    moved_data = data_addr(source)
    self.assertTrue(moved_data != source_data)
    self.assertTrue(data_addr(flat_view) == moved_data)
    self.assertTrue(data_addr(lazy) == source_data)

    # Step 2: write to the clone.
    lazy += torch.ones(1, device=device, dtype=dtype)

    # Nothing is COW any more.
    self.assertFalse(is_cow(source))
    self.assertFalse(is_cow(flat_view))
    self.assertFalse(is_cow(lazy))

    # Storage objects are still unchanged.
    self.assertTrue(storage_addr(source) == source_storage)
    self.assertTrue(storage_addr(flat_view) == source_storage)
    self.assertTrue(storage_addr(lazy) != source_storage)

    # The second write moved no data address.
    self.assertTrue(data_addr(source) == moved_data)
    self.assertTrue(data_addr(flat_view) == moved_data)
    self.assertTrue(data_addr(lazy) == source_data)
@skipXLA
@dtypes(*(all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16) if not device_is_910A else
          all_types_and(torch.half, torch.bool)))
def test_lazy_clone_binary_op_no_materialize(self, device, dtype):
    """Check that adding a tensor to its lazy clone leaves both tensors copy-on-write."""
    is_cow = torch._C._is_cow_tensor
    source = torch.tensor([[0, 1], [2, 3]], device=device, dtype=dtype)
    lazy = source._lazy_clone()
    # Only the effect of the addition on COW state matters; the sum is discarded.
    source + lazy
    self.assertTrue(is_cow(source))
    self.assertTrue(is_cow(lazy))
@skipXLA
@skipIfTorchDynamo("Torchdynamo fails and we do not need to test it here anyway")
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_parallel_cow_materialize_error(self, device, dtype):
    """Check when ``torch._test_parallel_materialize`` raises for a COW tensor."""
    def run(num_threads, num_parallel, skip_first, should_error):
        saved_threads = torch.get_num_threads()
        try:
            torch.set_num_threads(num_threads)

            cow_tensor = torch.tensor([[0, 1], [2, 3]], device=device, dtype=dtype)._lazy_clone()
            if not should_error:
                torch._test_parallel_materialize(cow_tensor, num_parallel, skip_first)
            else:
                with self.assertRaisesRegex(RuntimeError, r'Materializing a storage'):
                    torch._test_parallel_materialize(
                        cow_tensor, num_parallel, skip_first)

            # A tensor that never went through _lazy_clone must not raise.
            plain_tensor = torch.tensor([[0, 1], [2, 3]], device=device, dtype=dtype)
            torch._test_parallel_materialize(plain_tensor, num_parallel, skip_first)
        finally:
            # Restore the thread count even when an assertion fails.
            torch.set_num_threads(saved_threads)

    # (num_threads, num_parallel, skip_first, should_error)
    scenarios = (
        (1, 1, False, True),
        (1, 1, True, False),
        (1, 10, False, True),
        (1, 10, True, True),
        (10, 1, False, True),
        (10, 1, True, False),
        (10, 10, False, True),
        (10, 10, True, True),
        (10, 2, False, True),
        (10, 2, True, True),
    )
    for scenario in scenarios:
        run(*scenario)
@skipIfMPS
@dtypesIfPRIVATEUSE1(torch.float, torch.double, torch.half)
@dtypes(torch.float, torch.double, torch.half)
def test_multinomial(self, device, dtype):
    """Check ``torch.multinomial`` with and without replacement.

    For contiguous and non-contiguous probability tensors this verifies that
    an index with zero probability is never drawn, that sampling without
    replacement never returns the same index twice within a row, and that
    the result has the expected number of dimensions and samples.
    """
    zero_prob_msg = "sampled an index with zero probability"

    def make_prob_dist(shape, is_contiguous):
        # Uniform random probabilities of the requested shape.  Half tensors
        # are filled in the default dtype first and converted afterwards.
        def uniform(full_shape):
            if dtype == torch.half:
                return torch.zeros(full_shape, device=device).uniform_().to(dtype=torch.half)
            return torch.zeros(full_shape, device=device, dtype=dtype).uniform_()

        if is_contiguous:
            return uniform(shape)
        if len(shape) == 1:
            # Column 2 of a (n, 5) tensor is a strided 1-D view.
            return uniform(shape + [5])[:, 2]
        # 2-D case: slice a (rows, cols) view out of a transposed 7-D tensor.
        prob_dist = uniform([2, shape[1], 7, 1, shape[0], 1, 10])
        prob_dist = prob_dist.transpose(1, 4)
        prob_dist = prob_dist[1, :, 5, 0, :, 0, 4]
        assert not prob_dist.is_contiguous()
        return prob_dist

    def zero_out(prob_dist, zero_prob_indices):
        # Give one column per row zero probability; a negative index means none.
        for row, col in enumerate(zero_prob_indices):
            if col >= 0:
                prob_dist[row, col] = 0

    for is_contiguous in (True, False):
        # With replacement.
        n_row = 3
        for n_col in range(4, 5 + 1):
            prob_dist = make_prob_dist([n_row, n_col], is_contiguous)
            zero_prob_indices = torch.LongTensor(n_row).random_(-2, n_col).tolist()
            zero_out(prob_dist, zero_prob_indices)
            n_sample = n_col * 3
            sample_indices = torch.multinomial(prob_dist, n_sample, True)
            self.assertEqual(prob_dist.dim(), 2)
            self.assertEqual(sample_indices.size(1), n_sample)
            sample_rows = sample_indices.tolist()
            for row, zero_prob_idx in enumerate(zero_prob_indices):
                if zero_prob_idx < 0:
                    continue
                self.assertNotIn(zero_prob_idx, sample_rows[row], msg=zero_prob_msg)

        # Without replacement.
        n_row = 3
        for n_col in range(2, 10 + 1, 2):
            prob_dist = make_prob_dist([n_row, n_col], is_contiguous)
            zero_prob_indices = torch.LongTensor(n_row).random_(-1, n_col).tolist()
            zero_out(prob_dist, zero_prob_indices)
            n_sample = max(1, n_col - 2)
            sample_indices = torch.multinomial(prob_dist, n_sample, False)
            self.assertEqual(prob_dist.dim(), 2)
            self.assertEqual(sample_indices.size(1), n_sample)
            # Compare Python ints, not 0-d tensors: tensors hash by object
            # identity, so a tensor-keyed container never reports a duplicate
            # and the "sampled twice" check could not fail.
            sample_rows = sample_indices.tolist()
            for row, zero_prob_idx in enumerate(zero_prob_indices):
                seen = set()
                for sample_idx in sample_rows[row]:
                    if zero_prob_idx >= 0:
                        self.assertNotEqual(sample_idx, zero_prob_idx, msg=zero_prob_msg)
                    self.assertNotIn(sample_idx, seen, "sampled an index twice")
                    seen.add(sample_idx)

        # 1-D probability vector.
        n_col = 4
        prob_dist = make_prob_dist([n_col], is_contiguous).fill_(1)
        zero_prob_idx = 1
        prob_dist[zero_prob_idx] = 0
        n_sample = 20
        sample_indices = torch.multinomial(prob_dist, n_sample, True)
        self.assertNotIn(zero_prob_idx, sample_indices.tolist(), msg=zero_prob_msg)
        self.assertEqual(sample_indices.dim(), 1, msg="wrong number of dimensions")
        self.assertEqual(prob_dist.dim(), 1, msg="wrong number of prob_dist dimensions")
        self.assertEqual(sample_indices.size(0), n_sample, msg="wrong number of samples")

    # Small 2-D input with a single sample per row.
    n_row, n_col = 2, 3
    prob_dist = make_prob_dist([n_row, n_col], True)
    n_sample = 1
    sample_indices = torch.multinomial(prob_dist, n_sample, True)
    self.assertEqual(sample_indices.dim(), 2, msg="wrong number of dimensions")
    self.assertEqual(sample_indices.size(1), n_sample, msg="wrong number of samples")
@onlyPRIVATEUSE1
@dtypes(torch.float, torch.double, torch.half)
def test_multinomial_deterministic(self, device, dtype):
    """Check that re-seeding a generator reproduces ``torch.multinomial`` draws.

    A 10000 x 1000 random weight matrix is sampled twice per trial (one sample
    per row, with replacement), each time after resetting the same generator to
    the same seed. Both draws must be equal and have shape ``(rows, n_sample)``.
    """
    rng = torch.Generator(device=device)
    num_trials = 5
    fixed_seed = 0
    prob_dist = torch.rand(10000, 1000, device=device, dtype=dtype)
    n_sample = 1

    def draw_after_reseed():
        # Resetting the seed right before sampling is what makes both draws comparable.
        rng.manual_seed(fixed_seed)
        return torch.multinomial(prob_dist, n_sample, True, generator=rng)

    for _ in range(num_trials):
        first_draw = draw_after_reseed()
        second_draw = draw_after_reseed()
        self.assertEqual(first_draw, second_draw)
        self.assertEqual(first_draw.dim(), 2, msg="wrong number of dimensions")
        self.assertEqual(first_draw.size(1), n_sample, msg="wrong number of samples")
@slowTest
@dtypes(torch.float)
def test_multinomial_rng_state_advance(self, device, dtype):
    """Check that two back-to-back multinomial draws do not repeat each other.

    Draws ``n_sample`` indices twice from a uniform weight vector of 100000
    entries and bounds how many of the ``2 * n_sample`` indices are duplicates:
    at most 2 when sampling with replacement, at most 1 without.
    """
    corpus_size = 100000
    n_sample = 100
    freqs = torch.ones(corpus_size, dtype=torch.float, device=device)

    # (replacement flag, maximum number of repeated indices tolerated)
    scenarios = ((True, 2), (False, 1))
    for with_replacement, max_repeats in scenarios:
        first = torch.multinomial(freqs, n_sample, replacement=with_replacement)
        second = torch.multinomial(freqs, n_sample, replacement=with_replacement)
        combined = torch.cat([first, second])
        repeats = 2 * n_sample - combined.unique().size(0)
        self.assertLessEqual(repeats, max_repeats)
def _test_memory_format_transformations(self, device, input_generator_fn, transformation_fn,
                                        memory_format, compare_data=True, default_is_preserve=False):
    """Shared checks for how a tensor transformation treats memory formats.

    Args:
        device: device handed to ``input_generator_fn`` and used for scratch tensors.
        input_generator_fn: callable ``(device) -> Tensor`` producing the input.
        transformation_fn: callable ``(tensor, **kwargs) -> Tensor``; it is invoked
            with ``memory_format=torch.preserve_format``, with
            ``memory_format=torch.contiguous_format`` and with no keyword at all.
        memory_format: ``torch.channels_last`` or ``torch.channels_last_3d``.
        compare_data: when true, the result (converted back with ``.to(input)``)
            must equal the input.
        default_is_preserve: whether calling ``transformation_fn`` without a
            ``memory_format`` is expected to keep the input layout.

    The strided-view and stride-permutation parts are skipped when
    ``TEST_WITH_TORCHINDUCTOR`` is set.
    """
    assert memory_format in (torch.channels_last, torch.channels_last_3d)
    check_strided = not TEST_WITH_TORCHINDUCTOR

    def assert_data_matches(source, result):
        if compare_data:
            self.assertEqual(source, result.to(source))

    # --- memory_format=preserve_format -------------------------------------
    source = input_generator_fn(device)
    if check_strided:
        # Take every other element along the trailing spatial dims so the
        # input is a strided view rather than a dense tensor.
        if memory_format == torch.channels_last:
            source = source[..., ::2, ::2]
        else:
            source = source[..., ::2, ::2, ::2]
    result = transformation_fn(source, memory_format=torch.preserve_format)
    self.assertFalse(result.is_contiguous())
    self.assertTrue(result.is_contiguous(memory_format=memory_format))
    if check_strided:
        self.assertFalse(source.is_contiguous())
        self.assertFalse(source.is_contiguous(memory_format=memory_format))
    assert_data_matches(source, result)

    # --- memory_format=contiguous_format -----------------------------------
    source = input_generator_fn(device)
    result = transformation_fn(source, memory_format=torch.contiguous_format)
    self.assertTrue(result.is_contiguous())
    self.assertFalse(result.is_contiguous(memory_format=memory_format))
    assert_data_matches(source, result)

    # --- no memory_format argument -----------------------------------------
    source = input_generator_fn(device)
    result = transformation_fn(source)
    if default_is_preserve:
        self.assertFalse(result.is_contiguous())
        self.assertTrue(result.is_contiguous(memory_format=memory_format))
    else:
        self.assertTrue(result.is_contiguous())
        self.assertFalse(result.is_contiguous(memory_format=memory_format))
    assert_data_matches(source, result)

    # --- preserve_format keeps the strides of repeatedly permuted tensors ---
    if check_strided:
        x = torch.randn((3, 4, 5, 6, 7, 8, 9), device=device)
        for _ in range(10):
            order = list(range(len(x.shape)))
            random.shuffle(order)
            x = x.permute(order)
            transformed = transformation_fn(x, memory_format=torch.preserve_format)
            self.assertEqual(x.stride(), transformed.stride())
def test_memory_format_to(self, device):
def get_generator(memory_format, shape):
def input_generator_fn(device):
return torch.randn(shape, device=device, dtype=torch.float32).contiguous(memory_format=memory_format)
return input_generator_fn
def transformation_fn(tensor, **kwargs):
return tensor.to(dtype=torch.float64, **kwargs)
formats_shapes = (
(torch.channels_last, (4, 3, 8, 8)),
(torch.channels_last_3d, (4, 3, 8, 8, 8)))
for mf, shape in formats_shapes:
self._test_memory_format_transformations(
device, get_generator(mf, shape), transformation_fn, mf, default_is_preserve=True)
def test_memory_format_type(self, device):
def get_generator(memory_format, shape):
def input_generator_fn(device):
return torch.randn(shape, device=device, dtype=torch.float32).contiguous(memory_format=memory_format)
return input_generator_fn
def transformation_fn(tensor, **kwargs):
return tensor.to(torch.float64, **kwargs)
formats_shapes = (
(torch.channels_last, (4, 3, 8, 8)),
(torch.channels_last_3d, (4, 3, 8, 8, 8)))
for mf, shape in formats_shapes:
self._test_memory_format_transformations(
device, get_generator(mf, shape), transformation_fn, mf, default_is_preserve=True)
def test_memory_format_clone(self, device):
def get_generator(memory_format, shape):
def input_generator_fn(device):
return torch.randn(shape, device=device, dtype=torch.float32).contiguous(memory_format=memory_format)
return input_generator_fn
def transformation_fn(tensor, **kwargs):
return tensor.clone(**kwargs)
formats_shapes = (
(torch.channels_last, (4, 3, 8, 8)),
(torch.channels_last_3d, (4, 3, 8, 8, 8)))
for mf, shape in formats_shapes:
self._test_memory_format_transformations(
device, get_generator(mf, shape), transformation_fn, mf, True, default_is_preserve=True)
def test_memory_format_factory_like_functions_preserve(self, device):
def get_generator(memory_format, shape):
def input_generator_fn(device):
return torch.randn(shape, device=device, dtype=torch.float32).contiguous(memory_format=memory_format)
return input_generator_fn
transformation_fns = [
lambda t, **kwargs: torch.zeros_like(t, **kwargs),
lambda t, **kwargs: torch.ones_like(t, **kwargs),
lambda t, **kwargs: torch.randint_like(t, 10, 100, **kwargs),
lambda t, **kwargs: torch.randint_like(t, 100, **kwargs),
lambda t, **kwargs: torch.randn_like(t, **kwargs),
lambda t, **kwargs: torch.rand_like(t, **kwargs),
lambda t, **kwargs: torch.full_like(t, 7, **kwargs),
lambda t, **kwargs: torch.empty_like(t, **kwargs)]
formats_shapes = (
(torch.channels_last, (4, 3, 8, 8)),
(torch.channels_last_3d, (4, 3, 8, 8, 8)))
for mf, shape, in formats_shapes:
for transformation_fn in transformation_fns:
self._test_memory_format_transformations(
device, get_generator(mf, shape), transformation_fn, mf, compare_data=False, default_is_preserve=True)
def test_memory_format_type_shortcuts(self, device):
def get_generator(memory_format, shape, dtype):
def input_generator_fn(device):
return torch.randn(shape, device=device, dtype=dtype).clamp(0, 1) \
.round().contiguous(memory_format=memory_format)
return input_generator_fn
def get_fn(fn_name):
def transformation_fn(tensor, **kwargs):
fn = getattr(tensor, fn_name)
return fn(**kwargs)
return transformation_fn
shortcuts = ['byte', 'char', 'double', 'bool', 'half', 'int', 'long', 'short']
if device == 'cpu':
shortcuts += ['bfloat16']
formats_shapes = (
(torch.channels_last, (4, 3, 8, 8)),
(torch.channels_last_3d, (4, 3, 8, 8, 8)))
for mf, shape in formats_shapes:
for fn_name in shortcuts:
self._test_memory_format_transformations(
device, get_generator(mf, shape, torch.float32), get_fn(fn_name), mf, default_is_preserve=True)
for mf, shape in formats_shapes:
self._test_memory_format_transformations(
device, get_generator(mf, shape, torch.float64), get_fn('float'), mf, default_is_preserve=True)
@onlyPRIVATEUSE1
def test_memory_format_cpu_and_NPU_ops(self, device):
    """Run the shared memory-format checks on ``Tensor.cpu`` and ``Tensor.npu``.

    Inputs generated on ``'npu'`` are moved with ``.cpu()``, and inputs
    generated on ``'cpu'`` are moved with ``.npu()``; the device strings are
    fixed and the ``device`` argument is not consulted.
    """
    def make_input_factory(layout, dims):
        return lambda dev: torch.randn(
            dims, device=dev, dtype=torch.float32).contiguous(memory_format=layout)

    def move_to_cpu(tensor, **kwargs):
        return tensor.cpu(**kwargs)

    def move_to_npu(tensor, **kwargs):
        return tensor.npu(**kwargs)

    # (device the input is created on, transfer applied to it)
    transfers = (('npu', move_to_cpu), ('cpu', move_to_npu))
    layout_cases = (
        (torch.channels_last, (4, 3, 8, 8)),
        (torch.channels_last_3d, (4, 3, 8, 8, 8)),
    )
    for layout, dims in layout_cases:
        for source_device, transfer in transfers:
            self._test_memory_format_transformations(
                source_device, make_input_factory(layout, dims), transfer, layout,
                default_is_preserve=True)
@onlyNativeDeviceTypes
def test_pickle_gradscaler(self, device):
    """Check that a ``GradScaler`` survives a pickle round trip.

    The scaler is pickled both before and after its scale tensor has been
    lazily created by a dummy ``scale()`` call. The restored scaler must report
    the same enabled state and, when enabled, the same hyper-parameters.
    """
    device = torch.device(device)
    make_scaler = partial(torch.GradScaler, device=device.type)

    def dummy_tensor():
        return torch.tensor([4.0], dtype=torch.float32, device=device)

    for lazy_init_scale in (True, False):
        original = make_scaler(init_scale=3., growth_factor=4., backoff_factor=.5, growth_interval=2)
        if device.type == "npu":
            # On NPU the scaler is expected to be disabled exactly when AMP is unavailable.
            if torch_npu.npu.amp.common.amp_definitely_not_available():
                self.assertTrue(not original.is_enabled())
            else:
                self.assertTrue(original.is_enabled())
        else:
            self.assertTrue(original.is_enabled())

        if lazy_init_scale:
            original.scale(dummy_tensor())
            self.assertTrue(original._scale.device.type == device.type)

        restored = pickle.loads(pickle.dumps(original))
        self.assertEqual(restored.is_enabled(), original.is_enabled())
        if not original.is_enabled():
            continue

        self.assertEqual(restored.get_scale(), 3.)
        self.assertEqual(restored.get_growth_factor(), 4.)
        self.assertEqual(restored.get_backoff_factor(), .5)
        self.assertEqual(restored.get_growth_interval(), 2)
        self.assertEqual(restored._init_growth_tracker, 0)
        # Looking up an arbitrary key must yield a freshly initialised per-optimizer state.
        self.assertEqual(restored._per_optimizer_states["fdsa"],
                         torch_npu.npu.amp.grad_scaler._refresh_per_optimizer_state())
        if lazy_init_scale:
            self.assertEqual(restored.scale(dummy_tensor()), 12.0)
def _test_multinomial_empty(self, device, replacement, num_samples):
probs = torch.ones(0, 3, device=device)
expected = torch.empty(0, num_samples, dtype=torch.int64)
out = torch.multinomial(probs, num_samples=num_samples, replacement=replacement)
self.assertEqual(out, expected)
def test_multinomial_empty_w_replacement(self, device):
self._test_multinomial_empty(device, True, 1)
self._test_multinomial_empty(device, True, 2)
def test_multinomial_empty_wo_replacement(self, device):
self._test_multinomial_empty(device, False, 1)
self._test_multinomial_empty(device, False, 2)
@onlyNativeDeviceTypes
@dtypes(torch.float, torch.double)
def test_grad_scaling_unscale(self, device, dtype):
    """Exercise gradient unscaling and inf/nan detection.

    Part one calls ``torch._amp_foreach_non_finite_check_and_unscale_`` directly
    on hand-built gradient lists. Part two goes through
    ``GradScaler._unscale_grads_`` with gradients of mixed dtypes (and mixed
    devices when several NPUs are available).

    Every gradient is filled with 4.0 and ``inv_scale`` is 0.25, so a successful
    unscale leaves all elements equal to 1.0.
    """
    device = torch.device(device)
    # All tensors live on the first NPU when testing NPU, otherwise on the CPU.
    device0 = "npu:0" if device.type == "npu" else "cpu"
    inv_scale = torch.full((1,), 0.25, dtype=torch.float, device=device0)
    found_inf = torch.full((1,), 0.0, dtype=torch.float, device=device0)
    size = 20
    g = torch.full((size, size), 4.0, dtype=dtype, device=device0)
    # Variants of g with a single non-finite element at [2, 2].
    ginf = g.clone()
    ginf[2, 2] = float('inf')
    gnan = g.clone()
    gnan[2, 2] = float('nan')
    # Each case is (list of grads, whether a non-finite value is present).
    # Besides plain clones, the lists contain transposed views (`.t()`) and
    # column slices (`[:, :5]`); the slices of ginf/gnan still include [2, 2].
    cases = (
        ([g.clone(), g.clone()], False),
        ([g.clone(), g.clone().t()], False),
        ([g.clone(), g.clone()[:, :5]], False),
        ([g.clone()[:, :5], g.clone()[:, :5]], False),
        ([g.clone(), ginf.clone()], True),
        ([g.clone(), gnan.clone()], True),
        ([g.clone(), ginf.clone()[:, :5]], True),
        ([g.clone(), gnan.clone()[:, :5]], True),
        ([ginf.clone(), g.clone()[:, :5]], True),
        ([ginf.clone()[:, :5], g.clone()[:, :5]], True),
    )
    for grads, has_inf in cases:
        # found_inf is shared across cases, so reset it before every call.
        found_inf.zero_()
        torch._amp_foreach_non_finite_check_and_unscale_(grads, found_inf, inv_scale)
        if has_inf:
            self.assertEqual(found_inf, 1.0)
        else:
            self.assertEqual(found_inf, 0.0)
            for grad in grads:
                self.assertEqual(grad, torch.ones_like(grad), rtol=1e-5, atol=1e-7)
    # A list with mismatched dtypes must still be unscaled correctly.
    grads = [g.clone(), g.to(dtype=torch.float16)]
    torch._amp_foreach_non_finite_check_and_unscale_(grads, found_inf, inv_scale)
    for grad in grads:
        self.assertEqual(grad, torch.ones_like(grad), rtol=1e-5, atol=1e-7)
    # A list with mismatched devices must be rejected by the raw op.
    if device.type == "npu" and TEST_MULTINPU:
        with self.assertRaisesRegex(RuntimeError, r"Expected all tensors to be on the same device"):
            torch._amp_foreach_non_finite_check_and_unscale_([g.clone(), g.to(device="npu:1")],
                                                             found_inf,
                                                             inv_scale)

    def perfect_storm_grads(inject_inf):
        # Builds grads with mixed dtypes (and devices when available). When
        # inject_inf >= 0, the grad at that index gets an inf written at [2, 2].
        grads = [g.clone(), g.clone()[:, :5], g.to(dtype=torch.float16), g.to(dtype=torch.float16)]
        if device.type == "npu" and TEST_MULTINPU:
            grads += [g.to(device="npu:1"),
                      g.to(device="npu:1")[:, :5],
                      g.to(device="npu:1", dtype=torch.float16),
                      g.to(device="npu:1", dtype=torch.float16)]
        if inject_inf >= 0:
            grads[inject_inf][2, 2] = float('inf')
        return grads
    GradScaler = partial(torch.GradScaler, device=device.type)
    scaler = GradScaler()
    # Placeholder parameters shaped like the grads; only their .grad is used below.
    dummy_params = [torch.empty_like(g) for g in perfect_storm_grads(-1)]
    dummy_opt = torch.optim.SGD(dummy_params, lr=1.)
    # -1 means "no inf injected"; every other value targets one grad in turn.
    for inject_inf in range(-1, len(dummy_params)):
        found_inf = torch.full((1,), 0.0, dtype=torch.float, device=device0)
        grads = perfect_storm_grads(inject_inf)
        for i, p in enumerate(dummy_params):
            p.grad = grads[i]
        found_inf_per_device = scaler._unscale_grads_(dummy_opt, inv_scale, found_inf, True)
        if inject_inf < 0:
            # Nothing injected: no device reports an inf and all grads were unscaled.
            self.assertTrue(sum(v.item() for v in found_inf_per_device.values()) == 0)
            for grad in grads:
                self.assertEqual(grad, torch.ones_like(grad), rtol=1e-5, atol=1e-7)
        else:
            # One inf injected: the per-device flags must sum to exactly one.
            self.assertTrue(sum(v.item() for v in found_inf_per_device.values()) == 1)
@onlyNativeDeviceTypes
@dtypes(torch.float)
@unittest.skipIf(device_is_910A, "aclnnAmpUpdateScale is not supported on 910A")
def test_grad_scaling_update_scale(self, device, dtype):
    """Check ``torch._amp_update_scale_`` across clean and overflowing steps.

    With a growth interval of 2, two steps without inf first advance the
    tracker (scale stays 4.0) and then grow the scale to 8.0 while resetting
    the tracker. A step with ``found_inf`` set applies the 0.25 backoff,
    giving 2.0.
    """
    growth, backoff, growth_interval = 2.0, 0.25, 2
    scale = torch.full((1,), 4.0, dtype=dtype, device=device)
    growth_tracker = torch.full((1,), 0.0, dtype=torch.int32, device=device)
    found_inf = torch.full((1,), 0.0, dtype=torch.float, device=device)

    def update_and_expect(expected_tracker, expected_scale):
        # The op mutates `scale` and `growth_tracker` in place.
        torch._amp_update_scale_(scale, growth_tracker, found_inf, growth, backoff, growth_interval)
        self.assertEqual(growth_tracker, expected_tracker)
        self.assertEqual(scale, expected_scale)

    # Two consecutive steps without inf.
    update_and_expect(1, 4.0)
    update_and_expect(0, 8.0)

    # One step that reports an inf.
    found_inf.fill_(1.0)
    update_and_expect(0, 2.0)
@skipIfTorchDynamo("Failed running call_function for sparse_coo_tensor. See pytorch/issues/118856")
@onlyNativeDeviceTypes
@dtypes(torch.float)
def test_grad_scaling_unscale_sparse(self, device, dtype):
    """Exercise ``GradScaler._unscale_grads_`` on sparse COO gradients.

    Covers finite, inf and nan values for ``dtype`` gradients, then repeats the
    finite case with half-precision gradients, and finally uses half-precision
    values stored at a duplicated index.
    """
    device = torch.device(device)
    scaler = torch.GradScaler(device=device.type)
    inv_scale = torch.full((1,), 0.25, dtype=dtype, device=device)
    found_inf = torch.empty((1,), dtype=dtype, device=device)
    # _unscale_grads_ returns a mapping that is indexed by device below.
    cur = found_inf.device
    i = torch.tensor([[0, 1, 1],
                      [2, 0, 2]], device=device, dtype=torch.int64)
    v = torch.tensor([16., 32., 64.], device=device, dtype=torch.float)
    s = torch.sparse_coo_tensor(i, v, torch.Size([2, 3]), device=device, dtype=dtype)
    p = s.clone()
    assert p.is_sparse
    opt = torch.optim.SGD([p], lr=1.)
    # Finite values: nothing is flagged and the grad is multiplied by 0.25.
    p.grad = s.clone()
    found_inf.zero_()
    found_inf = scaler._unscale_grads_(opt, inv_scale, found_inf, False)[cur]
    self.assertEqual(found_inf, 0.0)
    self.assertEqual(p.grad.to_dense(), (s / 4).to_dense())
    # An inf value must be flagged.
    v = torch.FloatTensor([16., 32., float('inf')])
    p.grad = torch.sparse_coo_tensor(i, v, torch.Size([2, 3]), device=device, dtype=dtype)
    found_inf.zero_()
    found_inf = scaler._unscale_grads_(opt, inv_scale, found_inf, False)[cur]
    self.assertEqual(found_inf, 1.0)
    # A nan value must be flagged.
    v = torch.FloatTensor([16., 32., float('nan')])
    p.grad = torch.sparse_coo_tensor(i, v, torch.Size([2, 3]), device=device, dtype=dtype)
    found_inf.zero_()
    found_inf = scaler._unscale_grads_(opt, inv_scale, found_inf, False)[cur]
    self.assertEqual(found_inf, 1.0)
    # Same finite check with a half-precision parameter and gradient; the last
    # argument to _unscale_grads_ is True for the half-precision calls.
    p = s.clone().half()
    assert p.is_sparse
    opt = torch.optim.SGD([p], lr=1.)
    p.grad = s.clone().half()
    found_inf.zero_()
    found_inf = scaler._unscale_grads_(opt, inv_scale, found_inf, True)[cur]
    self.assertEqual(found_inf, 0.0)
    self.assertEqual(p.grad.to_dense(), (s.half() / 4).to_dense())
    # Index (0, 2) appears twice with 64000 each; their sum exceeds the float16
    # maximum (65504). NOTE(review): presumably the duplicates are summed when
    # the gradient is coalesced, which is what produces the expected inf - confirm.
    i = torch.LongTensor([[0, 1, 0],
                          [2, 0, 2]])
    v = torch.FloatTensor([64000., 32., 64000.])
    p.grad = torch.sparse_coo_tensor(i, v, torch.Size([2, 3]), device=device, dtype=torch.float16)
    found_inf.zero_()
    found_inf = scaler._unscale_grads_(opt, inv_scale, found_inf, True)[cur]
    self.assertEqual(found_inf, 1.0)
@onlyNativeDeviceTypes
def test_grad_scaling_state_dict(self, device):
    """Check that ``load_state_dict`` overwrites every ``GradScaler`` setting.

    A scaler with deliberately different settings (and, optionally, an already
    created scale tensor) loads the state of a reference scaler and must then
    report the reference values.
    """
    device = torch.device(device)
    make_scaler = partial(torch.GradScaler, device=device.type)
    for lazy_init_scale in (True, False):
        reference = make_scaler(init_scale=3., growth_factor=4., backoff_factor=.5, growth_interval=2)
        target = make_scaler(init_scale=6., growth_factor=7., backoff_factor=.8, growth_interval=1)
        # Arbitrary value that loading the reference state has to replace.
        target._init_growth_tracker = 7

        if lazy_init_scale:
            # A dummy scale() call makes the scaler create its scale tensor.
            target.scale(torch.full((1,), 4.0, dtype=torch.float32, device=device))
            if "npu" == device.type:
                expected_type = torch.npu.FloatTensor
            else:
                expected_type = torch.FloatTensor
            self.assertTrue(isinstance(target._scale, expected_type))

        target.load_state_dict(reference.state_dict())

        self.assertEqual(target.get_scale(), 3.)
        self.assertEqual(target.get_growth_factor(), 4.)
        self.assertEqual(target.get_backoff_factor(), .5)
        self.assertEqual(target.get_growth_interval(), 2)
        self.assertEqual(target._init_growth_tracker, 0)
def _run_scaling_case(self, device, run, unskipped, skipped, atol=1e-7, optimizer_ctor=torch.optim.SGD,
                      optimizer_kwargs=None):
    """Compare a training run with gradient scaling against a control run.

    Args:
        device: device type string used for the models and the scaler.
        run: callable ``(device, data, model, optimizer, scaler, loss_fn,
            skip_iter, try_scaling_api)``; it may return a scaler that replaces
            the one passed in.
        unskipped: number of steps expected to grow the scale.
        skipped: number of steps expected to back the scale off.
        atol: absolute tolerance for gradient, state and parameter comparisons.
        optimizer_ctor: optimizer class forwarded to ``_create_scaling_case``.
        optimizer_kwargs: optimizer keyword arguments forwarded likewise.

    The whole comparison is performed once with the scaler enabled and once
    with it disabled.
    """
    make_scaler = partial(torch.GradScaler, device=device)
    for enabled in (True, False):
        case = _create_scaling_case(
            device=device, optimizer_ctor=optimizer_ctor, optimizer_kwargs=optimizer_kwargs)
        mod_control, mod_scaling, opt_control, opt_scaling, data, loss_fn, skip_iter = case

        scaler = make_scaler(init_scale=128., growth_factor=2.0, enabled=enabled, growth_interval=1)

        # Control run first (no scaling API), then the run under test.
        run(device, data, mod_control, opt_control, scaler, loss_fn, skip_iter, False)
        returned = run(device, data, mod_scaling, opt_scaling, scaler, loss_fn, skip_iter, True)
        if returned:
            scaler = returned

        if enabled:
            net_growth = scaler.get_growth_factor() ** unskipped if unskipped > 0 else 1.0
            net_backoff = scaler.get_backoff_factor() ** skipped if skipped > 0 else 1.0
            expected_scale = 128. * net_growth * net_backoff
        else:
            expected_scale = 1.0
        self.assertTrue(scaler.get_scale() == expected_scale)

        param_pairs = zip(mod_control.parameters(), mod_scaling.parameters())
        for control_param, scaled_param in param_pairs:
            self.assertEqual(control_param.grad, scaled_param.grad, atol=atol, rtol=1e-05)

            control_state = opt_control.state[control_param]
            scaled_state = opt_scaling.state[scaled_param]
            for key in control_state:
                self.assertEqual(control_state[key], scaled_state[key], atol=atol, rtol=1e-05, msg=key)

            self.assertEqual(control_param, scaled_param, atol=atol, rtol=1e-05)
@onlyNativeDeviceTypes
@parametrize("foreach, fused", [(None, None), (True, None), (None, True)])
@optims(
    [optim for optim in optim_db if optim.optim_cls in [torch.optim.AdamW, torch.optim.Adam, torch.optim.SGD]],
    dtypes=[torch.float32]
)
@unittest.skipIf(device_is_910A, "aclnnAmpUpdateScale is not supported on 910A")
def test_grad_scaling_autocast(self, device, dtype, optim_info, foreach, fused):
    """Compare plain training with autocast + gradient-scaling training.

    The scaling case is run twice through ``_run_scaling_case``: once as is and
    once with the scaler pickled and restored after every update. For Adam and
    AdamW the comparison is wrapped in ``assertRaises(AssertionError)``, i.e.
    the test expects it to fail for those optimizers.
    """
    # Read by run() at call time; flipped to True before the second scaling case.
    try_pickle = False

    def run(device, data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api):
        for i, (input_, target) in enumerate(data):
            optimizer.zero_grad()
            # Autocast is only active for the run that uses the scaling API.
            with torch.autocast(device_type=device, dtype=torch.half, enabled=try_scaling_api):
                output = model(input_)
                loss = loss_fn(output, target)
            if try_scaling_api:
                scaler.scale(loss).backward()
                if i == skip_iter and scaler.is_enabled():
                    # Poison one gradient so that this iteration's step is skipped.
                    with torch.no_grad():
                        model[1].weight.grad.fill_(float('inf'))
                scaler.step(optimizer)
                scaler.update()
                if try_pickle:
                    # Round-trip the scaler; the restored object is used from here on.
                    scaler = pickle.loads(pickle.dumps(scaler))
            else:
                loss.backward()
                # Mirror the skipped step in the control run.
                if (not scaler.is_enabled()) or (i != skip_iter):
                    optimizer.step()
        # Returned so the caller can pick up a scaler replaced by the pickle round trip.
        return scaler
    optimizer_ctor = optim_info.optim_cls
    context = contextlib.nullcontext
    if optimizer_ctor in (torch.optim.Adam, torch.optim.AdamW):
        from functools import partial
        # For these optimizers the block below is expected to raise AssertionError.
        context = partial(self.assertRaises, AssertionError)
    with context():
        # atol=1e-3: the two runs differ in precision (autocast to half vs. none).
        self._run_scaling_case(
            device, run, unskipped=3, skipped=1, atol=1e-3,
            optimizer_ctor=optimizer_ctor, optimizer_kwargs={"foreach": foreach, "fused": fused},
        )
        # Picked up by run() through the closure on the next call.
        try_pickle = True
        self._run_scaling_case(
            device, run, unskipped=3, skipped=1, atol=1e-3,
            optimizer_ctor=optimizer_ctor, optimizer_kwargs={"foreach": foreach, "fused": fused},
        )
def _test_params_invalidated_with_grads_invalidated_between_unscale_and_step(self, device, dtype, optim_info):
    """Corrupt gradients after ``unscale_`` and expect corrupted parameters.

    For every optimizer configuration (excluding ``differentiable``), gradients
    are overwritten with nan/inf between ``scaler.unscale_`` and
    ``scaler.step``. After training, every parameter must contain at least one
    nan or inf element.
    """
    optimizer_ctor = optim_info.optim_cls
    configurations = _get_optim_inputs_including_global_cliquey_kwargs(
        device, dtype, optim_info, skip=("differentiable",))

    for configuration in configurations:
        model, _, optimizer, _, data, loss_fn, _ = _create_scaling_case(
            device, optimizer_ctor=optimizer_ctor, optimizer_kwargs=configuration.kwargs,
        )
        scaler = torch.GradScaler(device=device, init_scale=128.0)

        for batch, target in data:
            optimizer.zero_grad()
            with torch.autocast(device_type=device, dtype=torch.half):
                loss = loss_fn(model(batch), target)
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)

            # Deliberately break the grads: inf for odd indices, nan for even ones.
            for index, param in enumerate(model.parameters()):
                poison = torch.inf if index % 2 else torch.nan
                param.grad.copy_(poison)

            scaler.step(optimizer)
            scaler.update()

        self.assertTrue(all((p.isnan().any() or p.isinf().any()) for p in model.parameters()))
@onlyNativeDeviceTypes
@optims(
    [info for info in optim_db if info.optim_cls in (torch.optim.AdamW, torch.optim.Adam, torch.optim.SGD)],
    dtypes=[torch.float32]
)
def test_params_invalidated_with_grads_invalidated_between_unscale_and_step(self, device, dtype, optim_info):
    """Run the shared grad-invalidation check for AdamW, Adam and SGD in float32."""
    shared_check = self._test_params_invalidated_with_grads_invalidated_between_unscale_and_step
    shared_check(device, dtype, optim_info)
@onlyNativeDeviceTypes
@optims(
    [info for info in optim_db if info.optim_cls in (torch.optim.AdamW, torch.optim.Adam, torch.optim.SGD)],
    dtypes=[torch.float32]
)
@torch._inductor.config.patch("graph_partition", True)
def test_params_invalidated_with_grads_invalidated_and_graph_partition(self, device, dtype, optim_info):
    """Run the shared grad-invalidation check with inductor's ``graph_partition`` patched to True."""
    shared_check = self._test_params_invalidated_with_grads_invalidated_between_unscale_and_step
    shared_check(device, dtype, optim_info)
@onlyNativeDeviceTypes
def test_grad_scale_will_not_overflow(self, device):
    """Check that the loss scale stays finite when growth would overflow it.

    The scaler starts at 1e38 with a growth factor of 16 and a growth interval
    of 1, so growing the scale after a single step would leave the float32
    range. After one scale/step/update cycle the scale must still be finite.
    """
    device = torch.device(device)
    model = torch.nn.Linear(5, 1).to(device)
    optimizer = torch.optim.Adam(model.parameters())
    scaler = torch.GradScaler(device=device.type, growth_interval=1, growth_factor=2 ** 4, init_scale=1e38)
    optimizer.zero_grad()
    x = torch.randn(1, 5).to(device)
    y = 1e-30 * torch.randn(1, 1).to(device)
    lm = ((model(x) - y) ** 2).mean()
    scaler.scale(lm).backward()
    scaler.step(optimizer)
    scaler.update()
    # `value != float("nan")` is true for every value, so comparing against nan
    # cannot detect a nan scale. math.isfinite rejects both inf and nan, and
    # using assertTrue keeps the check active under `python -O`.
    current_scale = scaler.get_scale()
    self.assertTrue(math.isfinite(current_scale), msg=f"loss scale is not finite: {current_scale}")
@onlyNativeDeviceTypes
@unittest.skipIf(device_is_910A, "aclnnAmpUpdateScale is not supported on 910A")
def test_grad_scaling_clipping(self, device):
    """Gradient scaling combined with norm clipping of still-scaled gradients.

    In the scaled run the clipping threshold is multiplied by the current
    scale, since the gradients have not been unscaled at that point.
    """
    device = torch.device(device)

    def run(device, data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api):
        max_norm = 0.2
        for step, (batch, target) in enumerate(data):
            optimizer.zero_grad()
            loss = loss_fn(model(batch), target)

            if not try_scaling_api:
                # Control path: plain backward, clip, and skip the same step.
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
                if not (scaler.is_enabled() and step == skip_iter):
                    optimizer.step()
                continue

            scaler.scale(loss).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm * scaler.get_scale())
            if step == skip_iter and scaler.is_enabled():
                # Inject an inf so the scaler skips this step.
                model[1].weight.grad.data.fill_(float('inf'))
            scaler.step(optimizer)
            scaler.update()

    self._run_scaling_case(device.type, run, unskipped=3, skipped=1, atol=1e-5)
@onlyNativeDeviceTypes
@unittest.skipIf(device_is_910A, "aclnnAmpUpdateScale is not supported on 910A")
def test_grad_scaling_clipping_separate_unscale(self, device):
    """Gradient scaling with an explicit ``unscale_`` before norm clipping.

    Because gradients are unscaled first, the scaled run clips with the same
    threshold as the control run.
    """
    device = torch.device(device)

    def run(device, data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api):
        max_norm = 0.2
        for step, (batch, target) in enumerate(data):
            optimizer.zero_grad()
            loss = loss_fn(model(batch), target)

            if not try_scaling_api:
                # Control path: plain backward, clip, and skip the same step.
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
                if not (scaler.is_enabled() and step == skip_iter):
                    optimizer.step()
                continue

            scaler.scale(loss).backward()
            if step == skip_iter and scaler.is_enabled():
                # Inject an inf so the scaler skips this step.
                model[1].weight.grad.data.fill_(float('inf'))
            scaler.unscale_(optimizer)
            # The injected inf must not make clipping itself raise.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm, error_if_nonfinite=False)
            scaler.step(optimizer)
            scaler.update()

    self._run_scaling_case(device.type, run, unskipped=3, skipped=1)
@onlyNativeDeviceTypes
@unittest.skipIf(device_is_910A, "aclnnAmpUpdateScale is not supported on 910A")
def test_grad_scaling_penalty(self, device):
    """Gradient scaling combined with a gradient-norm penalty term.

    The loss is augmented with the L2 norm of its own gradients, which are
    obtained through ``torch.autograd.grad(..., create_graph=True)`` so that the
    penalty can be back-propagated.
    """
    device = torch.device(device)

    def run(device, data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api):
        for i, (input_, target) in enumerate(data):
            optimizer.zero_grad()
            output = model(input_)
            loss = loss_fn(output, target)
            if try_scaling_api:
                # Gradients of the scaled loss are multiplied by 1/scale by hand
                # before they are used to build the penalty.
                grad_params = torch.autograd.grad(scaler.scale(loss),
                                                  model.parameters(), create_graph=True)
                inv_scale = 1. / scaler.get_scale()
                grad_params = [p * inv_scale for p in grad_params]
            else:
                grad_params = torch.autograd.grad(loss, model.parameters(), create_graph=True)
            # Penalty: square root of the summed squared gradient elements.
            grad_norm = 0
            for grad in grad_params:
                grad_norm += grad.pow(2).sum()
            grad_norm = grad_norm.sqrt()
            loss = loss + grad_norm
            if try_scaling_api:
                scaler.scale(loss).backward()
                if i == skip_iter and scaler.is_enabled():
                    # Inject an inf so the scaler skips this step.
                    model[1].weight.grad.data.fill_(float('inf'))
                scaler.step(optimizer)
                scaler.update()
            else:
                loss.backward()
                # Mirror the skipped step in the control run.
                if (not scaler.is_enabled()) or (i != skip_iter):
                    optimizer.step()
    self._run_scaling_case(device.type, run, unskipped=3, skipped=1)
@onlyNativeDeviceTypes
@unittest.skipIf(device_is_910A, "aclnnAmpUpdateScale is not supported on 910A")
def test_grad_scaling_accumulation(self, device):
    """Gradient scaling combined with gradient accumulation over two batches.

    The optimizer only steps (and gradients are only cleared) on every second
    batch; no step is skipped in this scenario.
    """
    device = torch.device(device)

    def run(device, data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api):
        iters_to_accumulate = 2
        for step, (batch, target) in enumerate(data):
            # Each batch contributes an equal share of the accumulated loss.
            loss = loss_fn(model(batch), target) / iters_to_accumulate
            if try_scaling_api:
                scaler.scale(loss).backward()
            else:
                loss.backward()

            if (step + 1) % iters_to_accumulate != 0:
                continue

            if try_scaling_api:
                scaler.step(optimizer)
                scaler.update()
            else:
                optimizer.step()
            optimizer.zero_grad()

    self._run_scaling_case(device.type, run, unskipped=2, skipped=0)
@onlyNativeDeviceTypes
@unittest.skipIf(device_is_910A, "aclnnAmpUpdateScale is not supported on 910A")
def test_grad_scaling_multiple(self, device):
    """Gradient scaling with two models, two optimizers and one shared scaler.

    Two losses mix the outputs of both models. The scaled run is compared
    against a control run, once with the scaler enabled and once disabled.
    """
    device = torch.device(device)
    for enabled in True, False:
        mod_control0, mod_scaling0, opt_control0, opt_scaling0, data, loss_fn, skip_iter = \
            _create_scaling_case(device.type)
        mod_control1, mod_scaling1, opt_control1, opt_scaling1 = \
            _create_scaling_models_optimizers(device.type)

        GradScaler = partial(torch.GradScaler, device=device.type)
        scaler = GradScaler(init_scale=128., growth_factor=2.0, enabled=enabled, growth_interval=1)

        def run(model0, model1, optimizer0, optimizer1, try_scaling_api):
            for i, (input_, target) in enumerate(data):
                optimizer0.zero_grad()
                optimizer1.zero_grad()
                output0 = model0(input_)
                output1 = model1(input_)
                loss0 = loss_fn(0.3 * output0 + 0.7 * output1, target)
                loss1 = loss_fn(0.6 * output0 - 0.4 * output1, target)

                if try_scaling_api:
                    # Both losses share the graph, so the first backward keeps it alive.
                    scaler.scale(loss0).backward(retain_graph=True)
                    scaler.scale(loss1).backward()
                    if i == skip_iter and scaler.is_enabled():
                        # Only model1 gets an inf, so only optimizer1's step is skipped.
                        model1[1].weight.grad.data.fill_(float('inf'))

                    # Explicitly unscale for just one of the two optimizers.
                    scaler.unscale_(optimizer0)

                    scaler.step(optimizer0)
                    scaler.step(optimizer1)
                    scaler.update()
                else:
                    loss0.backward(retain_graph=True)
                    loss1.backward()
                    optimizer0.step()
                    # Mirror the skipped optimizer1 step in the control run.
                    if (not scaler.is_enabled()) or (i != skip_iter):
                        optimizer1.step()

        run(mod_control0, mod_control1, opt_control0, opt_control1, False)
        run(mod_scaling0, mod_scaling1, opt_scaling0, opt_scaling1, True)

        # Three growth steps and one backoff when enabled; 1.0 when disabled.
        # The expected value is computed first: writing
        # `assertTrue(a == b if enabled else 1.0)` parses as
        # `(a == b) if enabled else 1.0` and would assert the truthy constant
        # 1.0 in the disabled case, checking nothing.
        if enabled:
            expected_scale = (128. * scaler.get_growth_factor() ** 3 *
                              scaler.get_backoff_factor() ** 1)
        else:
            expected_scale = 1.0
        self.assertTrue(scaler.get_scale() == expected_scale)

        for c, s in zip(chain(mod_control0.parameters(), mod_control1.parameters()),
                        chain(mod_scaling0.parameters(), mod_scaling1.parameters())):
            self.assertEqual(c, s, rtol=1e-5, atol=1e-7)
@onlyNativeDeviceTypes
@unittest.skipIf(device_is_910A, "aclnnAmpUpdateScale is not supported on 910A")
def test_grad_scaler_pass_itself(self, device):
    """Check how ``GradScaler.step`` hands scaling state to AMP-aware optimizers.

    Both placeholder optimizers set ``_step_supports_amp_scaling``. The one
    whose ``step`` accepts ``grad_scaler`` must receive the scaler itself (and
    a ``FutureWarning`` is expected); the other must find ``grad_scale`` and
    ``found_inf`` tensors set as attributes.
    """
    device = torch.device(device)
    make_scaler = partial(torch.amp.GradScaler, device=device.type)

    class _PlaceHolderOptimizer(torch.optim.Optimizer):
        # Gives the optimizers access to this test case's assertion methods.
        tester = self

        def __init__(self, params, defaults=None):
            super().__init__(params, {} if defaults is None else defaults)
            self._step_supports_amp_scaling = True

    class Optimizer1(_PlaceHolderOptimizer):
        def step(self, closure=None, *, grad_scaler=None):
            self.tester.assertTrue(isinstance(grad_scaler, torch.amp.GradScaler))
            self.tester.assertFalse(hasattr(self, "grad_scale"))
            self.tester.assertFalse(hasattr(self, "found_inf"))

    class Optimizer2(_PlaceHolderOptimizer):
        def step(self, closure=None):
            self.tester.assertTrue(isinstance(self.grad_scale, torch.Tensor))
            self.tester.assertTrue(isinstance(self.found_inf, torch.Tensor))

    inputs = torch.randn(4, 4).to(device)
    linear = torch.nn.Linear(4, 1).to(device)
    takes_scaler = Optimizer1(linear.parameters())
    reads_attributes = Optimizer2(linear.parameters())
    scaler = make_scaler(init_scale=2.0)

    with torch.autocast(device_type=device.type, dtype=torch.half):
        loss = linear(inputs).mean()
    scaler.scale(loss).backward()

    with self.assertWarns(FutureWarning):
        scaler.step(takes_scaler)
    scaler.step(reads_attributes)
    scaler.update()
@onlyNativeDeviceTypes
def test_grad_scaler_deprecated_warning(self, device):
    """Constructing the device-specific ``amp.GradScaler`` must warn about deprecation."""
    device = torch.device(device)
    if "npu" == device.type:
        scaler_cls = torch.npu.amp.GradScaler
    else:
        scaler_cls = torch.cpu.amp.GradScaler

    expected_message = rf"`torch.{device.type}.amp.GradScaler\(args...\)` is deprecated."
    with self.assertWarnsRegex(FutureWarning, expected_message):
        _ = scaler_cls(init_scale=2.0)
@dtypesIfPRIVATEUSE1(torch.float, torch.double, torch.half)
@dtypesIfCPU(torch.float, torch.double, torch.bfloat16, torch.half)
@dtypes(torch.float, torch.double)
def test_multinomial_cpu(self, device, dtype):
    """Define the probability-distribution builder for multinomial tests.

    NOTE(review): this body only defines ``make_prob_dist``; it never calls it
    and makes no assertions, so the test passes trivially.
    """
    def make_prob_dist(shape, is_contiguous):
        # half/bfloat16 values are drawn in the default dtype and converted afterwards.
        needs_conversion = dtype in (torch.half, torch.bfloat16)

        def uniform(dims):
            if needs_conversion:
                return torch.zeros(dims, device=device).uniform_().to(dtype=dtype)
            return torch.zeros(dims, device=device, dtype=dtype).uniform_()

        if is_contiguous:
            return uniform(shape)
        if len(shape) == 1:
            # Column 2 of a (n, 5) tensor: a strided 1-D view.
            return uniform(shape + [5])[:, 2]

        # 2-D case: carve a (shape[0], shape[1]) view out of a larger tensor.
        prob_dist = uniform([2, shape[1], 7, 1, shape[0], 1, 10])
        prob_dist = prob_dist.transpose(1, 4)
        prob_dist = prob_dist[1, :, 5, 0, :, 0, 4]
        assert not prob_dist.is_contiguous()
        return prob_dist
@onlyNativeDeviceTypes
def test_where_scalar_handcrafted_values(self, device):
    """Compare ``torch.where`` with ``numpy.where`` for scalar/tensor operands.

    Every ``x`` scalar (bool, int, float, complex) is paired with every ``y``
    value: the four scalars plus tensors of several shapes and dtypes filled
    with 17. Results are compared after casting the NumPy output to
    ``torch.result_type(x, y)``.
    """
    condition_shape = (5, 5)
    dtypes_ = (
        torch.bool, torch.uint8, torch.int8, torch.int16, torch.int64,
        torch.float16, torch.float32, torch.float64,
        torch.complex64, torch.complex128,
    )
    shapes = ((), (5,), (1, 5),)

    with torch.no_grad():
        # Built as concrete sequences on purpose: y_vals is iterated once per x
        # value, and a generator / itertools.chain would be exhausted after the
        # first x, silently skipping every remaining combination.
        tensors = [torch.empty(shape, dtype=dtype, device=device).fill_(17)
                   for shape, dtype in product(shapes, dtypes_)]

        x_vals = (True, 3, 7.0, 1 + 0.5j)
        y_vals = (False, 4, 8.0, 2 + 0.5j, *tensors)
        for x in x_vals:
            for y in y_vals:
                condition = torch.empty(*condition_shape, dtype=torch.bool, device=device).bernoulli_()
                common_dtype = torch.result_type(x, y)

                def check_equal(condition, x, y):
                    condition_np = condition.cpu().numpy()
                    x_np = x.cpu().numpy() if isinstance(x, torch.Tensor) else x
                    y_np = y.cpu().numpy() if isinstance(y, torch.Tensor) else y
                    # The NumPy result is cast to the dtype torch is expected to produce.
                    expected = torch.from_numpy(np.where(condition_np, x_np, y_np)).to(common_dtype)
                    result = torch.where(condition, x, y)
                    self.assertEqual(expected, result)

                check_equal(condition, x, y)
                check_equal(condition, y, x)
                if self.device_type == "npu":
                    # Also pass the scalars wrapped as zero-dim tensors.
                    check_equal(condition, torch.tensor(x), y)
                    check_equal(condition, y, torch.tensor(x))
                    if not isinstance(y, torch.Tensor):
                        check_equal(condition, torch.tensor(y), torch.tensor(x))
                    if isinstance(y, torch.Tensor) and y.ndim > 0:
                        # Zero-dim condition tensor created without a device argument.
                        check_equal(torch.tensor(True), x, y)
                        check_equal(torch.tensor(True), y, x)
@skipIfTorchInductor("FIXME")
def test_hook_remove(self, device):
    """Check that a tensor hook can remove itself while it is running.

    The hook replaces the incoming gradient with zeros. When it removes itself
    during the first backward pass, the second pass accumulates the ordinary
    gradient (0.2 per element for the mean of five values); otherwise the
    gradient stays zero.
    """
    def _test_helper(remove_hook):
        t = torch.ones((1, 5), device=device, requires_grad=True)

        handle = None

        def zeroing_hook(grad):
            if remove_hook:
                # `handle` is bound by the time backward() invokes the hook.
                handle.remove()
            return torch.zeros_like(grad)

        handle = t.register_hook(zeroing_hook)

        # First backward pass: the hook is always active.
        t.mean().backward()
        self.assertEqual(t.grad, torch.zeros_like(t))

        # Second backward pass: outcome depends on whether the hook removed itself.
        t.mean().backward()
        if remove_hook:
            expected_grad = 0.2 * torch.ones_like(t)
        else:
            expected_grad = torch.zeros_like(t)
        self.assertEqual(t.grad, expected_grad)

    for remove_hook in (True, False):
        _test_helper(remove_hook=remove_hook)
@skipXLA
def test_skip_xla(self, device):
    """Fail on purpose if this body ever executes with an ``'xla'`` device type."""
    running_on_xla = self.device_type == 'xla'
    if running_on_xla:
        self.assertTrue(False)
@expectedFailureXLA
def test_expected_failure_xla(self, device):
    """Fail on purpose when the device type is ``'xla'``; pass everywhere else."""
    running_on_xla = self.device_type == 'xla'
    if running_on_xla:
        self.assertTrue(False)
def test_assertRaisesRegex_ignore_msg_non_native_device(self, device):
x = torch.randn((10, 3), device=device)
t = torch.empty(10, dtype=torch.int64, device=device).random_(0, 3)
invalid_weight = torch.randn(4, device=device)
msg = "weight tensor should be defined either for all 3 classes or no classes"
with self.assertRaisesRegex(RuntimeError, msg):
torch.nn.functional.nll_loss(x, t, weight=invalid_weight)
@dtypes(*(all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.complex32)
          if not device_is_910A else all_types_and(torch.bool, torch.half)))
def test_copy_(self, device, dtype):
    """Round-trip ``copy_`` between every source dtype and ``dtype``.

    For each source dtype a tensor is copied into a ``dtype`` tensor and
    back; when the cast is considered safe the round trip must reproduce
    the source within a tolerance chosen from ``dtype``.
    """
    def is_uint8(candidate):
        return candidate is torch.uint8

    def can_cast(src_dtype, dst_dtype):
        # A uint8 destination only accepts a uint8 source here; everything
        # else defers to torch.can_cast.
        if is_uint8(dst_dtype):
            return is_uint8(src_dtype)
        return torch.can_cast(src_dtype, dst_dtype)

    def make_tensor_wrapper(shape, dtype):
        # complex32 tensors are generated with randn instead of make_tensor.
        if dtype is torch.complex32:
            return torch.randn(shape, device=device, dtype=dtype)
        return make_tensor(shape, device=device, dtype=dtype)

    # The tolerance depends only on the destination dtype.
    tolerance = None
    if dtype in (torch.half, torch.complex32):
        tolerance = 1e-3
    if dtype in (torch.bfloat16,):
        tolerance = 1e-2

    target = make_tensor_wrapper((50,), dtype)
    src_dtypes = (
        all_types_and(torch.bool, torch.half)
        if device_is_910A
        else all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.complex32)
    )
    for src_dtype in src_dtypes:
        source = make_tensor_wrapper((50,), dtype=src_dtype)
        target.copy_(source)
        round_trip = make_tensor_wrapper((50, ), dtype=src_dtype)
        if not can_cast(src_dtype, dtype):
            continue
        self.assertEqual(source, round_trip.copy_(target), rtol=tolerance, atol=tolerance)
@dtypes(*(all_types_complex_float8_and(torch.bool, torch.half, torch.bfloat16, torch.complex32,
                                       torch.uint16, torch.uint32, torch.uint64)
          if not device_is_910A else all_types_and(torch.bool, torch.half)))
def test_item(self, device, dtype):
    """``item()`` on a 0-dim tensor of ones compares equal to 1."""
    xla_unsupported_dtypes = [
        torch.uint16,
        torch.uint32,
        torch.uint64,
        torch.float8_e4m3fn,
        torch.float8_e5m2,
        torch.float8_e4m3fnuz,
        torch.float8_e5m2fnuz,
    ]
    on_xla = torch.device(device).type == 'xla'
    if on_xla and dtype in xla_unsupported_dtypes:
        self.skipTest('uint16,32,64,float8 not implemented on XLA')
    scalar = torch.ones((), device=device, dtype=dtype)
    self.assertEqual(1, scalar.item())
def test__local_scalar_dense_with_empty_tensor(self, device):
    """``aten._local_scalar_dense`` raises on a tensor with zero elements."""
    empty = torch.randn(0, device=device)
    local_scalar_dense = torch.ops.aten._local_scalar_dense
    with self.assertRaisesRegex(RuntimeError, "Empty tensor not supported"):
        local_scalar_dense(empty)
@onlyNativeDeviceTypes
def test_masked_scatter_inplace_noncontiguous(self, device):
    """``masked_scatter_`` gives the same result for every contiguity mix.

    The reference comes from a contiguous target with a contiguous mask
    and is kept in its own tensor.  ``masked_scatter_`` works in place and
    returns its receiver, so re-using the reference tensor as a later
    target would make that comparison check a tensor against itself.
    """
    def zero_targets():
        # Fresh (non-contiguous, contiguous) all-zero targets of shape (2, 5).
        base = torch.zeros(5, 2, dtype=torch.long, device=device)
        non_contig = base.transpose(0, 1)
        return non_contig, non_contig.contiguous()

    t_non_contig, t_contig = zero_targets()
    # Unittest assertions (unlike bare ``assert``) survive ``python -O``.
    self.assertTrue(t_contig.is_contiguous())
    self.assertFalse(t_non_contig.is_contiguous())

    mask = torch.tensor([[False, True], [False, True], [False, False], [True, True], [True, True]], device=device)
    mask_non_contig = mask.transpose(0, 1)
    mask_contig = mask_non_contig.contiguous()
    self.assertTrue(mask_contig.is_contiguous())
    self.assertFalse(mask_non_contig.is_contiguous())

    source = torch.tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 9]], device=device)

    # Reference: contiguous target, contiguous mask (on a private copy).
    expected = t_contig.clone().masked_scatter_(mask_contig, source)

    # Non-contiguous target, non-contiguous mask.
    actual = t_non_contig.masked_scatter_(mask_non_contig, source)
    self.assertEqual(actual, expected)

    # Contiguous target, non-contiguous mask.
    actual = t_contig.masked_scatter_(mask_non_contig, source)
    self.assertEqual(actual, expected)

    # Non-contiguous target, contiguous mask; start again from zeros so
    # the outcome does not depend on the earlier scatter.
    t_non_contig, _ = zero_targets()
    actual = t_non_contig.masked_scatter_(mask_contig, source)
    self.assertEqual(actual, expected)
class TestDevicePrecision(TestCase):
    """Device-parametrized checks for copies, serialization, indexing and clamp."""

    exact_dtype = True

    @onlyPRIVATEUSE1
    @unittest.skipIf(device_is_910A, "bfloat16 is not supported on 910A")
    def test_index_add_bfloat16(self, device):
        """bfloat16 ``index_add`` on ``device`` matches the CPU result (atol 1e-2)."""
        base_cpu = torch.randn(5, 3, device='cpu').bfloat16()
        rows_cpu = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.bfloat16, device='cpu')
        index_cpu = torch.tensor([0, 4, 2], device='cpu')
        out_cpu = base_cpu.index_add(0, index_cpu, rows_cpu)

        base_dev = base_cpu.to(device=device)
        rows_dev = rows_cpu.to(device=device)
        index_dev = index_cpu.to(device=device)
        out_dev = base_dev.index_add(0, index_dev, rows_dev)

        self.assertEqual(out_cpu, out_dev, atol=1e-2, rtol=0)

    def test_device_serialization(self, device):
        """A tensor saved and reloaded keeps its values, type and device."""
        original = torch.randn(4, 4, device=device)
        with tempfile.NamedTemporaryFile() as handle:
            torch.save(original, handle)
            handle.seek(0)
            restored = torch.load(handle)
        self.assertEqual(restored, original)
        self.assertIs(type(restored), type(original))
        self.assertEqual(restored.device, original.device)

    @deviceCountAtLeast(2)
    def test_multidevice_serialization(self, devices):
        """A list of tensors on two devices survives a save/load round trip."""
        originals = [torch.randn(4, 4, device=dev) for dev in (devices[0], devices[1])]
        with tempfile.NamedTemporaryFile() as handle:
            torch.save(originals, handle)
            handle.seek(0)
            restored = torch.load(handle)
        for before, after in zip(originals, restored):
            self.assertEqual(after, before)
            self.assertIs(type(after), type(before))
            self.assertEqual(after.device, before.device)

    @deviceCountAtLeast(1)
    def test_copy_noncontig(self, devices):
        """Strided ``copy_`` across devices and dtypes writes only the selected slots."""
        def do_test(d0, d1):
            floats = torch.tensor([1.5, 2.5, 3.5, 4.5, 5.5, 6.5], device=d0)
            ints = torch.tensor([0, 0, 0, 0, 0, 0], device=d1)
            self.assertNotEqual(floats.dtype, ints.dtype)
            ints[::2].copy_(floats[::2])
            self.assertEqual(ints, [1, 0, 3, 0, 5, 0])

        device_pairs = [('cpu', devices[0]), (devices[0], 'cpu')]
        if len(devices) > 1:
            device_pairs.append((devices[0], devices[1]))
        for src_device, dst_device in device_pairs:
            do_test(src_device, dst_device)

    @deviceCountAtLeast(2)
    def test_type_conversions_same_device(self, devices):
        """Integer conversions keep the tensor on its (second) device."""
        x = torch.randn(5, 5, device=devices[1])
        conversions = (
            lambda t: t.int(),
            lambda t: t.type(torch.int),
            lambda t: t.to(torch.int),
        )
        for convert in conversions:
            self.assertEqual(convert(x).device, torch.device(devices[1]))

    @dtypesIfPRIVATEUSE1(torch.half, torch.float, torch.double,
                         torch.int8, torch.short, torch.int, torch.long,
                         torch.uint8)
    @dtypes(torch.float, torch.double,
            torch.int8, torch.short, torch.int, torch.long,
            torch.uint8)
    def test_from_sequence(self, device, dtype):
        """``torch.tensor`` on a nested list equals ``arange(20)`` shaped (5, 4)."""
        rows = [list(range(first, first + 4)) for first in range(0, 20, 4)]
        reference = torch.arange(0, 20).resize_(5, 4)
        built = torch.tensor(rows, dtype=dtype, device=device)
        self.assertEqual(built, reference, exact_dtype=False)

    @deviceCountAtLeast(1)
    def test_advancedindex_mixed_cpu_devices(self, devices) -> None:
        """Index a device tensor with CPU indices and with a CPU/device mix."""
        def check(x: torch.Tensor, ia: torch.Tensor, ib: torch.Tensor) -> None:
            # Reads must agree with the same indexing done entirely on CPU.
            self.assertEqual(x[:, ia, None, ib, 0].cpu(),
                             x.cpu()[:, ia.cpu(), None, ib.cpu(), 0])
            self.assertEqual(x[ia], x.cpu()[ia.cpu()])
            # Writes are only required to run without raising.
            target_a = x.clone()
            target_b = x.clone()
            shape_a = x[:, ia, None, ib, 0].shape
            shape_b = x[ia].shape
            target_a[:, ia, None, ib, 0] = torch.randn(shape_a).to(target_a)
            target_b[ia] = torch.randn(shape_b).to(target_b)

        cpu = torch.device('cpu')
        for device in devices:
            data = torch.randn(3, 4, 4, 4, 3)
            idx_a = torch.tensor([0, 2, 1])
            idx_b = torch.tensor([0, 2, 1])

            # Device tensor, both indices on CPU.
            data = data.to(device)
            idx_a = idx_a.to(cpu)
            idx_b = idx_b.to(cpu)
            check(data, idx_a, idx_b)

            # Device tensor, one index on CPU and one on the device.
            idx_b = idx_b.to(device)
            check(data, idx_a, idx_b)

    @deviceCountAtLeast(1)
    def test_advancedindex_mixed_devices_error(self, devices) -> None:
        """Indexing with indices on an incompatible device raises RuntimeError."""
        def check(x: torch.Tensor, ia: torch.Tensor, ib: torch.Tensor) -> None:
            pattern = fr"indices should be either .* \({x.device}\)"
            with self.assertRaisesRegex(RuntimeError, pattern):
                x[:, ia, None, ib, 0]
            with self.assertRaisesRegex(RuntimeError, pattern):
                x[ib]

        cpu = torch.device('cpu')
        for device in devices:
            # CPU tensor, both indices on the device.
            data = torch.randn(3, 4, 4, 4, 3)
            idx_a = torch.tensor([0, 2, 1]).to(device)
            idx_b = torch.tensor([0, 2, 1]).to(device)
            check(data, idx_a, idx_b)

            # CPU tensor, one index on CPU and one on the device.
            data = data.to(cpu)
            idx_a = idx_a.to(cpu)
            idx_b = idx_b.to(device)
            check(data, idx_a, idx_b)

            if len(devices) <= 1:
                continue
            # Device tensor, one index on CPU and one on a different device.
            other_device = devices[0] if device == devices[1] else devices[1]
            data = data.to(device)
            idx_a = idx_a.to(cpu)
            idx_b = idx_b.to(other_device)
            check(data, idx_a, idx_b)

    def test_copy_broadcast(self, device) -> None:
        """``copy_`` broadcasts a length-5 row into a (10, 5) tensor in both device directions."""
        placements = (
            ({}, {'device': device}),   # default-device destination, device source
            ({'device': device}, {}),   # device destination, default-device source
        )
        for dst_kwargs, src_kwargs in placements:
            dst = torch.randn(10, 5, **dst_kwargs)
            src = torch.randn(5, **src_kwargs)
            dst.copy_(src)
            self.assertEqual(dst[3], src)

    @dtypes(torch.int64, torch.float32, torch.float64)
    def test_clamp(self, device, dtype):
        """Tensor-bound ``clamp`` agrees with ``max``/``min`` and ``np.clip``."""
        shapes = [(100, 50), (10, 64), (97,)]
        for shape, noncontig in product(shapes, (True, False)):
            def fresh():
                return make_tensor(shape, device=device, dtype=dtype,
                                   noncontiguous=noncontig)

            x = fresh()
            ub = fresh()
            lb = fresh()

            # Both bounds, checked against torch and against numpy.
            expect = x.max(lb).min(ub)
            actual = x.clamp(lb, ub)
            self.assertEqual(expect, actual)
            expect = np.clip(x.cpu().numpy(), lb.cpu().numpy(), ub.cpu().numpy())
            self.assertEqual(expect, actual)

            # One-sided clamps.
            self.assertEqual(x.max(lb), x.clamp(min=lb))
            self.assertEqual(x.min(ub), x.clamp(max=ub))

            # Bounds that broadcast against the input.
            lb_first = lb[0]
            ub_column = ub[..., :1]
            self.assertEqual(x.max(lb_first).min(ub_column), x.clamp(lb_first, ub_column))

            # Input that broadcasts against the bounds.
            x_column = x[..., :1]
            self.assertEqual(x_column.max(lb).min(ub), x_column.clamp(lb, ub))

    def test_NPU_device_idx(self, device):
        """``_efficientzerotensor`` reports the same device as ``torch.zeros``."""
        dense = torch.zeros(3, device=device)
        efficient = torch._efficientzerotensor(3, device=device)
        self.assertEqual(dense.device, efficient.device)
class Tracker:
    """Record its own finalization by setting slot 0 of a shared list."""

    def __init__(self, marker):
        # ``marker`` is a mutable sequence; ``__del__`` writes True to marker[0].
        self.marker = marker

    @staticmethod
    def make():
        """Return ``(marker, tracker)`` where ``marker`` starts as ``[False]``."""
        flag = [False]
        tracker = Tracker(flag)
        return flag, tracker

    def __del__(self):
        self.marker[0] = True
@contextlib.contextmanager
def disable_gc():
    """Context manager that runs its body with the garbage collector disabled.

    The collector is re-enabled on exit only if it was enabled on entry.
    """
    if not gc.isenabled():
        # Already disabled: nothing to switch off and nothing to restore.
        yield
        return
    try:
        gc.disable()
        yield
    finally:
        gc.enable()
class TestTorch(TestCase):
exact_dtype = True
def test_dir(self):
    """Smoke test: listing the attributes of ``torch`` must not raise."""
    _ = dir(torch)
def test_wildcard_import(self):
    """Smoke test: ``from torch import *`` must execute without raising."""
    statement = 'from torch import *'
    exec(statement)
def test_newaxis_numpy_comparison(self):
    """Indexing with ``None``/``Ellipsis``/ints matches NumPy on the same data."""
    def run_test(tensor, *idx):
        as_numpy = tensor.numpy()
        self.assertEqual(tensor[idx], as_numpy[idx])

    def run_all(tensor, cases):
        for case in cases:
            run_test(tensor, *case)

    # 1-D input.
    run_all(torch.arange(0, 10), [
        [None],
        [None, None],
        [Ellipsis, None],
        [None, Ellipsis],
        [2, None],
        [None, 2],
        [Ellipsis, None, 2],
        [Ellipsis, 2, None],
        [2, Ellipsis, None],
        [2, None, Ellipsis],
        [None, 2, Ellipsis],
        [None, Ellipsis, 2],
    ])

    # 2-D input.
    run_all(torch.arange(0, 12).view(3, 4), [
        [None],
        [None, None],
        [None, None, None],
        [Ellipsis, None],
        [Ellipsis, None, None],
        [None, Ellipsis],
        [None, Ellipsis, None],
        [None, None, Ellipsis],
        [2, None],
        [2, None, Ellipsis],
        [2, Ellipsis, None],
        [None, 2, Ellipsis],
        [Ellipsis, 2, None],
        [Ellipsis, None, 2],
        [None, Ellipsis, 2],
        [1, 2, None],
        [1, 2, Ellipsis, None],
        [1, Ellipsis, 2, None],
        [Ellipsis, 1, None, 2],
        [Ellipsis, 1, 2, None],
        [1, None, 2, Ellipsis],
        [None, 1, Ellipsis, 2],
        [None, 1, 2, Ellipsis],
    ])
def _consecutive(self, size, start=1):
sequence = torch.ones(torch.tensor(size).prod(0)).cumsum(0)
sequence.add_(start - 1)
return sequence.resize_(*size)
def test_newindex(self):
    """Partial index assignment round-trips; invalid indices raise on assignment."""
    reference = self._consecutive((3, 3, 3))

    def checkPartialAssign(index):
        scratch = torch.zeros(3, 3, 3)
        scratch[index] = self._consecutive((3, 3, 3))[index]
        self.assertEqual(scratch[index], self._consecutive((3, 3, 3))[index], atol=0, rtol=0)
        scratch[index] = 0
        self.assertEqual(scratch, torch.zeros(3, 3, 3), atol=0, rtol=0)

    for index in (0, 1, 2, (0, 1), (1, 2), (0, 2)):
        checkPartialAssign(index)
    checkPartialAssign(torch.LongTensor((0, 2)))

    # (expected exception, index) pairs; slices are spelled out with slice().
    float_slice = slice(0.0, 2.0)
    invalid_assignments = [
        (IndexError, (1, 1, 1, 1)),
        (IndexError, (1, 1, 1, (1, 1))),
        (IndexError, (3, 3, 3, 3, 3, 3, 3, 3)),
        (IndexError, 0.0),
        (TypeError, float_slice),
        (IndexError, (0.0, float_slice)),
        (IndexError, (0.0, slice(None), float_slice)),
        (IndexError, (0.0, Ellipsis, float_slice)),
        (IndexError, (0.0, slice(None), 0.0)),
    ]
    for expected_error, bad_index in invalid_assignments:
        with self.assertRaises(expected_error):
            reference[bad_index] = 1
def test_check(self):
    """Each ``torch._check*`` helper raises its own error type with the given message."""
    test_cases = [
        (torch._check, RuntimeError),
        (torch._check_index, IndexError),
        (torch._check_value, ValueError),
        (torch._check_type, TypeError),
        (torch._check_not_implemented, NotImplementedError),
    ]

    def tensor_message():
        return torch.arange(4)

    def formatted_message():
        return f"{'test'} {[1, 2, 'a', True]} {True} {100} {torch.arange(4)}"

    for check_fn, expected_error in test_cases:
        # A true condition passes silently.
        check_fn(True)

        # Default failure message.
        with self.assertRaisesRegex(expected_error, 'Expected cond to be True'):
            check_fn(False)

        # Plain string message supplied through a callable.
        plain = 'message'
        with self.assertRaisesRegex(expected_error, plain):
            check_fn(False, lambda: plain)

        # Message callables returning a tensor, then a formatted string.
        for message_fn in (tensor_message, formatted_message):
            with self.assertRaisesRegex(expected_error, re.escape(str(message_fn()))):
                check_fn(False, message_fn)

        # A non-bool condition is a TypeError for every helper.
        for bad_cond in ('wrong type', torch.tensor(True)):
            with self.assertRaisesRegex(TypeError, 'cond must be a bool'):
                check_fn(bad_cond)
def test_index_add(self):
    """``index_add_`` (with and without ``alpha``) matches a row-by-row reference."""
    num_copy, num_dest = 3, 3
    combos = product(
        get_all_device_types(),
        product([True, False], repeat=3),
        ((), (4, 5)),
        [torch.int, torch.long],
    )
    for device, (dest_contig, src_contig, index_contig), other_sizes, dtype in combos:
        dest = torch.randn(num_dest, *other_sizes, device=device)
        if not dest_contig:
            dest = make_tensor(dest.shape, device=device, dtype=dest.dtype, noncontiguous=True)
        src = torch.randn(num_copy, *other_sizes, device=device)
        if not src_contig:
            src = noncontiguous_like(src)
        idx = torch.randperm(num_dest, dtype=dtype, device=device).narrow(0, 0, num_copy)
        if not index_contig:
            idx = noncontiguous_like(idx)

        # Default alpha.
        reference = dest.clone()
        dest.index_add_(0, idx, src)
        for row in range(idx.size(0)):
            reference[idx[row]] += src[row]
        self.assertEqual(dest, reference)

        # Explicit alpha=2.
        reference = dest.clone()
        dest.index_add_(0, idx, src, alpha=2)
        for row in range(idx.size(0)):
            reference[idx[row]] += src[row] * 2
        self.assertEqual(dest, reference)
def test_index_add_all_dtypes(self):
    """Adding every row of a tensor into zeros reproduces it (negated for ``alpha=-1``)."""
    size = [5, 5]
    for device in get_all_device_types():
        for dtype in get_all_math_dtypes(device):
            for idx_dtype in [torch.int, torch.long]:
                if dtype.is_floating_point or dtype.is_complex:
                    values = torch.rand(size, dtype=dtype, device=device)
                elif dtype.is_signed:
                    values = torch.randint(-5, 15, size, dtype=dtype, device=device)
                else:
                    values = torch.randint(0, 10, size, dtype=dtype, device=device)

                zeros = torch.zeros(size, dtype=dtype, device=device)
                all_rows = torch.arange(0, size[0], dtype=idx_dtype, device=device)

                self.assertEqual(zeros.index_add(0, all_rows, values), values)
                self.assertEqual(zeros.index_add(0, all_rows, values, alpha=-1), -values)
@unittest.mock.patch.object(torch._dynamo.config, "suppress_errors", False)
@set_default_dtype(torch.double)
def test_index_add_correctness(self):
    """``index_add`` equals half of ``index_add(alpha=2)``; bad indices raise."""
    def helper(dim, dtype, device, size_result, size_source):
        base = torch.zeros(size_result, dtype=dtype, device=device)
        index = torch.randint(0, size_result[dim], (size_source[dim],),
                              dtype=torch.long, device=device)
        if dtype.is_floating_point or dtype.is_complex:
            source = torch.rand(size_source, dtype=dtype, device=device)
        elif dtype.is_signed:
            source = torch.randint(-2, 5, size_source, dtype=dtype, device=device)
        else:
            source = torch.randint(0, 5, size_source, dtype=dtype, device=device)

        halved = tensor_halved = base.index_add(dim, index, source, alpha=2.) / 2.
        ref_out = tensor_halved.to(dtype=dtype)
        out = base.index_add(dim, index, source)
        # ``ref_out`` is already in ``dtype``, so one comparison covers every device.
        self.assertEqual(out, ref_out, atol=1e-2, rtol=1e-2)

    dtypes_ = (
        all_types_and(torch.half)
        if device_is_910A
        else all_types_and_complex_and(torch.half, torch.bfloat16)
    )

    for dim in [-1, -2, -3]:
        for dtype in dtypes_:
            for device in get_all_device_types():
                for size in [(2, 512, 256), (5, 256, 256)]:
                    helper(dim, dtype, device, size, size)

            # Bound checks on default-device tensors.
            result = torch.zeros(1, 512, 256, dtype=dtype)
            source = torch.ones(1, 512, 256, dtype=dtype)
            bad_indices = (
                torch.ones(257).to(dtype=torch.long),          # 257 entries
                (torch.ones(256) * 257).to(dtype=torch.long),  # every entry is 257
            )
            for index in bad_indices:
                with self.assertRaises(RuntimeError):
                    result.index_add_(dim, index, source)
def test_index_add_cornercase(self):
    """``index_add`` on a 0-dim tensor rejects a 3-dim source."""
    expected_msg = r"source tensor shape must match self tensor shape, excluding the specified dimension"
    for device in get_all_device_types():
        scalar_dest = torch.randn((), device=device)
        index = torch.tensor([0], device=device)
        source = torch.randn(1, 1, 1, device=device)
        with self.assertRaisesRegex(RuntimeError, expected_msg):
            scalar_dest.index_add(0, index, source)
def test_linspace_logspace(self):
    """``linspace``/``logspace`` outputs never require grad, even for grad-requiring endpoints."""
    start = 0.0
    end = 3.0

    def grad_tensor(value):
        return torch.tensor(value, requires_grad=True)

    for step in [0, 1, 2]:
        for space_fn in (torch.linspace, torch.logspace):
            # Both endpoints as tensors, then only the start, then only the end.
            self.assertFalse(
                space_fn(grad_tensor(start), grad_tensor(end), step).requires_grad
            )
            self.assertFalse(space_fn(grad_tensor(start), end, step).requires_grad)
            self.assertFalse(space_fn(start, grad_tensor(end), step).requires_grad)
def test_unflatten(self):
    """``unflatten`` with explicit and inferred (-1) sizes, plus its error messages."""
    # (actual, expected) thunks, evaluated lazily and in order.
    equal_cases = [
        (lambda: torch.tensor([]).unflatten(0, (0, 1)), lambda: torch.empty(0, 1)),
        (lambda: torch.tensor([1]).unflatten(0, (1, 1)), lambda: torch.tensor([[1]])),
        (lambda: torch.tensor([1, 2, 3, 4]).unflatten(0, (2, 2)), lambda: torch.tensor([[1, 2], [3, 4]])),
        (lambda: torch.tensor([1, 2, 3, 4]).unflatten(0, [2, 2]), lambda: torch.tensor([[1, 2], [3, 4]])),
        (lambda: torch.tensor([1, 2, 3, 4]).unflatten(0, torch.Size([2, 2])), lambda: torch.tensor([[1, 2], [3, 4]])),
        (lambda: torch.ones(2, 10).unflatten(1, (5, 2)), lambda: torch.ones(2, 5, 2)),
        (lambda: torch.tensor([1, 2, 3, 4]).unflatten(0, (-1, 2)), lambda: torch.tensor([[1, 2], [3, 4]])),
        (lambda: torch.ones(2, 10).unflatten(1, (5, -1)), lambda: torch.ones(2, 5, 2)),
        (lambda: torch.ones(2, 10).unflatten(1, (-1,)), lambda: torch.ones(2, 10)),
        (lambda: torch.ones(2, 3 * 4 * 5 * 6).unflatten(1, (3, 4, -1, 6)), lambda: torch.ones(2, 3, 4, 5, 6)),
        (lambda: torch.ones(2, 0, 2).unflatten(1, (3, -1, 4, 5)), lambda: torch.ones(2, 3, 0, 4, 5, 2)),
    ]
    for make_actual, make_expected in equal_cases:
        self.assertEqual(make_actual(), make_expected())

    # (exception type, message regex, failing call).
    error_cases = [
        (TypeError, r"unflatten\(\): argument 'dim' \(position 1\) must be int, not str",
         lambda: torch.tensor([1]).unflatten('A', (1, 1))),
        (RuntimeError, r"Name 'A' not found in Tensor\[None\].",
         lambda: torch.ones(4).unflatten('A', (('A', 2), ('B', 2)))),
        (RuntimeError, r"sizes must be non-empty",
         lambda: torch.tensor([1]).unflatten(0, [])),
        (RuntimeError, r"Provided sizes \[2, 2\] don't multiply up to the size of dim 0 \(1\)",
         lambda: torch.tensor([1]).unflatten(0, [2, 2])),
        (RuntimeError, r".*Dimension specified as 0 but tensor has no dimensions",
         lambda: torch.tensor(1).unflatten(0, [0])),
        (RuntimeError, r"only one dimension can be inferred",
         lambda: torch.randn(5, 10).unflatten(1, (-1, -1))),
        (RuntimeError, r"Provided sizes \[-1, 4\] don't multiply up to the size of dim 1 \(10\)",
         lambda: torch.randn(5, 10).unflatten(1, (-1, 4))),
        (RuntimeError, r"the unspecified dimension size -1 can be any value and is ambiguous",
         lambda: torch.randn(2, 0).unflatten(1, (2, -1, 0))),
    ]
    for expected_error, pattern, failing_call in error_cases:
        with self.assertRaisesRegex(expected_error, pattern):
            failing_call()
def test_warn_types(self):
    """``torch._C._warn*`` helpers each emit one warning of the expected category."""
    test_cases = [
        (torch._C._warn, UserWarning, r"Test message for TORCH_WARN"),
        (torch._C._warn_deprecation, DeprecationWarning, r"Test message for TORCH_WARN_DEPRECATION"),
    ]
    for emit, warning_type, message in test_cases:
        not_raised = f'{warning_type} not raised'
        with warnings.catch_warnings(record=True) as caught:
            # Start from a clean filter list, then let this category through.
            warnings.resetwarnings()
            warnings.filterwarnings('always', category=warning_type)
            emit()
            self.assertEqual(len(caught), 1, msg=not_raised)
            warning = caught[0].message
            self.assertTrue(isinstance(warning, warning_type), msg=not_raised)
            self.assertTrue(re.search(message, str(warning)))
def test_structseq_repr(self):
    """Compare ``repr`` of the result of ``Tensor.max(dim)`` with a literal rendering."""
    a = torch.arange(250).reshape(5, 5, 10)
    # The literal below is runtime text; its content and layout are significant.
    expected = """
torch.return_types.max(
values=tensor([[ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
[ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99],
[140, 141, 142, 143, 144, 145, 146, 147, 148, 149],
[190, 191, 192, 193, 194, 195, 196, 197, 198, 199],
[240, 241, 242, 243, 244, 245, 246, 247, 248, 249]]),
indices=tensor([[4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
[4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
[4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
[4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
[4, 4, 4, 4, 4, 4, 4, 4, 4, 4]]))"""
    # The literal is dedented and stripped before the comparison.
    self.assertEqual(repr(a.max(1)), textwrap.dedent(expected).strip())
def test_is_same_size(self):
    """``is_same_size`` for dense tensors, nested tensors and mixed pairs."""
    # Dense tensors.
    dense = torch.empty(3, 4, 9, 10)
    fewer_dims = torch.empty(3, 4)
    other_sizes = torch.empty(1, 9, 3, 3)
    same_shape = torch.empty(3, 4, 9, 10)
    self.assertFalse(dense.is_same_size(fewer_dims))
    self.assertFalse(dense.is_same_size(other_sizes))
    self.assertTrue(dense.is_same_size(same_shape))

    # Nested tensors.
    def nested(*shapes):
        return torch.nested.nested_tensor([torch.ones(*shape) for shape in shapes])

    ragged = nested((2, 4), (3, 4), (5, 4))
    uniform = nested((2, 4), (2, 4), (2, 4))
    higher_rank = nested((2, 4, 5), (2, 6, 5))
    ragged_twin = nested((2, 4), (3, 4), (5, 4))
    self.assertFalse(ragged.is_same_size(uniform))
    self.assertFalse(ragged.is_same_size(higher_rank))
    self.assertTrue(ragged.is_same_size(ragged_twin))

    # Mixing dense and nested operands is an error in either order.
    for receiver, argument in ((dense, ragged), (ragged, dense)):
        with self.assertRaisesRegex(RuntimeError, "Expected both self and other to be nested tensors."):
            receiver.is_same_size(argument)
def test_tensor_set(self):
    """``Tensor.set_`` with tensor and storage sources, positional and keyword forms."""
    def assert_shares_storage(left, right):
        self.assertEqual(left.storage()._cdata, right.storage()._cdata)

    dst = torch.tensor([])
    src = torch.empty(3, 4, 9, 10).uniform_()

    # Tensor source.
    dst.set_(src)
    assert_shares_storage(dst, src)

    # Storage source with explicit size (torch.Size, then plain tuple).
    size = torch.Size([9, 3, 4, 10])
    dst.set_(src.storage(), 0, size)
    self.assertEqual(dst.size(), size)
    dst.set_(src.storage(), 0, tuple(size))
    self.assertEqual(dst.size(), size)
    self.assertEqual(dst.stride(), (120, 40, 10, 1))

    # Storage source with explicit stride, positional then keyword.
    stride = (10, 360, 90, 1)
    dst.set_(src.storage(), 0, size, stride)
    self.assertEqual(dst.stride(), stride)
    dst.set_(src.storage(), 0, size=size, stride=stride)
    self.assertEqual(dst.size(), size)
    self.assertEqual(dst.stride(), stride)

    # Keyword spelling of ``source``.
    dst = torch.tensor([])
    dst.set_(source=src)
    assert_shares_storage(dst, src)
    dst.set_(source=src.storage())
    assert_shares_storage(dst, src)
    dst.set_(source=src.storage(), storage_offset=0, size=size, stride=stride)
    self.assertEqual(dst.size(), size)
    self.assertEqual(dst.stride(), stride)

    # Bool tensors.
    dst = torch.tensor([True, True], dtype=torch.bool)
    src = torch.tensor([False, False], dtype=torch.bool)
    dst.set_(src)
    assert_shares_storage(dst, src)
def test_tensor_set_errors(self):
    """``Tensor.set_`` rejects out-of-bounds offsets and dtype/device mismatches."""
    f_cpu = torch.randn((2, 3), dtype=torch.float32)
    d_cpu = torch.randn((2, 3), dtype=torch.float64)

    def assert_set_rejected(dst, src):
        # Storage source, storage source with geometry, then tensor source.
        self.assertRaises(RuntimeError, lambda: dst.set_(src.storage()))
        self.assertRaises(RuntimeError,
                          lambda: dst.set_(src.storage(), 0, src.size(), src.stride()))
        self.assertRaises(RuntimeError, lambda: dst.set_(src))

    storage_offset = 0x41414141
    with self.assertRaisesRegex(RuntimeError, "out of bounds for storage of size"):
        t = torch.randn(1)
        t.set_(t.untyped_storage(), storage_offset, t.size())

    # With a different size the same offset is accepted and the storage
    # byte count afterwards covers offset + numel float32 elements.
    t = torch.randn(1)
    size = torch.Size([2, 3])
    t.set_(t.untyped_storage(), storage_offset, size)
    self.assertEqual(t.storage_offset(), storage_offset)
    self.assertEqual(t.untyped_storage().nbytes(), (storage_offset + size[0] * size[1]) * 4)

    # dtype mismatch.
    assert_set_rejected(f_cpu, d_cpu)

    # Device mismatch, in both directions.
    if torch_npu.npu.is_available():
        f_npu = torch.randn((2, 3), dtype=torch.float32, device='npu')
        assert_set_rejected(f_cpu, f_npu)
        assert_set_rejected(f_npu, f_cpu)
def test_equal(self):
    """Exercise ``torch.equal`` / ``Tensor.equal`` on CPU and, when available, NPU.

    Covers contiguous and non-contiguous operands, differing dtypes and
    shapes, neg/conj views, views that share storage with their base, and
    tensors that contain NaN.
    """
    for device in ["cpu", "npu"]:
        if device == "npu" and not torch_npu.npu.is_available():
            continue

        # Contiguous 1-D tensors: same values, different values,
        # different length, and an empty tensor.
        t1 = torch.tensor((3., 4., 9., 10.), device=device)
        t2 = t1.contiguous()
        t3 = torch.tensor((1., 9., 3., 10.), device=device)
        t4 = torch.tensor((3., 4., 9.), device=device)
        t5 = torch.tensor([], device=device)
        self.assertTrue(t1.equal(t2))
        self.assertFalse(t1.equal(t3))
        self.assertFalse(t1.equal(t4))
        self.assertFalse(t1.equal(t5))
        self.assertTrue(torch.equal(t1, t2))
        self.assertFalse(torch.equal(t1, t3))
        self.assertFalse(torch.equal(t1, t4))
        self.assertFalse(torch.equal(t1, t5))

        # Non-contiguous 2-D slice against a clone and fresh tensors.
        s = torch.tensor(((1, 2, 3, 4), (5, 6, 7, 8)), device=device)
        s1 = s[:, 1:3]
        s2 = s1.clone()
        s3 = torch.tensor(((2, 3), (6, 7)), device=device)
        s4 = torch.tensor(((0, 0), (0, 0)), device=device)
        self.assertFalse(s1.is_contiguous())
        self.assertTrue(s1.equal(s2))
        self.assertTrue(s1.equal(s3))
        self.assertFalse(s1.equal(s4))
        self.assertTrue(torch.equal(s1, s2))
        self.assertTrue(torch.equal(s1, s3))
        self.assertFalse(torch.equal(s1, s4))

        # Different dtypes: equal values compare equal, different shapes do not.
        x = torch.tensor((1, 2, 3), dtype=torch.float, device=device)
        y = torch.tensor((1, 2, 3), dtype=torch.int, device=device)
        z = torch.tensor((1, -1), dtype=torch.int, device=device)
        self.assertTrue(torch.equal(x, y))
        self.assertFalse(torch.equal(z, x))

        # A neg view shares pointer, offset, stride and size with its base
        # but must not compare equal to it.
        neg_0 = torch.tensor((1, 2, 3), dtype=torch.float, device=device)
        neg_1 = neg_0._neg_view()
        self.assertTrue(neg_1.is_neg())
        self.assertEqual(neg_0.data_ptr(), neg_1.data_ptr())
        self.assertEqual(neg_0.storage_offset(), neg_1.storage_offset())
        self.assertEqual(neg_0.stride(), neg_1.stride())
        self.assertEqual(neg_0.size(), neg_1.size())
        self.assertFalse(torch.equal(neg_0, neg_1))
        # This check is not run under TorchInductor.
        if not TEST_WITH_TORCHINDUCTOR:
            self.assertTrue(torch.equal(neg_0, neg_1._neg_view()))

        # Same for a conj view of a complex tensor.
        conj_0 = torch.tensor([1.0 + 2.0j, 2.0 + 1.0j], device=device)
        conj_1 = conj_0.conj()
        self.assertTrue(conj_1.is_conj())
        self.assertEqual(conj_0.data_ptr(), conj_1.data_ptr())
        self.assertEqual(conj_0.storage_offset(), conj_1.storage_offset())
        self.assertEqual(conj_0.stride(), conj_1.stride())
        self.assertEqual(conj_0.size(), conj_1.size())
        self.assertFalse(torch.equal(conj_0, conj_1))
        # This check is not run under TorchInductor.
        if not TEST_WITH_TORCHINDUCTOR:
            self.assertTrue(torch.equal(conj_0, conj_1.conj()))

        # Same memory reinterpreted as another dtype.
        s_0 = torch.rand((2, 3), dtype=torch.float, device=device)
        s_1 = s_0.view(dtype=torch.int32)
        self.assertEqual(s_0.data_ptr(), s_1.data_ptr())
        self.assertEqual(s_0.storage_offset(), s_1.storage_offset())
        self.assertEqual(s_0.stride(), s_1.stride())
        self.assertEqual(s_0.size(), s_1.size())
        self.assertFalse(torch.equal(s_0, s_1))

        # Same memory viewed through a transpose.
        t_0 = torch.rand((2, 3), dtype=torch.float, device=device)
        t_1 = t_0.t()
        self.assertEqual(t_0.data_ptr(), t_1.data_ptr())
        self.assertEqual(t_0.storage_offset(), t_1.storage_offset())
        self.assertNotEqual(t_0.stride(), t_1.stride())
        self.assertNotEqual(t_0.size(), t_1.size())
        self.assertFalse(torch.equal(t_0, t_1))

        # A tensor containing NaN never compares equal, even to itself.
        # These tensors are created without a ``device`` argument.
        for dtype in floating_and_complex_types():
            t = torch.tensor([1., float('nan')], dtype=dtype)
            self.assertFalse(torch.equal(t, t))
def test_element_size(self):
    """Storage ``element_size`` agrees with tensor ``element_size``/``itemsize``.

    Also checks that every size is positive and that the integer and
    floating sizes satisfy the expected minimums and orderings.
    """
    storage_types = (
        torch.ByteStorage, torch.CharStorage, torch.ShortStorage, torch.IntStorage,
        torch.LongStorage, torch.FloatStorage, torch.DoubleStorage, torch.BoolStorage,
        torch.BFloat16Storage, torch.ComplexFloatStorage, torch.ComplexDoubleStorage,
    )
    sizes = [storage_type().element_size() for storage_type in storage_types]
    (byte, char, short, int_st, long, float_st, double, bool_st,
     bfloat16, complexfloat, complexdouble) = sizes

    # Factories for an empty tensor of the matching dtype, in the same order.
    empty_tensor_factories = (
        torch.ByteTensor, torch.CharTensor, torch.ShortTensor, torch.IntTensor,
        torch.LongTensor, torch.FloatTensor, torch.DoubleTensor, torch.BoolTensor,
        lambda: torch.tensor([], dtype=torch.bfloat16),
        lambda: torch.tensor([], dtype=torch.complex64),
        lambda: torch.tensor([], dtype=torch.complex128),
    )
    for storage_size, make_empty in zip(sizes, empty_tensor_factories):
        self.assertEqual(storage_size, make_empty().element_size())
        self.assertEqual(storage_size, make_empty().itemsize)

    # Every element size is positive.
    for storage_size in sizes:
        self.assertGreater(storage_size, 0)

    # Exact one-byte types.
    for one_byte in (byte, char, bool_st):
        self.assertEqual(one_byte, 1)

    # Minimum sizes and relative ordering.
    self.assertGreaterEqual(short, 2)
    self.assertGreaterEqual(int_st, 2)
    self.assertGreaterEqual(int_st, short)
    self.assertGreaterEqual(long, 4)
    self.assertGreaterEqual(long, int_st)
    self.assertGreaterEqual(double, float_st)
def test_permute(self):
    """``permute`` reorders sizes according to the permutation and leaves the source intact."""
    # Dimension i has size i + 1, so a permuted size identifies its source axis.
    orig = list(range(1, 8))
    perm = torch.randperm(7).tolist()
    x = torch.empty(*orig).fill_(0)
    permuted_sizes = x.permute(*perm).size()
    recovered = [extent - 1 for extent in permuted_sizes]
    self.assertEqual(perm, recovered)
    self.assertEqual(x.size(), orig)
@skipIfTorchDynamo("TorchDynamo fails with unknown reason")
def test_reversed(self):
    """``reversed`` on 1-D, 2-D and 0-dim tensors."""
    cases = (
        (lambda: torch.arange(0, 10), lambda: torch.arange(9, -1, -1)),
        (lambda: torch.arange(1, 10).view(3, 3),
         lambda: torch.tensor([[7, 8, 9], [4, 5, 6], [1, 2, 3]])),
        (lambda: torch.tensor(42), lambda: torch.tensor(42)),
    )
    for make_input, make_expected in cases:
        val = make_input()
        self.assertEqual(reversed(val), make_expected())
def test_contains(self):
    """``in`` on tensors for scalar and tensor operands; other operand types raise."""
    vector = torch.arange(0, 10)
    self.assertEqual(4 in vector, True)
    self.assertEqual(12 in vector, False)

    matrix = torch.arange(1, 10).view(3, 3)
    row = torch.arange(1, 4)
    self.assertEqual(row in matrix, True)
    row += 10
    self.assertEqual(row in matrix, False)

    with self.assertRaisesRegex(
            RuntimeError,
            f"Tensor.__contains__ only supports Tensor or scalar, but you passed in a {str}."):
        "foo" in matrix
    with self.assertRaisesRegex(
            RuntimeError,
            f"Tensor.__contains__ only supports Tensor or scalar, but you passed in a {type([1, 2])}."):
        [1, 2] in matrix
@skipIfTorchDynamo("TorchDynamo fails with unknown reason")
def test_deepcopy_parameter(self):
    """Deep-copying a ``keep_vars`` state dict keeps entries as ``Parameter``."""
    layer = torch.nn.Linear(10, 1)
    state = layer.state_dict(keep_vars=True)
    for key in ('weight', 'bias'):
        self.assertEqual(torch.nn.Parameter, type(state[key]))

    copied = copy.deepcopy(state)
    for key in ('weight', 'bias'):
        self.assertEqual(torch.nn.Parameter, type(copied[key]))
def test_pickle(self):
    """A tensor survives a pickle round trip unchanged."""
    original = torch.randn(5, 5)
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
@skipIfTorchDynamo("TorchDynamo fails with unknown reason")
def test_pickle_parameter(self):
    """A ``Parameter`` survives a pickle round trip with type, flag and values intact."""
    original = torch.nn.Parameter(torch.randn(5, 5))
    payload = pickle.dumps(original)
    restored = pickle.loads(payload)
    self.assertTrue(isinstance(restored, torch.nn.Parameter))
    self.assertEqual(original.requires_grad, restored.requires_grad)
    self.assertEqual(original, restored)
@skipIfTorchDynamo("TorchDynamo fails with unknown reason")
def test_pickle_parameter_no_requires_grad(self):
    """A ``Parameter`` built with ``requires_grad=False`` keeps that flag through pickling."""
    original = torch.nn.Parameter(torch.randn(5, 5), requires_grad=False)
    payload = pickle.dumps(original)
    restored = pickle.loads(payload)
    self.assertTrue(isinstance(restored, torch.nn.Parameter))
    self.assertEqual(original.requires_grad, restored.requires_grad)
    self.assertEqual(original, restored)
def test_pickle_dtype(self):
    """Unpickling a dtype yields the very same object that was pickled."""
    dtype = torch.float32
    restored = pickle.loads(pickle.dumps(dtype))
    self.assertTrue(isinstance(restored, torch.dtype))
    self.assertEqual(id(restored), id(dtype))
def test_pickle_size(self):
    """A ``torch.Size`` survives a pickle round trip as a ``torch.Size``."""
    shape = torch.rand(10).size()
    restored = pickle.loads(pickle.dumps(shape))
    self.assertTrue(isinstance(restored, torch.Size))
    self.assertEqual(shape, restored)
def test_pickle_function(self):
    """``torch.tanh`` compares equal to its pickled-and-restored counterpart."""
    fn = torch.tanh
    restored = pickle.loads(pickle.dumps(fn))
    self.assertEqual(fn, restored)
def test_generator_cpu(self):
self.assertEqual(torch.default_generator, torch.default_generator)
g1 = torch.Generator()
g2 = torch.Generator()
g1.manual_seed(12345)
g2.manual_seed(12345)
self.assertEqual(g1.initial_seed(), g2.initial_seed())
g1.seed()
g2.seed()
self.assertNotEqual(g1.initial_seed(), g2.initial_seed())
g1 = torch.Generator()
g2_state = g2.get_state()
g2_randn = torch.randn(1, generator=g2)
g1.set_state(g2_state)
g1_randn = torch.randn(1, generator=g1)
self.assertEqual(g1_randn, g2_randn)
default_state = torch.default_generator.get_state()
q = torch.empty(100)
g1_normal = q.normal_()
g2 = torch.Generator()
g2.set_state(default_state)
g2_normal = q.normal_(generator=g2)
self.assertEqual(g1_normal, g2_normal)
def test_invalid_generator_raises(self):
self.assertRaises(RuntimeError, lambda: torch.Generator('opengl'))
# Round-trips torch.Generator objects through pickle and compares the original
# with the restored generator: device, initial seed, offset (non-CPU devices
# only) and the full state tensor.
# 'cpu' is always covered; 'npu' is added when torch.npu.is_available() is true.
def test_pickle_generator(self) -> None:
devices = ['cpu']
if torch.npu.is_available():
devices += ['npu']
for device in devices:
# One sub-test per device so a failure names the device it happened on.
with self.subTest(device=device):
generator = torch.Generator(device=device).manual_seed(12345)
if device != "cpu":
generator.set_offset(100)
# Draw from the generator before pickling it.
# NOTE(review): the original indentation is not visible here, so it is unclear
# whether this draw is meant to run for every device or only for non-CPU ones.
torch.randn((100, 100), generator=generator, device=device)
reserialized: torch.Generator = pickle.loads(pickle.dumps(generator))
self.assertEqual(generator.device, reserialized.device)
self.assertEqual(generator.initial_seed(), reserialized.initial_seed())
if device != "cpu":
self.assertEqual(generator.get_offset(), reserialized.get_offset())
torch.testing.assert_close(generator.get_state(), reserialized.get_state())
def _sobol_reference_samples(self, scramble: bool) -> torch.Tensor:
if not scramble:
return torch.tensor(
[
[0., 0.],
[0.5, 0.5],
[0.75, 0.25],
[0.25, 0.75],
[0.375, 0.375],
[0.875, 0.875],
[0.625, 0.125],
[0.125, 0.625],
],
)
else:
return torch.tensor(
[
[0.50860737, 0.29320504],
[0.07116939, 0.89594537],
[0.49354145, 0.11524881],
[0.93097717, 0.70244044],
[0.87266153, 0.23887917],
[0.31021884, 0.57600391],
[0.13687253, 0.42054182],
[0.69931293, 0.77336788],
],
)
def test_sobolengine_bounds(self, scramble: bool = False):
engine = torch.quasirandom.SobolEngine(100, scramble=scramble, seed=123456)
sample = engine.draw(512)
self.assertTrue(torch.all(sample >= 0))
self.assertTrue(torch.all(sample <= 1))
def test_sobolengine_bounds_scrambled(self):
self.test_sobolengine_bounds(scramble=True)
def test_sobolengine_draw(self, scramble: bool = False):
ref_sample = self._sobol_reference_samples(scramble=scramble)
engine = torch.quasirandom.SobolEngine(2, scramble=scramble, seed=123456)
sample = engine.draw(n=len(ref_sample))
self.assertEqual(sample, ref_sample)
self.assertEqual(engine.num_generated, len(ref_sample))
def test_sobolengine_draw_scrambled(self):
self.test_sobolengine_draw(scramble=True)
def test_sobolengine_first_point(self):
for dtype in (torch.float, torch.double):
engine = torch.quasirandom.SobolEngine(2, scramble=False)
sample = engine.draw(1, dtype=dtype)
self.assertTrue(torch.all(sample == 0))
self.assertEqual(sample.dtype, dtype)
for dtype in (torch.float, torch.double):
engine = torch.quasirandom.SobolEngine(2, scramble=True, seed=123456)
sample = engine.draw(1, dtype=dtype)
self.assertTrue(torch.all(sample != 0))
self.assertEqual(sample.dtype, dtype)
def test_sobolengine_continuing(self, scramble: bool = False):
ref_sample = self._sobol_reference_samples(scramble=scramble)
engine = torch.quasirandom.SobolEngine(2, scramble=scramble, seed=123456)
n_half = len(ref_sample) // 2
_ = engine.draw(n=n_half)
sample = engine.draw(n=n_half)
torch.testing.assert_close(sample, ref_sample[n_half:])
def test_sobolengine_continuing_scrambled(self):
self.test_sobolengine_continuing(scramble=True)
def test_sobolengine_reset(self, scramble: bool = False):
ref_sample = self._sobol_reference_samples(scramble=scramble)
engine = torch.quasirandom.SobolEngine(2, scramble=scramble, seed=123456)
_ = engine.draw(n=len(ref_sample) // 2)
engine.reset()
self.assertEqual(engine.num_generated, 0)
sample = engine.draw(n=len(ref_sample))
torch.testing.assert_close(sample, ref_sample)
def test_sobolengine_reset_scrambled(self):
self.test_sobolengine_reset(scramble=True)
def test_sobolengine_fast_forward(self, scramble: bool = False):
ref_sample = self._sobol_reference_samples(scramble=scramble)
engine = torch.quasirandom.SobolEngine(2, scramble=scramble, seed=123456)
engine.fast_forward(4)
sample = engine.draw(n=4)
torch.testing.assert_close(sample, ref_sample[4:])
engine.reset()
even_draws = []
for i in range(8):
if i % 2 == 0:
even_draws.append(engine.draw())
else:
engine.fast_forward(1)
torch.testing.assert_close(
ref_sample[[i for i in range(8) if i % 2 == 0]],
torch.from_numpy(np.concatenate(even_draws)),
)
def test_sobolengine_fast_forward_scrambled(self):
self.test_sobolengine_fast_forward(scramble=True)
# Checks the dtype of tensors returned by SobolEngine.draw():
#   * torch.float32 is expected before set_default_dtype(torch.float64) is entered,
#   * torch.float64 is expected for a plain draw after it is entered,
#   * an explicitly passed dtype=torch.float32 is expected to be honoured, both
#     on an engine that has already drawn and on a freshly constructed one.
def test_sobolengine_default_dtype(self):
engine = torch.quasirandom.SobolEngine(dimension=3, scramble=True, seed=123456)
self.assertEqual(engine.draw(n=5).dtype, torch.float32)
# NOTE(review): indentation is not visible in this view, so how many of the
# following statements belong to the `with` body cannot be told from here.
with set_default_dtype(torch.float64):
engine = torch.quasirandom.SobolEngine(dimension=3, scramble=True, seed=123456)
self.assertEqual(engine.draw(n=5).dtype, torch.float64)
# Explicit dtype on an engine that has already produced points.
self.assertEqual(engine.draw(n=5, dtype=torch.float32).dtype, torch.float32)
# Explicit dtype on the first draw of a new engine.
engine = torch.quasirandom.SobolEngine(dimension=3, scramble=True, seed=123456)
self.assertEqual(engine.draw(n=5, dtype=torch.float32).dtype, torch.float32)
@skipIfTorchDynamo("np.float64 restored as float32 after graph break.")
def test_sobolengine_distribution(self, scramble=False):
    """Compare per-dimension mean and quartiles of 1024 points with 0.5 / 0.25 / 0.75."""
    dim = 50
    sobol = torch.quasirandom.SobolEngine(dim, scramble=scramble, seed=123456)
    points = sobol.draw(1024)

    torch.testing.assert_close(
        torch.mean(points, dim=0), torch.full((dim,), 0.5), atol=2, rtol=2
    )
    # Lower and upper quartile, computed with numpy per dimension.
    for percent, target in ((25, 0.25), (75, 0.75)):
        torch.testing.assert_close(
            np.percentile(points, percent, axis=0).astype(np.float64), np.repeat(target, dim), atol=2, rtol=2
        )
@skipIfTorchDynamo("np.float64 restored as float32 after graph break.")
def test_sobolengine_distribution_scrambled(self):
    """Repeat the distribution check with scrambling switched on."""
    run_check = self.test_sobolengine_distribution
    run_check(scramble=True)
def test_sobolengine_draw_base2(self, scramble=False):
ref_sample = self._sobol_reference_samples(scramble=scramble)
engine = torch.quasirandom.SobolEngine(2, scramble=scramble, seed=123456)
sample = engine.draw_base2(2)
self.assertEqual(ref_sample[:4], sample)
sample = engine.draw_base2(2)
self.assertEqual(ref_sample[4:8], sample)
def test_sobolengine_draw_base2_scrambled(self):
self.test_sobolengine_draw_base2(scramble=True)
def test_sobolengine_raise(self):
maxdim = torch.quasirandom.SobolEngine.MAXDIM
with self.assertRaises(ValueError):
torch.quasirandom.SobolEngine(maxdim + 1)
def test_sobolengine_high_dim(self):
engine = torch.quasirandom.SobolEngine(1111, scramble=False, seed=123456)
samples1 = engine.draw()
vals1, counts1 = torch.unique(samples1, return_counts=True)
samples2 = engine.draw()
vals2, counts2 = torch.unique(samples2, return_counts=True)
self.assertEqual(vals1.item(), 0.0)
self.assertEqual(counts1.item(), 1111)
self.assertEqual(vals2.item(), 0.5)
self.assertEqual(counts1.item(), 1111)
def test_parsing_int64(self):
x = torch.cumsum(torch.ones(5, 5), 0)
self.assertEqual(x, torch.cumsum(torch.ones(5, 5), torch.tensor(0)))
self.assertRaises(TypeError, lambda: torch.cumsum(torch.ones(5, 5), torch.tensor(0.)))
def test_parsing_double(self):
x = torch.randn(2, 3)
torch.isclose(x, x, 1, 1)
self.assertTrue(torch.isclose(x, x, 1, 1).all())
self.assertTrue(torch.isclose(x, x, 1.5, 1.).all())
self.assertTrue(torch.isclose(x, x, torch.tensor(1), torch.tensor(1)).all())
self.assertTrue(torch.isclose(x, x, torch.tensor(1.5), torch.tensor(1.)).all())
self.assertRaises(TypeError,
lambda: torch.isclose(x, x, torch.tensor(1.5), torch.tensor(1., requires_grad=True)).all())
def test_parsing_intlist(self):
self.assertEqual(torch.Size([3, 4]), torch.ones((torch.tensor(3), torch.tensor(4))).shape)
self.assertEqual(torch.Size([3, 4]), torch.ones(torch.tensor(3), torch.tensor(4)).shape)
self.assertEqual(torch.Size([3, 4]), torch.ones((np.array(3), np.int64(4))).shape)
self.assertEqual(torch.Size([3, 4]), torch.ones(np.array(3), np.int64(4)).shape)
self.assertEqual(torch.Size([3, 4]), torch.ones((np.int64(3), np.array(4))).shape)
self.assertEqual(torch.Size([3, 4]), torch.ones(np.int64(3), np.array(4)).shape)
self.assertRaises(TypeError, lambda: torch.ones((torch.tensor(3.), torch.tensor(4))))
self.assertRaises(TypeError, lambda: torch.ones((3., torch.tensor(4))))
self.assertRaises(TypeError, lambda: torch.ones((np.array(3.), torch.tensor(4))))
self.assertRaises(TypeError, lambda: torch.ones(torch.tensor(3, 3)))
self.assertRaises(TypeError, lambda: torch.ones(torch.tensor(3, 3)))
self.assertRaises(TypeError, lambda: torch.ones(np.array(3, 3)))
self.assertRaises(TypeError, lambda: torch.ones(np.array(3, 3)))
self.assertRaisesRegex(TypeError,
"received an invalid combination of arguments",
lambda: torch.LongTensor((6, 0), 1, 1, 0))
self.assertRaisesRegex(TypeError,
"missing 1 required positional arguments",
lambda: torch.tensor().new_zeros((5, 5), 0))
def test_from_buffer(self):
a = bytearray([1, 2, 3, 4])
self.assertEqual(torch.ByteStorage.from_buffer(a).tolist(), [1, 2, 3, 4])
shorts = torch.ShortStorage.from_buffer(a, 'big')
self.assertEqual(shorts.size(), 2)
self.assertEqual(shorts.tolist(), [258, 772])
ints = torch.IntStorage.from_buffer(a, 'little')
self.assertEqual(ints.size(), 1)
self.assertEqual(ints[0], 67305985)
f = bytearray([0x40, 0x10, 0x00, 0x00])
floats = torch.FloatStorage.from_buffer(f, 'big')
self.assertEqual(floats.size(), 1)
self.assertEqual(floats[0], 2.25)
f = bytearray([0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x40])
bools = torch.BoolStorage.from_buffer(f, 'big')
self.assertEqual(bools.size(), 8)
self.assertEqual(bools.tolist(), [False, True, True, True, True, True, True, True])
self.assertEqual(bools.type(), 'torch.BoolStorage')
self.assertTrue(isinstance(bools, torch.BoolStorage))
f = bytearray(b'\x80\x02\x8a\nl\xfc\x9cF\xf9 j\xa8P\x19.\x80\x02M\xe9')
bools = torch.BoolStorage.from_buffer(f, 'big')
self.assertEqual(bools.size(), 19)
f = bytearray(b'\0x4A')
bools = torch.BoolStorage.from_buffer(f, 'big')
self.assertEqual(bools.size(), 4)
self.assertEqual(bools.tolist(), [False, True, True, True])
bytes_ = torch.ByteStorage.from_buffer(a)
self.assertEqual(bytes_.nbytes(), 4)
self.assertEqual(bytes_.tolist(), [1, 2, 3, 4])
self.assertTrue(isinstance(bytes_, torch.ByteStorage))
# Verifies the exceptions raised for invalid constructor arguments of the
# legacy <type>Storage classes and of torch.TypedStorage, for every class in
# torch._storage_classes except UntypedStorage and TypedStorage themselves.
def test_storage_error(self):
# Storage classes for which the NPU checks below expect the
# "Cannot create NPU storage with quantized dtype" error.
quantized_storages = [
torch.QInt32Storage,
torch.QInt8Storage,
torch.QUInt2x4Storage,
torch.QUInt4x2Storage,
torch.QUInt8Storage,
]
# The _LegacyStorage base class itself must refuse to be instantiated.
with self.assertRaisesRegex(RuntimeError, r"Only child classes of _LegacyStorage can be instantiated"):
torch.storage._LegacyStorage()
for storage_class in torch._storage_classes:
if storage_class in [torch.UntypedStorage, torch.TypedStorage]:
continue
# Classes defined in the 'torch_npu.npu' module are exercised on 'npu', all others on 'cpu'.
device = 'npu' if storage_class.__module__ == 'torch_npu.npu' else 'cpu'
dtype = storage_class.dtype
if device == 'npu' and not torch_npu.npu.is_available():
continue
# --- Legacy <type>Storage constructor errors ---
with self.assertRaisesRegex(RuntimeError, r"'device' cannot be specified"):
storage_class(device='cpu')
with self.assertRaisesRegex(RuntimeError, r"'dtype' cannot be specified"):
storage_class(dtype=torch.float)
with self.assertRaisesRegex(TypeError, r"got an unexpected keyword"):
storage_class(sdlkjf=torch.float)
with self.assertRaisesRegex(RuntimeError, r"Too many positional arguments"):
storage_class(0, 0)
with self.assertRaisesRegex(TypeError, r"invalid data type"):
storage_class('string')
with self.assertRaisesRegex(TypeError, r"Argument type not recognized"):
storage_class(torch.tensor([]))
# An empty instance of the class, reused by the wrap_storage checks below.
s = storage_class()
with self.assertRaisesRegex(RuntimeError, r"No positional arguments"):
storage_class(0, wrap_storage=s.untyped())
with self.assertRaisesRegex(TypeError, r"must be UntypedStorage"):
storage_class(wrap_storage=s)
# Cross-device checks need an available NPU.
if torch_npu.npu.is_available():
if storage_class in quantized_storages:
with self.assertRaisesRegex(RuntimeError, r"Cannot create NPU storage with quantized dtype"):
s.npu()
else:
# Move the storage to the other device and try to wrap it with this class.
if s.is_npu:
s_other_device = s.cpu()
else:
s_other_device = s.npu()
with self.assertRaisesRegex(RuntimeError, r"Device of 'wrap_storage' must be"):
storage_class(wrap_storage=s_other_device.untyped())
# --- torch.TypedStorage constructor errors ---
with self.assertRaisesRegex(RuntimeError, r"No positional arguments"):
torch.TypedStorage(0, wrap_storage=s.untyped(), dtype=dtype)
with self.assertRaisesRegex(RuntimeError, r"Argument 'dtype' must be specified"):
torch.TypedStorage(wrap_storage=s.untyped())
with self.assertRaisesRegex(TypeError, r"Argument 'dtype' must be torch.dtype"):
torch.TypedStorage(wrap_storage=s.untyped(), dtype=0)
with self.assertRaisesRegex(RuntimeError, r"Argument 'device' should not be specified"):
torch.TypedStorage(wrap_storage=s.untyped(), dtype=dtype, device=device)
with self.assertRaisesRegex(TypeError, r"Argument 'wrap_storage' must be UntypedStorage"):
torch.TypedStorage(wrap_storage=s, dtype=dtype)
with self.assertRaisesRegex(RuntimeError, r"Storage device not recognized"):
torch.TypedStorage(dtype=dtype, device='xla')
if torch_npu.npu.is_available():
if storage_class in quantized_storages:
with self.assertRaisesRegex(RuntimeError, r"Cannot create NPU storage with quantized dtype"):
torch.TypedStorage(dtype=dtype, device='npu')
with self.assertRaisesRegex(TypeError, r"Argument type not recognized"):
torch.TypedStorage(torch.tensor([]), dtype=dtype, device=device)
with self.assertRaisesRegex(RuntimeError, r"Too many positional arguments"):
torch.TypedStorage(0, 0, dtype=dtype, device=device)
# Filling a typed storage with another storage is expected to fail with 'cannot set item'.
if isinstance(s, torch.TypedStorage):
s_other = torch.TypedStorage([1, 2, 3, 4], device=device, dtype=dtype)
with self.assertRaisesRegex(RuntimeError, r'cannot set item'):
s.fill_(s_other)
def test_storage_error_no_attribute(self):
    """Constructors that are not offered for NPU storages must raise ``RuntimeError``."""
    unavailable = r'Not available for NPU storage'
    for storage_class in (torch_npu.npu.ByteStorage, torch_npu.npu.FloatStorage):
        with self.assertRaisesRegex(RuntimeError, unavailable):
            storage_class.from_buffer()

        with self.assertRaisesRegex(RuntimeError, unavailable):
            storage_class._new_with_weak_ptr()

        with self.assertRaisesRegex(RuntimeError, unavailable):
            storage_class._new_shared_filename(0, 0, 0)
def test_storage_casts(self):
storage = torch.IntStorage([-1, 0, 1, 2, 3, 4])
self.assertEqual(storage.size(), 6)
self.assertEqual(storage.tolist(), [-1, 0, 1, 2, 3, 4])
self.assertEqual(storage.type(), 'torch.IntStorage')
self.assertIs(storage.dtype, torch.int32)
floatStorage = storage.float()
self.assertEqual(floatStorage.size(), 6)
self.assertEqual(floatStorage.tolist(), [-1, 0, 1, 2, 3, 4])
self.assertEqual(floatStorage.type(), 'torch.FloatStorage')
self.assertEqual(floatStorage.int().tolist(), [-1, 0, 1, 2, 3, 4])
self.assertIs(floatStorage.dtype, torch.float32)
halfStorage = storage.half()
self.assertEqual(halfStorage.size(), 6)
self.assertEqual(halfStorage.tolist(), [-1, 0, 1, 2, 3, 4])
self.assertEqual(halfStorage.type(), 'torch.HalfStorage')
self.assertEqual(halfStorage.int().tolist(), [-1, 0, 1, 2, 3, 4])
self.assertIs(halfStorage.dtype, torch.float16)
bfloat16Storage = storage.bfloat16()
self.assertEqual(bfloat16Storage.size(), 6)
self.assertEqual(bfloat16Storage.tolist(), [-1, 0, 1, 2, 3, 4])
self.assertEqual(bfloat16Storage.type(), 'torch.BFloat16Storage')
self.assertEqual(bfloat16Storage.int().tolist(), [-1, 0, 1, 2, 3, 4])
self.assertIs(bfloat16Storage.dtype, torch.bfloat16)
longStorage = storage.long()
self.assertEqual(longStorage.size(), 6)
self.assertEqual(longStorage.tolist(), [-1, 0, 1, 2, 3, 4])
self.assertEqual(longStorage.type(), 'torch.LongStorage')
self.assertEqual(longStorage.int().tolist(), [-1, 0, 1, 2, 3, 4])
self.assertIs(longStorage.dtype, torch.int64)
shortStorage = storage.short()
self.assertEqual(shortStorage.size(), 6)
self.assertEqual(shortStorage.tolist(), [-1, 0, 1, 2, 3, 4])
self.assertEqual(shortStorage.type(), 'torch.ShortStorage')
self.assertEqual(shortStorage.int().tolist(), [-1, 0, 1, 2, 3, 4])
self.assertIs(shortStorage.dtype, torch.int16)
doubleStorage = storage.double()
self.assertEqual(doubleStorage.size(), 6)
self.assertEqual(doubleStorage.tolist(), [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0])
self.assertEqual(doubleStorage.type(), 'torch.DoubleStorage')
self.assertEqual(doubleStorage.int().tolist(), [-1, 0, 1, 2, 3, 4])
self.assertIs(doubleStorage.dtype, torch.float64)
charStorage = storage.char()
self.assertEqual(charStorage.size(), 6)
self.assertEqual(charStorage.tolist(), [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0])
self.assertEqual(charStorage.type(), 'torch.CharStorage')
self.assertEqual(charStorage.int().tolist(), [-1, 0, 1, 2, 3, 4])
self.assertIs(charStorage.dtype, torch.int8)
byteStorage = storage.byte()
self.assertEqual(byteStorage.size(), 6)
self.assertEqual(byteStorage.tolist(), [255, 0, 1, 2, 3, 4])
self.assertEqual(byteStorage.type(), 'torch.ByteStorage')
self.assertEqual(byteStorage.int().tolist(), [255, 0, 1, 2, 3, 4])
self.assertIs(byteStorage.dtype, torch.uint8)
boolStorage = storage.bool()
self.assertEqual(boolStorage.size(), 6)
self.assertEqual(boolStorage.tolist(), [True, False, True, True, True, True])
self.assertEqual(boolStorage.type(), 'torch.BoolStorage')
self.assertEqual(boolStorage.int().tolist(), [1, 0, 1, 1, 1, 1])
self.assertIs(boolStorage.dtype, torch.bool)
complexfloat_storage = torch.ComplexFloatStorage([-1, 0, 1 + 2j, 2.5j, 3.5, 4 - 2j])
self.assertEqual(complexfloat_storage.size(), 6)
self.assertEqual(complexfloat_storage.tolist(), [-1, 0, 1 + 2j, 2.5j, 3.5, 4 - 2j])
self.assertEqual(complexfloat_storage.type(), 'torch.ComplexFloatStorage')
self.assertIs(complexfloat_storage.dtype, torch.complex64)
complexdouble_storage = complexfloat_storage.complex_double()
self.assertEqual(complexdouble_storage.size(), 6)
self.assertEqual(complexdouble_storage.tolist(), [-1, 0, 1 + 2j, 2.5j, 3.5, 4 - 2j])
self.assertEqual(complexdouble_storage.type(), 'torch.ComplexDoubleStorage')
self.assertIs(complexdouble_storage.dtype, torch.complex128)
def test_storage_byteswap(self):
input_ = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
swapped_8bytes = [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]
swapped_4bytes = [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]
swapped_2bytes = [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]
swapped_1byte = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
storage = torch.storage.TypedStorage(input_, dtype=torch.uint8)._untyped_storage
storage_f64 = storage.__copy__()
storage_f64.byteswap(torch.float64)
self.assertEqual(storage_f64.tolist(), swapped_8bytes)
storage_f32 = storage.__copy__()
storage_f32.byteswap(torch.float32)
self.assertEqual(storage_f32.tolist(), swapped_4bytes)
storage_f16 = storage.__copy__()
storage_f16.byteswap(torch.float16)
self.assertEqual(storage_f16.tolist(), swapped_2bytes)
storage_bf16 = storage.__copy__()
storage_bf16.byteswap(torch.bfloat16)
self.assertEqual(storage_bf16.tolist(), swapped_2bytes)
storage_i64 = storage.__copy__()
storage_i64.byteswap(torch.int64)
self.assertEqual(storage_i64.tolist(), swapped_8bytes)
storage_i32 = storage.__copy__()
storage_i32.byteswap(torch.int32)
self.assertEqual(storage_i32.tolist(), swapped_4bytes)
storage_i16 = storage.__copy__()
storage_i16.byteswap(torch.int16)
self.assertEqual(storage_i16.tolist(), swapped_2bytes)
storage_i8 = storage.__copy__()
storage_i8.byteswap(torch.int8)
self.assertEqual(storage_i8.tolist(), swapped_1byte)
storage_ui8 = storage.__copy__()
storage_ui8.byteswap(torch.uint8)
self.assertEqual(storage_ui8.tolist(), swapped_1byte)
storage_bool = storage.__copy__()
storage_bool.byteswap(torch.bool)
self.assertEqual(storage_bool.tolist(), swapped_1byte)
storage_c128 = storage.__copy__()
storage_c128.byteswap(torch.complex128)
self.assertEqual(storage_c128.tolist(), swapped_8bytes)
storage_c64 = storage.__copy__()
storage_c64.byteswap(torch.complex64)
self.assertEqual(storage_c64.tolist(), swapped_4bytes)
def test_typed_storage_internal_no_warning(self):
    """Internal TypedStorage entry points must not emit the deprecation warning.

    Every probe is executed with the "TypedStorage is deprecated" warning
    turned into an error, so a warning makes the probe raise.
    """
    def build_probes(storage_cls, device, storage, untyped, tensor):
        # One zero-argument callable per internal API that has to stay silent.
        return [
            lambda: storage_cls(_internal=True),
            lambda: torch.TypedStorage(
                dtype=torch.float,
                device=device,
                _internal=True),
            lambda: torch.TypedStorage(
                wrap_storage=untyped,
                dtype=storage.dtype,
                _internal=True),
            lambda: storage_cls._dtype,
            lambda: storage._resize_(20),
            lambda: storage._size(),
            lambda: storage._untyped_storage,
            lambda: storage._is_shared(),
            lambda: storage._share_memory_(),
            lambda: storage._pickle_storage_type(),
            lambda: storage._setitem(slice(0, storage._size()), 1),
            lambda: storage._element_size(),
            lambda: storage._deepcopy({}),
            lambda: storage._data_ptr(),
            lambda: storage._nbytes(),
            lambda: tensor._typed_storage(),
        ]

    cpu_storage = torch.FloatStorage(10)
    probes = build_probes(
        torch.FloatStorage, 'cpu', cpu_storage, cpu_storage.untyped(), torch.randn(10))

    # Repeat the same probes for the NPU classes when a device is present.
    if torch_npu.npu.is_available():
        npu_storage = torch_npu.npu.FloatStorage(10)
        probes += build_probes(
            torch_npu.npu.FloatStorage, 'npu', npu_storage, npu_storage.untyped(),
            torch.randn(10, device='npu'))

    for probe in probes:
        with warnings.catch_warnings():
            warnings.filterwarnings('error', "TypedStorage is deprecated")
            probe()
@skipIfTorchInductor("FIXME")
def test_typed_storage_deprecation_warning(self):
    """Check when public TypedStorage APIs emit "TypedStorage is deprecated".

    Three phases are covered:
      1. under ``AlwaysWarnTypedStorageRemoval(True)`` every listed call
         produces exactly one warning;
      2. after ``torch.storage._reset_warn_typed_storage_removal()`` only the
         first of two calls warns, and the recorded warning location is the
         line containing ``torch.FloatStorage()``;
      3. once the warning has been issued, further calls stay silent.
    """
    s0 = torch.FloatStorage(10)
    funcs = [
        lambda: torch.FloatStorage(),
        lambda: torch.FloatStorage.dtype,
        lambda: s0.fill_(0),
        lambda: s0.is_npu,
        lambda: s0.untyped(),
        lambda: len(s0),
        lambda: s0[0],
    ]

    if torch_npu.npu.is_available():
        s1 = torch_npu.npu.FloatStorage(10)
        funcs += [
            lambda: torch_npu.npu.FloatStorage(),
            lambda: torch_npu.npu.FloatStorage.dtype,
            lambda: s1.fill_(0),
            lambda: s1.is_npu,
            lambda: s1.untyped(),
            lambda: len(s1),
            lambda: s1[0],
        ]

    # Phase 1: each public API warns exactly once when warnings are forced on.
    for f in funcs:
        with AlwaysWarnTypedStorageRemoval(True):
            with warnings.catch_warnings(record=True) as w:
                warnings.resetwarnings()
                f()
                self.assertEqual(len(w), 1, msg=str([str(a) for a in w]))
                warning = w[0].message
                # The former `assertTrue(warning, DeprecationWarning)` passed the
                # class as the failure message and therefore checked nothing.
                # NOTE(review): torch.storage is understood to issue this warning
                # with the UserWarning category; confirm against the installed torch.
                self.assertIsInstance(warning, UserWarning)
                self.assertTrue(re.search(
                    '^TypedStorage is deprecated',
                    str(warning)))

    # Phase 2: by default only the first occurrence is reported.
    torch.storage._reset_warn_typed_storage_removal()
    with warnings.catch_warnings(record=True) as w:
        warnings.resetwarnings()
        torch.FloatStorage()
        torch.randn(10).storage()
        self.assertEqual(len(w), 1, msg=str([str(a) for a in w]))
        warning = w[0].message
        self.assertTrue(re.search(
            '^TypedStorage is deprecated',
            str(warning)))
        # The warning has to point at the source line that made the call.
        with open(w[0].filename, encoding="utf-8") as source_file:
            code_line = source_file.readlines()[w[0].lineno - 1]
        self.assertTrue(re.search(re.escape('torch.FloatStorage()'), code_line))

    # Phase 3: nothing is emitted once the warning has already been shown.
    with warnings.catch_warnings(record=True) as w:
        warnings.resetwarnings()
        torch.FloatStorage()
        torch.randn(10).storage()
        self.assertEqual(len(w), 0, msg=str([str(a) for a in w]))
def test_from_file(self):
    """Two ``FloatStorage.from_file`` mappings of one file must see each other's writes."""
    def assert_with_filename(filename):
        size = 10000
        first_storage = torch.FloatStorage.from_file(filename, True, size)
        first_view = torch.FloatTensor(first_storage).copy_(torch.randn(size))
        # A tensor built on the storage has to point at the storage's memory.
        self.assertEqual(first_storage.data_ptr(), torch.FloatTensor(first_storage).data_ptr())

        # Map the same file a second time and compare the contents.
        second_storage = torch.FloatStorage.from_file(filename, True, size)
        second_view = torch.FloatTensor(second_storage)
        self.assertEqual(first_view, second_view, atol=0, rtol=0)

        # Writing through either mapping must be visible through the other one.
        for target in (first_view, second_view):
            target.fill_(random.uniform(-1, 1))
            self.assertEqual(first_view, second_view, atol=0, rtol=0)

        # Drop every reference to the mappings before the file goes away.
        del first_storage, first_view, second_storage, second_view, target

    with TemporaryFileName() as fname:
        assert_with_filename(fname)

    # Same check for a path containing non-ASCII characters.
    if IS_FILESYSTEM_UTF8_ENCODING:
        with TemporaryDirectoryName(suffix='\u4e2d\u6587') as dname, TemporaryFileName(dir=dname) as fname:
            assert_with_filename(fname)
def test_torch_from_file(self):
    """Two ``torch.from_file`` mappings of one file must see each other's writes."""
    def assert_with_filename(filename):
        size = 10000
        first_mapping = torch.from_file(filename, True, size, dtype=torch.float)
        first_view = torch.FloatTensor(first_mapping).copy_(torch.randn(size))

        # Map the same file a second time and compare the contents.
        second_mapping = torch.from_file(filename, True, size, dtype=torch.float)
        second_view = torch.FloatTensor(second_mapping)
        self.assertEqual(first_view, second_view, atol=0, rtol=0)

        # Writing through either mapping must be visible through the other one.
        for target in (first_view, second_view):
            target.fill_(random.uniform(-1, 1))
            self.assertEqual(first_view, second_view, atol=0, rtol=0)

        # Drop every reference to the mappings before the file goes away.
        del first_mapping, first_view, second_mapping, second_view, target

    with TemporaryFileName() as fname:
        assert_with_filename(fname)

    # Same check for a path containing non-ASCII characters.
    if IS_FILESYSTEM_UTF8_ENCODING:
        with TemporaryDirectoryName(suffix='\u4e2d\u6587') as dname, TemporaryFileName(dir=dname) as fname:
            assert_with_filename(fname)
# Exercises repr()/str() of tensors and storages.
# First part: smoke test that printing tensor and storage classes does not raise.
# Second part: compares str(tensor) against literal expected output for complex,
# integer, float, inf/nan, scientific-notation, summarized, device-tagged,
# requires_grad, strided and bool tensors.
# NOTE(review): leading whitespace (including inside the multi-line expected
# strings) is not preserved in this view; the literals are kept untouched.
def test_print(self):
# Remember the current default tensor type so it can be restored after the NPU section.
default_type = torch.tensor([]).type()
for t in torch._tensor_classes:
if t == torch.HalfTensor:
continue
if t.is_sparse:
continue
# Only classes whose module name contains 'npu' are printed here.
if 'npu' not in t.__module__:
continue
if 'npu' in t.__module__ and t.is_npu and not torch_npu.npu.is_available():
continue
obj = t(100, 100).fill_(1)
obj.__repr__()
str(obj)
# Half tensors are covered through a CPU tensor converted with .half().
obj = torch.rand(100, 100, device='cpu').half()
obj.__repr__()
str(obj)
for t in torch._storage_classes:
if t == torch.BFloat16Storage:
continue
if t.is_npu and not torch_npu.npu.is_available():
continue
# Bool storages are filled with True, every other storage with 1.
if t == torch.BoolStorage or t == torch_npu.npu.BoolStorage:
obj = t(100).fill_(True)
else:
obj = t(100).fill_(1)
obj.__repr__()
str(obj)
# Complex tensors.
x = torch.tensor([2.3 + 4j, 7 + 6j])
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([2.3000+4.j, 7.0000+6.j])''')
x = torch.tensor([1.25 + 4j, -7. + 6j], dtype=torch.chalf)
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([ 1.2500+4.j, -7.0000+6.j], dtype=torch.complex32)''')
x = torch.tensor([1e28 + 2j, -1e-28j])
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([1.0000e+28+2.0000e+00j, -0.0000e+00-1.0000e-28j])''')
# Large integer scalar.
x = torch.tensor(2341234123412341)
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor(2341234123412341)''')
# Floats that need scientific notation.
x = torch.tensor([1e28, 1e-28])
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([1.0000e+28, 1.0000e-28])''')
# Same tensor printed with sci_mode forced on, forced off, then reset to None.
x = torch.tensor([1e2, 1e-2])
torch.set_printoptions(sci_mode=True)
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([1.0000e+02, 1.0000e-02])''')
torch.set_printoptions(sci_mode=False)
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([ 100.0000, 0.0100])''')
torch.set_printoptions(sci_mode=None)
# Integer tensors, without and with a negative entry.
x = torch.tensor([1, 2])
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([1, 2])''')
x = torch.tensor([1, -2])
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([ 1, -2])''')
# inf / nan in real and complex tensors.
x = torch.tensor([4, inf, 1.5, -inf, 0, nan, 1])
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([4.0000, inf, 1.5000, -inf, 0.0000, nan, 1.0000])''')
y = torch.tensor([4, inf, complex(1.5, inf), complex(-inf, 4), 0, complex(nan, inf), complex(3, nan)])
self.assertEqual(y.__repr__(), str(y))
expected_str = '''\
tensor([4.0000+0.j, inf+0.j, 1.5000+infj, -inf+4.j, 0.0000+0.j, nan+infj,
3.0000+nanj])'''
self.assertExpectedInline(str(y), expected_str)
# float64 values printed while the default dtype is float: the dtype suffix is expected.
with set_default_dtype(torch.float):
x = torch.tensor([1e-324, 1e-323, 1e-322, 1e307, 1e308, 1e309], dtype=torch.float64)
self.assertEqual(x.__repr__(), str(x))
expected_str = '''\
tensor([ 0.0000e+00, 9.8813e-324, 9.8813e-323, 1.0000e+307, 1.0000e+308,
inf], dtype=torch.float64)'''
self.assertExpectedInline(str(x), expected_str)
# Same values with float64 as default dtype: no dtype suffix is expected.
with set_default_dtype(torch.float64):
self.assertEqual(x.__repr__(), str(x))
expected_str = '''\
tensor([ 0.0000e+00, 9.8813e-324, 9.8813e-323, 1.0000e+307, 1.0000e+308,
inf])'''
self.assertExpectedInline(str(x), expected_str)
# Summarized output of a large tensor.
x = torch.zeros(10000)
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([0., 0., 0., ..., 0., 0., 0.])''')
# get_summarized_data keeps the first and last three entries of the long dimensions.
x = torch.rand(1, 20, 5, 30)
summary = torch._tensor_str.get_summarized_data(x)
self.assertEqual(summary.shape, (1, 6, 5, 6))
first_and_last = [0, 1, 2, -3, -2, -1]
self.assertEqual(summary, x[:, first_and_last][..., first_and_last])
# Device annotations; this section changes the default tensor type and restores it.
if torch_npu.npu.is_available():
x = torch.tensor([123], device='npu:0')
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([123], device='npu:0')''')
torch.set_default_tensor_type(torch_npu.npu.FloatTensor)
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([123])''')
if torch_npu.npu.device_count() >= 2:
with torch_npu.npu.device(1):
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([123], device='npu:0')''')
y = torch.tensor([123], device='cpu')
self.assertEqual(y.__repr__(), str(y))
self.assertExpectedInline(str(y), '''tensor([123], device='cpu')''')
torch.set_default_tensor_type(default_type)
# requires_grad annotation.
x = torch.tensor([123.], requires_grad=True)
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([123.], requires_grad=True)''')
# Strided (as_strided) views of real and complex tensors.
x = torch.ones(100, 2, 2, 10)
y = x.as_strided(size=(100, 2, 10), stride=(2 * 2 * 10, 2 * 10, 1))
self.assertEqual(str(y), y.__repr__())
expected_str = '''\
tensor([[[1., 1., 1., ..., 1., 1., 1.],
[1., 1., 1., ..., 1., 1., 1.]],
[[1., 1., 1., ..., 1., 1., 1.],
[1., 1., 1., ..., 1., 1., 1.]],
[[1., 1., 1., ..., 1., 1., 1.],
[1., 1., 1., ..., 1., 1., 1.]],
...,
[[1., 1., 1., ..., 1., 1., 1.],
[1., 1., 1., ..., 1., 1., 1.]],
[[1., 1., 1., ..., 1., 1., 1.],
[1., 1., 1., ..., 1., 1., 1.]],
[[1., 1., 1., ..., 1., 1., 1.],
[1., 1., 1., ..., 1., 1., 1.]]])\
'''
self.assertExpectedInline(str(y), expected_str)
x = torch.ones(100, 2, 2, 10) * (1 + 1j)
y = x.as_strided(size=(100, 2, 10), stride=(2 * 2 * 10, 2 * 10, 1))
self.assertEqual(str(y), y.__repr__())
expected_str = '''\
tensor([[[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j],
[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]],
[[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j],
[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]],
[[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j],
[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]],
...,
[[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j],
[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]],
[[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j],
[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]],
[[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j],
[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]]])\
'''
self.assertExpectedInline(str(y), expected_str)
# Scalars and one-element tensors.
x = torch.tensor(0.00002)
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor(2.0000e-05)''')
x = torch.tensor([True])
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([True])''')
x = torch.tensor(True)
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor(True)''')
x = torch.tensor([0.00002])
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([2.0000e-05])''')
x = torch.tensor([0.00002]) * (1 + 1j)
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([2.0000e-05+2.0000e-05j])''')
x = torch.tensor([123456789.])
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([1.2346e+08])''')
# Mixed magnitudes: the expected strings show when scientific notation is chosen.
x = torch.tensor([0.01, 11])
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([1.0000e-02, 1.1000e+01])''')
x = torch.tensor([1, 1010])
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([ 1, 1010])''')
x = torch.tensor([1000000000])
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([1000000000])''')
x = torch.tensor([1., 1000.])
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([ 1., 1000.])''')
x = torch.tensor([1., 1010.])
self.assertEqual(x.__repr__(), str(x))
self.assertExpectedInline(str(x), '''tensor([1.0000e+00, 1.0100e+03])''')
def test_sizeof(self) -> None:
    """Check that storage ``__sizeof__`` scales with the element count.

    For both the default-dtype and the uint8 storages, the size gained by
    100 elements over an empty storage must be exactly ten times the size
    gained by 10 elements.
    """
    def storage_sizes(convert):
        # Sizes reported for storages backing 0-, 10- and 100-element tensors.
        return [convert(torch.randn(count)).storage().__sizeof__() for count in (0, 10, 100)]

    for convert in (lambda t: t, lambda t: t.to(torch.uint8)):
        empty_size, size_10, size_100 = storage_sizes(convert)
        quotient, remainder = divmod(size_100 - empty_size, size_10 - empty_size)
        self.assertEqual(quotient, 10)
        self.assertEqual(remainder, 0)
@skipIfTorchDynamo("Not a suitable test for TorchDynamo")
def test_resizable(self) -> None:
    """Storage is resizable at first and no longer is after ``numpy()``."""
    tensor = torch.randn(5)
    resizable_before = tensor.storage().resizable()
    self.assertTrue(resizable_before)

    # The returned array is discarded; only the call itself matters here.
    tensor.numpy()
    resizable_after = tensor.storage().resizable()
    self.assertFalse(resizable_after)
def test_iter(self) -> None:
    """Iterating a tensor yields its rows; an empty tensor yields nothing."""
    matrix = torch.randn(5, 5)
    for row_index, row in enumerate(matrix):
        self.assertEqual(row, matrix[row_index])

    empty = torch.tensor([])
    self.assertEqual(list(empty), [])
def test_new(self) -> None:
    """Exercise the argument forms accepted by ``Tensor.new``."""
    empty = torch.autograd.Variable(torch.tensor([]))
    floats = torch.autograd.Variable(torch.randn(4, 4))
    ints = torch.autograd.Variable(torch.IntTensor([1, 2, 3]))

    # No arguments: an empty tensor equal to the source.
    self.assertEqual(empty.new().shape, [0])
    self.assertEqual(empty.new(), empty)

    # Integer arguments, a torch.Size, ``size=`` and ``()`` produce these shapes.
    self.assertEqual(empty.new(1, 2).shape, [1, 2])
    self.assertEqual(empty.new(torch.Size([3, 4])).shape, [3, 4])
    self.assertEqual(empty.new(size=(3, 4)).shape, [3, 4])
    self.assertEqual(empty.new(()).shape, [0])

    # Lists, tuples, NumPy scalars/arrays and tensor elements are taken as data.
    self.assertEqual(empty.new([3, 4]).shape, [2])
    data_sources = (
        [3, 4],
        (3, 4),
        [np.int32(3), np.float64(4)],
        np.array((3, 4)),
        [ints[2], ints[0] + 3],
    )
    for data in data_sources:
        self.assertEqual(empty.new(data).tolist(), [3, 4])

    # A same-typed storage or tensor is aliased, but a new object is returned.
    self.assertEqual(empty.new(floats.storage()).data_ptr(), floats.data_ptr())
    self.assertEqual(empty.new(floats).data_ptr(), floats.data_ptr())
    self.assertIsNot(empty.new(floats), floats)

    # An int tensor / int storage is rejected by the float tensor's ``new``.
    with self.assertRaises(TypeError):
        empty.new(ints)
    with self.assertRaises(RuntimeError):
        empty.new(ints.storage())
@unittest.skipIf(PYTORCH_CUDA_MEMCHECK, "is_pinned uses failure to detect pointer property")
def test_pin_memory(self):
    """A fresh CPU tensor is not pinned; ``pin_memory`` copies once, then is a no-op."""
    source = torch.randn(3, 5)
    self.assertFalse(source.is_pinned())
    if not torch.npu.is_available():
        return

    pinned = source.pin_memory()
    self.assertTrue(pinned.is_pinned())
    self.assertEqual(pinned, source)
    self.assertNotEqual(pinned.data_ptr(), source.data_ptr())

    # Pinning an already pinned tensor returns the very same object.
    self.assertIs(pinned, pinned.pin_memory())
    self.assertEqual(pinned.data_ptr(), pinned.pin_memory().data_ptr())
def test_error_msg_type_translation(self):
    """A dtype-mismatch error from Conv2d must name both ``Double`` and ``Long``.

    The model is given a double weight and fed a long input. Only the forward
    call sits inside the ``assertRaisesRegex`` context, so a failure while
    building the fixtures cannot be mistaken for the expected error.
    """
    input_ = torch.zeros(1, 1, 1, 6, dtype=torch.long)
    weight = torch.nn.Parameter(torch.zeros(1, 1, 1, 3, dtype=torch.double))
    model = torch.nn.Conv2d(1, 1, (1, 3), stride=1, padding=0, bias=False)
    model.weight = weight

    # The two lookaheads require both type names, in any order, in the message.
    with self.assertRaisesRegex(RuntimeError, '(?=.*Double)(?=.*Long)'):
        model(input_)
def test_apply(self):
    """``apply_`` maps a Python callable in place and rejects a str result."""
    values = torch.arange(1, 6)
    doubled = values.clone().apply_(lambda k: k + k)
    self.assertEqual(doubled, values * 2)

    with self.assertRaises(TypeError):
        values.apply_(lambda k: "str")
def test_map(self):
    """``map_`` combines two tensors with a callable and rejects a non-callable."""
    lhs = torch.autograd.Variable(torch.randn(3, 3))
    rhs = torch.autograd.Variable(torch.randn(3))

    result = lhs.clone()
    result.map_(rhs, lambda a, b: a + b)
    self.assertEqual(result, lhs + rhs)

    with self.assertRaisesRegex(TypeError, "not callable"):
        result.map_(rhs, "str")
def test_map2(self):
    """``map2_`` combines three tensors and refuses an operand that requires grad."""
    first = torch.autograd.Variable(torch.randn(3, 3))
    second = torch.autograd.Variable(torch.randn(3))
    third = torch.autograd.Variable(torch.randn(1, 3))

    result = first.clone()
    result.map2_(second, third, lambda a, b, c: a + b * c)
    self.assertEqual(result, first + second * third)

    third.requires_grad = True
    with self.assertRaisesRegex(RuntimeError, "requires grad"):
        result.map2_(second, third, lambda a, b, c: a + b * c)
def test_Size(self):
    """``torch.Size`` behaves as a tuple of ints and is closed under tuple operations."""
    with self.assertRaises(TypeError):
        torch.Size(torch.ones(3))

    empty = torch.Size([])
    triple = torch.Size([1, 2, 3])

    for size_obj in (empty, triple):
        self.assertIsInstance(size_obj, tuple)
    self.assertEqual(len(empty), 0)
    self.assertEqual(len(triple), 3)

    for position in range(3):
        self.assertIsInstance(triple[position], int)
    for position, value in enumerate((1, 2, 3)):
        self.assertEqual(triple[position], value)

    # Each result must still be a torch.Size and compare equal to the plain tuple.
    size_preserving = (
        # slicing
        (triple[:], (1, 2, 3)),
        (triple[:-1], (1, 2)),
        (triple[0:0], ()),
        # Size + tuple / Size + Size
        (empty + (), ()),
        (triple + (), (1, 2, 3)),
        (triple + (4, 5), (1, 2, 3, 4, 5)),
        (triple + triple, (1, 2, 3, 1, 2, 3)),
        # tuple + Size
        ((4, 5) + triple, (4, 5, 1, 2, 3)),
        # Size * int
        (empty * 0, ()),
        (triple * 0, ()),
        (triple * 1, (1, 2, 3)),
        (triple * 2, (1, 2, 3, 1, 2, 3)),
        # int * Size
        (0 * empty, ()),
        (0 * triple, ()),
        (1 * triple, (1, 2, 3)),
        (2 * triple, (1, 2, 3, 1, 2, 3)),
    )
    for result, expected in size_preserving:
        self.assertIsInstance(result, torch.Size)
        self.assertEqual(result, expected)

    # These two reflected additions are checked for type only / value only.
    self.assertIsInstance(() + empty, torch.Size)
    self.assertEqual(() + triple, (1, 2, 3))
def test_Size_concat_non_tuple_sequence(self):
    """Adding a non-tuple ``Sequence`` to a ``torch.Size`` raises TypeError on either side."""
    from collections.abc import Sequence

    class DummySequence(Sequence):
        vals = list(range(5))

        def __len__(self):
            return len(self.vals)

        def __getitem__(self, i):
            return self.vals[i]

        def __iter__(self):
            return iter(self.vals)

    size = torch.Size([1, 2, 3])
    seq = DummySequence()

    with self.assertRaisesRegex(TypeError, r"can only concatenate tuple \(not \w+\) to torch.Size"):
        size + seq
    with self.assertRaisesRegex(TypeError, r"unsupported operand type"):
        seq + size
def test_Size_concat_wildcard(self):
    """An operand defining ``__add__``/``__radd__`` decides the result of adding a Size."""
    class Wildcard:
        def __add__(self, other):
            return 42

        def __radd__(self, other):
            return 42

    size = torch.Size([1, 2, 3])
    anything = Wildcard()
    for total in (anything + size, size + anything):
        self.assertEqual(total, 42)
def test_Size_scalar(self):
    """0-dim integer tensors are accepted as ``torch.Size`` entries."""
    two, three = torch.tensor(2), torch.tensor(3)
    size = torch.Size([0, 1, two, three, 4])
    # Positions 1..4 each hold a value equal to their own index.
    for position in (1, 2, 3, 4):
        self.assertEqual(size[position], position)
def test_Size_iter(self):
    """``torch.Size`` can be built from an iterator and from a ``range``."""
    sources = [iter([1, 2, 3, 4, 5]), range(1, 6)]
    for source in sources:
        size = torch.Size(source)
        for position, expected in enumerate(range(1, 6)):
            self.assertEqual(size[position], expected)
def test_t_not_2d_error(self):
    """``t()`` and ``t_()`` raise RuntimeError for a 3-D tensor."""
    with self.assertRaises(RuntimeError):
        torch.randn(2, 3, 4).t()
    with self.assertRaises(RuntimeError):
        torch.randn(2, 3, 4).t_()
@unittest.skipIf(True, "flush_denormal not supported")
def test_set_flush_denormal(self):
    """Denormal values read back as zero while flush-denormal mode is on.

    The mode is a process-wide setting, so it is switched off again in a
    ``finally`` block: a failing assertion must not leave it enabled for
    the tests that run afterwards.
    """
    tiny_float = 1e-42
    tiny_double = 1e-320
    float_tensor = torch.FloatTensor([1.0, tiny_float])
    double_tensor = torch.DoubleTensor([1.0, tiny_float, tiny_double])

    # Before the mode is enabled the tiny values are read back as stored.
    self.assertEqual(float_tensor[0], 1.0, atol=0.0, rtol=0)
    self.assertEqual(float_tensor[1], tiny_float, atol=tiny_float / 16, rtol=0)
    self.assertEqual(double_tensor[0], 1.0, atol=0.0, rtol=0)
    self.assertEqual(double_tensor[1], tiny_float, atol=0.0, rtol=0)
    self.assertEqual(double_tensor[2], tiny_double, atol=0.0, rtol=0)

    torch.set_flush_denormal(True)
    try:
        self.assertEqual(float_tensor[0], 1.0, atol=0.0, rtol=0)
        self.assertEqual(float_tensor[1], 0.0, atol=0.0, rtol=0)
        self.assertEqual(double_tensor[0], 1.0, atol=0.0, rtol=0)
        # tiny_float is still expected to be read back unchanged from the double tensor.
        self.assertEqual(double_tensor[1], tiny_float, atol=0.0, rtol=0)
        self.assertEqual(double_tensor[2], 0.0, atol=0.0, rtol=0)
    finally:
        torch.set_flush_denormal(False)
def test_show_config(self):
    """Smoke test: ``torch.__config__.show()`` must run without raising."""
    config_module = torch.__config__
    config_module.show()
@unittest.skipIf(IS_FBCODE, "CXX_FLAGS is only for OSS build.")
def test_cxx_flags(self):
    """Smoke test: ``torch.__config__._cxx_flags()`` must run without raising."""
    config_module = torch.__config__
    config_module._cxx_flags()
def test_parallel_info(self):
    """Smoke test: ``torch.__config__.parallel_info()`` must run without raising."""
    config_module = torch.__config__
    config_module.parallel_info()
def test_get_cpu_capability(self):
    """``get_cpu_capability`` can be called eagerly and compiled with ``torch.jit.script``."""
    capability_fn = torch.backends.cpu.get_cpu_capability
    capability_fn()
    torch.jit.script(capability_fn)
@slowTest
def test_slow_test(self):
    """Empty test body whose only feature is the ``slowTest`` decorator."""
    return None
def test_is_nonzero(self):
    """``is_nonzero`` rejects non-single-element tensors and reports truthiness otherwise."""
    ambiguous_inputs = (
        ([], "Boolean value of Tensor with no values is ambiguous"),
        ([0, 0], "Boolean value of Tensor with more than one value is ambiguous"),
    )
    for data, message in ambiguous_inputs:
        with self.assertRaisesRegex(RuntimeError, message):
            torch.tensor(data).is_nonzero()

    zero_valued = (0, [0], [[0]], 0.0, False, 0 + 0j)
    nonzero_valued = (1, [1], [[1]], 0.1, -0.1, True, 0 + 0.1j)
    for data in zero_valued:
        self.assertFalse(torch.tensor(data).is_nonzero())
    for data in nonzero_valued:
        self.assertTrue(torch.tensor(data).is_nonzero())
def test_assert_async(self):
    """``torch._assert_async`` accepts only a single nonzero element."""
    ambiguous_inputs = (
        ([], "Boolean value of Tensor with no values is ambiguous"),
        ([0, 0], "Boolean value of Tensor with more than one value is ambiguous"),
    )
    for data, message in ambiguous_inputs:
        with self.assertRaisesRegex(RuntimeError, message):
            torch._assert_async(torch.tensor(data))

    # Single zero-valued elements of every kind are rejected with the same message.
    for data in (0, 0.0, False, 0 + 0j):
        with self.assertRaisesRegex(RuntimeError, "Expected Tensor with single nonzero value, but got zero"):
            torch._assert_async(torch.tensor(data))

    # Single nonzero elements pass without raising.
    for data in (1, 0.1, -0.1, True, 0 + 0.1j):
        torch._assert_async(torch.tensor(data))
@unittest.skipIf(IS_SANDCASTLE, "NPU is built, can't test NPU not built error")
def test_NPU_not_built(self):
    """Every NPU entry point must fail with the "not compiled with NPU" message.

    NOTE(review): the skip condition only looks at IS_SANDCASTLE; confirm
    whether this test is meant to run when an NPU build is actually present.
    """
    msg = "Torch not compiled with NPU enabled"
    # (expected exception type, action), in the order they are exercised.
    expectations = (
        (AssertionError, lambda: torch_npu.npu.current_device()),
        (AssertionError, lambda: torch.tensor([1], device="npu")),
        (AssertionError, lambda: torch.tensor([1]).npu()),
        (TypeError, lambda: torch_npu.npu.FloatTensor()),
        (TypeError, lambda: torch.set_default_tensor_type(torch_npu.npu.FloatTensor)),
        (AssertionError, lambda: torch.tensor([1]).to(device="npu")),
    )
    for exc_type, action in expectations:
        self.assertRaisesRegex(exc_type, msg, action)
def test_has_internal_overlap(self):
    """Check the three result codes of ``torch._debug_has_internal_overlap``."""
    no_overlap, has_overlap, too_hard = 0, 1, 2
    overlap_of = torch._debug_has_internal_overlap

    dense = torch.randn(3, 3)
    self.assertEqual(overlap_of(dense), no_overlap)

    # Expanding a row makes every output row alias the same memory.
    expanded = torch.randn(1, 3).expand(4, 3)
    self.assertEqual(overlap_of(expanded), has_overlap)

    # A zero stride on a size-1 dimension is still reported as no overlap.
    strided = torch.randn(10).as_strided([2, 1, 5], [1, 0, 2])
    self.assertEqual(overlap_of(strided), no_overlap)

    strided = torch.randn(2, 1, 10)[::2].as_strided((2, 1, 5), (10, 0, 2))
    self.assertEqual(overlap_of(strided), too_hard)
def test_allow_tensor_metadata_change(self):
    """Only constructs a 2x3 tensor of ones; nothing is asserted."""
    _ = torch.ones(2, 3)
def test_memory_format(self):
    """``contiguous(memory_format=...)`` converts the layout without changing values."""
    cases = (
        ((4, 3, 8, 8), torch.channels_last),
        ((4, 3, 8, 8, 8), torch.channels_last_3d),
    )
    for shape, memory_format in cases:
        source = torch.randn(*shape)
        converted = source.contiguous(memory_format=memory_format)
        # No longer contiguous in the default sense, but contiguous in the requested format.
        self.assertFalse(converted.is_contiguous())
        self.assertTrue(converted.is_contiguous(memory_format=memory_format))
        self.assertEqual(converted, source)
def test_memory_format_contiguous_returns_same_tensor_if_already_satisfies(self):
    """``contiguous`` on a tensor already in the requested format must alias it."""
    cases = (
        ((4, 8, 8, 3), (0, 3, 1, 2), torch.channels_last),
        ((4, 8, 8, 8, 3), (0, 4, 1, 2, 3), torch.channels_last_3d),
    )
    for shape, permutation, memory_format in cases:
        source = torch.randn(*shape).permute(*permutation)
        alias = source.contiguous(memory_format=memory_format)
        # Writing through the result must be visible in the source.
        alias.fill_(7)
        self.assertEqual(source, alias)
def test_memory_format_empty(self):
    """``torch.empty`` honours a memory format for a matching rank and rejects others."""
    cases = (
        ((3, 3), (3, 3, 3, 3), torch.channels_last),
        ((3, 3, 3), (3, 3, 3, 3, 3), torch.channels_last_3d),
    )
    for bad_shape, good_shape, memory_format in cases:
        with self.assertRaises(RuntimeError):
            torch.empty(bad_shape, memory_format=memory_format)
        created = torch.empty(good_shape, memory_format=memory_format)
        self.assertTrue(created.is_contiguous(memory_format=memory_format))
@skipIfCrossRef
def test_dim_order(self):
    """Check ``Tensor.dim_order`` for plain, permuted, ambiguous and sparse tensors."""
    base_shape = (2, 3, 5, 7)
    formats = (torch.contiguous_format, torch.channels_last)

    dense = torch.empty(base_shape)
    self.assertSequenceEqual(dense.dim_order(), (0, 1, 2, 3), seq_type=tuple)
    self.assertSequenceEqual(dense.transpose(0, 1).dim_order(), (1, 0, 2, 3))

    channels_last = torch.empty(base_shape, memory_format=torch.channels_last)
    self.assertSequenceEqual(channels_last.dim_order(), (0, 2, 3, 1))

    channels_last_3d = torch.empty((2, 3, 5, 7, 8), memory_format=torch.channels_last_3d)
    self.assertSequenceEqual(channels_last_3d.dim_order(), (0, 2, 3, 4, 1))

    for permutation in itertools.permutations(range(4)):
        self.assertSequenceEqual(
            permutation, torch.empty_permuted(base_shape, permutation).dim_order()
        )

    # Shapes for which the strict check raises but the format-list check resolves an order.
    target_shapes = [[2, 2, 1, 2], [1, 2, 2, 2], [2, 2, 2, 1], [1, 2, 2, 1], [1, 2, 1, 2]]
    for dims in target_shapes:
        for memory_format in formats:
            tensor = torch.empty(dims).to(memory_format=memory_format)
            with self.assertRaises(RuntimeError):
                tensor.dim_order(ambiguity_check=True)

            rank = len(dims)
            if memory_format == torch.contiguous_format:
                expected_order = list(range(rank))
            elif memory_format == torch.channels_last:
                expected_order = [0, *range(2, rank), 1]

            self.assertSequenceEqual(
                expected_order,
                tensor.dim_order(ambiguity_check=[torch.contiguous_format, torch.channels_last]),
            )

    ambiguous_shapes = [[2, 1, 2, 2], [2, 2, 1, 1], [1, 2, 1, 1], [2, 1, 1, 2], [2, 1, 2, 1],
                        [1, 1, 1, 2], [1, 1, 2, 2], [1, 1, 1, 1], [2, 1, 1, 1], [1, 1, 2, 1]]
    for dims in ambiguous_shapes:
        for memory_format in formats:
            tensor = torch.empty(dims).to(memory_format=memory_format)
            # Both calls share one context, so the second only runs if the first does not raise.
            with self.assertRaises(RuntimeError):
                tensor.dim_order(ambiguity_check=True)
                tensor.dim_order(ambiguity_check=[torch.contiguous_format, torch.channels_last])

    with self.assertRaises(TypeError):
        torch.empty((1, 2, 3, 4)).dim_order(ambiguity_check="ILLEGAL_STR")

    # A sparse COO tensor is expected to raise AttributeError.
    with self.assertRaises(AttributeError):
        indices = torch.tensor([[0, 1, 2], [0, 1, 2]])
        values = torch.tensor([1.0, 2.0, 3.0])
        sparse = torch.sparse_coo_tensor(indices, values, size=(3, 3))
        sparse.dim_order()
def test_subclass_tensors(self):
    """``torch.FloatTensor`` cannot be subclassed, while ``torch.Tensor`` can."""
    with self.assertRaisesRegex(TypeError, "type 'torch.FloatTensor' is not an acceptable base type"):
        class Foo1(torch.FloatTensor):
            pass

    class Foo2(torch.Tensor):
        def foo(self):
            return 5

    instance = Foo2()
    self.assertEqual(instance.foo(), 5)
def test_ndim(self):
    """``ndim`` equals the number of dimensions, including for scalar and empty tensors."""
    cases = (
        (torch.randn(1, 2, 3), 3),
        (torch.randn(()), 0),
        (torch.randn(1, 0), 2),
    )
    for tensor, rank in cases:
        self.assertEqual(rank, tensor.ndim)
def test_nbytes(self):
    """``nbytes`` equals ``numel() * element_size()`` for every tested tensor."""
    tensors = (
        torch.randn(1, 2, 3, dtype=torch.float64),
        torch.randn(()),
        torch.randn(1, 0),
    )
    for tensor in tensors:
        expected_bytes = tensor.numel() * tensor.element_size()
        self.assertEqual(expected_bytes, tensor.nbytes)
def test_fill_diagonal(self):
    """Compare ``fill_diagonal_`` against diagonals written element by element."""
    fill = 1

    # Tall 2-D matrix without wrapping.
    actual = torch.randn(7, 3)
    expected = actual.clone()
    for k in range(3):
        expected[k, k] = fill
    actual.fill_diagonal_(fill)
    self.assertEqual(actual, expected)

    # Tall 2-D matrix with wrapping: a second diagonal starts at row 4.
    actual = torch.randn(7, 3)
    expected = actual.clone()
    for k in range(3):
        expected[k, k] = fill
        expected[k + 4, k] = fill
    actual.fill_diagonal_(fill, wrap=True)
    self.assertEqual(actual, expected)

    # 3-D cube.
    actual = torch.rand(3, 3, 3)
    expected = actual.clone()
    for k in range(3):
        expected[k, k, k] = fill
    actual.fill_diagonal_(fill)
    self.assertEqual(actual, expected)

    # 2-D slice taken out of a 3-D tensor.
    actual = torch.rand(3, 3, 3)[:, 1, ...]
    expected = actual.clone()
    for k in range(3):
        expected[k, k] = fill
    actual.fill_diagonal_(fill)
    self.assertEqual(actual, expected)

    # Tall 2-D slice of a 3-D tensor, with wrapping.
    actual = torch.rand(7, 3, 3)[:, 1, ...]
    expected = actual.clone()
    for k in range(3):
        expected[k, k] = fill
        expected[k + 4, k] = fill
    actual.fill_diagonal_(fill, wrap=True)
    self.assertEqual(actual, expected)
def test_setting_real_imag_to_a_number(self):
    """Assigning a Python number to ``.real`` / ``.imag`` fills that component."""
    complex_tensor = torch.randn(4, dtype=torch.cfloat)
    complex_tensor.real = 0
    complex_tensor.imag = 0

    expected = torch.zeros(4)
    for component in (complex_tensor.real, complex_tensor.imag):
        self.assertEqual(component, expected)
def test_batch_norm_cpu_inference(self):
    """Eval-mode BatchNorm2d gives the same reference output for three input layouts."""
    inputs = [
        torch.tensor([[[[-0.5000]]], [[[0.5000]]]]),
        torch.tensor([
            [
                [[-0.5000, 0.5000], [-1.0000, 1.0000]],
                [[-0.2500, -0.5000], [0.2500, 0.5000]]
            ],
            [
                [[0.1000, 1.0000], [1.0000, 0.1000]],
                [[1.0000, 0.5000], [1.5000, -1.5000]]
            ]])]

    outputs = [
        torch.tensor([
            [[[-0.499997496604919433593750000]]],
            [[[0.499997496604919433593750000]]]]),
        torch.tensor([
            [[[-0.499997496604919433593750000, 0.499997496604919433593750000],
              [-0.999994993209838867187500000, 0.999994993209838867187500000]],
             [[-0.249998748302459716796875000, -0.499997496604919433593750000],
              [0.249998748302459716796875000, 0.499997496604919433593750000]]],
            [[[0.099999502301216125488281250, 0.999994993209838867187500000],
              [0.999994993209838867187500000, 0.099999502301216125488281250]],
             [[0.999994993209838867187500000, 0.499997496604919433593750000],
              [1.499992489814758300781250000, -1.499992489814758300781250000]]]])]

    for sample, reference in zip(inputs, outputs):
        for affine in (False, True):
            module = torch.nn.BatchNorm2d(sample.size()[1], 1e-05, 0.1, affine=affine)
            module.eval()

            # Contiguous input.
            contiguous_in = sample.contiguous()
            contiguous_out = module(contiguous_in)

            # Input permuted on its last two dims; the output is permuted back.
            permuted_in = contiguous_in.permute(0, 1, 3, 2)
            permuted_out = module(permuted_in).permute(0, 1, 3, 2)

            # Channels-last input.
            channels_last_in = contiguous_in.contiguous(memory_format=torch.channels_last)
            channels_last_out = module(channels_last_in)

            self.assertEqual(channels_last_out, reference)
            self.assertEqual(channels_last_out, contiguous_out)
            self.assertEqual(channels_last_out, permuted_out)
@skipIfTorchDynamo("Fails after Triton update, see pytorch issue 94687")
def test_empty_meta(self):
    """Meta tensors broadcast like real ones but hold no readable data."""
    side = 2 ** 20
    matrix = torch.empty(side, side, device='meta')
    vector = torch.empty(side, device='meta')

    total = matrix + vector
    self.assertEqual(total.size(), (side, side))
    with self.assertRaises(RuntimeError):
        total[0][0].item()
@skipIfTorchDynamo("Fails after Triton update, see pytorch issue 94687")
def test_format_scalar_meta(self):
    """``format()`` of a 0-dim meta tensor matches its ``repr()``."""
    scalar = torch.empty((), device='meta')
    formatted = format(scalar)
    self.assertEqual(formatted, repr(scalar))
def test_upsample_nearest1d_meta(self):
    """Nearest 1-D upsampling on meta tensors reports the output shape without data."""
    batch = 2 * 10 ** 8
    doubled = 4 * 10 ** 8
    source = torch.empty(batch, 3, batch, device='meta')
    expected_size = (batch, 3, doubled)

    # Functional interface.
    result = torch.nn.functional.interpolate(source, scale_factor=2)
    self.assertEqual(result.size(), expected_size)
    with self.assertRaises(RuntimeError):
        result[0][0][0].item()

    # out= variant starting from an empty output tensor.
    result = torch.empty(0, device='meta')
    torch._C._nn.upsample_nearest1d(source, (doubled,), 2, out=result)
    self.assertEqual(result.size(), expected_size)
    with self.assertRaises(RuntimeError):
        result[0][0][0].item()
def test_upsample_nearest2d_meta(self):
    """Check memory format, dtype and device handling of ``upsample_nearest2d`` with ``out=``."""
    upsample = torch._C._nn.upsample_nearest2d

    # A pre-sized channels-last output keeps its format for a contiguous input.
    x = torch.empty(4, 3, 8, 8, device='meta')
    out = torch.empty(4, 3, 16, 16, device='meta', memory_format=torch.channels_last)
    upsample(x, (16, 16), out=out)
    self.assertTrue(out.is_contiguous(memory_format=torch.channels_last))

    # A pre-sized contiguous output keeps its format for a channels-last input.
    x = torch.empty(4, 3, 8, 8, device='meta', memory_format=torch.channels_last)
    out = torch.empty(4, 3, 16, 16, device='meta')
    upsample(x, (16, 16), out=out)
    self.assertTrue(out.is_contiguous())

    # An empty output ends up channels-last for a channels-last input.
    x = torch.empty(4, 3, 8, 8, device='meta', memory_format=torch.channels_last)
    out = torch.empty(0, device='meta')
    upsample(x, (16, 16), out=out)
    self.assertTrue(out.is_contiguous(memory_format=torch.channels_last))

    # An output dtype that differs from the input dtype is rejected.
    x = torch.empty(4, 3, 8, 8, device='meta', dtype=torch.float)
    out = torch.empty(4, 3, 16, 16, device='meta', dtype=torch.double)
    self.assertExpectedRaisesInline(
        RuntimeError, lambda: upsample(x, (16, 16), out=out),
        """Expected out tensor to have dtype torch.float32 but got torch.float64 instead"""
    )

    # An output on another device is rejected (not checked under TorchInductor).
    x = torch.empty(0, 3, 8, 8, device='meta')
    out = torch.empty(0, 3, 16, 16, device='cpu')
    if TEST_WITH_TORCHINDUCTOR:
        return
    self.assertExpectedRaisesInline(
        RuntimeError, lambda: upsample(x, (16, 16), out=out),
        """Attempting to copy from device meta to device cpu, but cross-device copies are not allowed!"""
    )
def test_add_meta_scalar(self):
    """Adding a Python scalar to a meta tensor keeps the tensor's size."""
    meta_tensor = torch.empty(2, device='meta')
    shifted = meta_tensor + 2
    self.assertEqual(shifted.size(), meta_tensor.size())
def test_normal_shape(self):
    """Check output shapes, broadcasting errors and ``out=`` resizing of ``torch.normal``.

    The calls that are expected to raise are made bare inside their
    ``assertRaisesRegex`` context: an ``assertEqual`` wrapped around them
    could never run once the call raises.
    """
    full_shape = (2, 3, 4, 5)
    for device in get_all_device_types():
        tensor1 = torch.rand(1, device=device)
        tensor4 = torch.rand(4, device=device)
        tensor120 = torch.rand(120, device=device)
        tensor2145 = torch.rand(2, 1, 4, 5, device=device)
        tensor2345 = torch.rand(2, 3, 4, 5, device=device)
        tensor2345_non_contiguous = torch.rand(2, 4, 3, 5, device=device).permute(0, 2, 1, 3)
        tensor2345_channels_last = tensor2345.contiguous(memory_format=torch.channels_last)
        output2345 = torch.zeros(2, 3, 4, 5, device=device)
        output345 = torch.zeros(3, 4, 5, device=device)

        # Inputs with the same size but different memory layouts.
        self.assertEqual(torch.normal(tensor2345, tensor2345).size(), full_shape)
        self.assertEqual(torch.normal(tensor2345_non_contiguous, tensor2345).size(), full_shape)
        self.assertEqual(torch.normal(tensor2345, tensor2345_channels_last).size(), full_shape)
        self.assertEqual(torch.normal(tensor2345_non_contiguous, tensor2345_channels_last).size(), full_shape)

        # A scalar on either side.
        self.assertEqual(torch.normal(tensor2345, 2).size(), full_shape)
        self.assertEqual(torch.normal(2, tensor2345).size(), full_shape)

        # Broadcastable inputs.
        self.assertEqual(torch.normal(tensor2345, tensor1).size(), full_shape)
        self.assertEqual(torch.normal(tensor2145, tensor2345).size(), full_shape)

        # Inputs with the same number of elements that do not broadcast.
        with self.assertRaisesRegex(
                RuntimeError,
                r"The size of tensor a \(120\) must match the size of "
                r"tensor b \(5\) at non-singleton dimension 3"):
            torch.normal(tensor120, tensor2345)
        with self.assertRaisesRegex(
                RuntimeError,
                r"The size of tensor a \(5\) must match the size of "
                r"tensor b \(120\) at non-singleton dimension 3"):
            torch.normal(tensor2345, tensor120)

        # Inputs with a different number of elements that do not broadcast.
        with self.assertRaisesRegex(
                RuntimeError,
                r"The size of tensor a \(5\) must match the size of "
                r"tensor b \(4\) at non-singleton dimension 3"):
            torch.normal(tensor2345, tensor4)

        # out= with the correct shape.
        self.assertEqual(torch.normal(tensor2345, tensor2345, out=output2345).size(), full_shape)

        # out= with the wrong shape is resized, with a deprecation warning.
        with self.assertWarnsRegex(
                UserWarning,
                "This behavior is deprecated, and in a future PyTorch "
                "release outputs will not be resized unless they have "
                "zero elements"):
            self.assertEqual(torch.normal(tensor2345, tensor2145, out=output345).size(), full_shape)

        with self.assertRaisesRegex(
                RuntimeError,
                r"The size of tensor a \(5\) must match the size of "
                r"tensor b \(120\) at non-singleton dimension 3"):
            torch.normal(tensor2345, tensor120, out=output345)
def test_tensoriterator_output_setup(self):
    """Verify element-wise add results for several input memory layouts.

    Every output element is compared with the sum of the matching input
    elements, for plain and per-tensor quantized tensors alike.
    """
    def check_elementwise(x, y, scale, zero_point, out):
        self.assertEqual(x.dim(), 4)
        self.assertEqual(x.size(), y.size())
        self.assertEqual(y.size(), out.size())

        quantized = scale is not None and zero_point is not None
        for n, c, h, w in itertools.product(*(range(extent) for extent in x.size())):
            lhs, rhs = x[n][c][h][w], y[n][c][h][w]
            if quantized:
                expected = torch.ops.quantized.add(lhs, rhs, scale, zero_point)
            else:
                expected = lhs + rhs
            self.assertEqual(out[n][c][h][w], expected)

    xraw = torch.rand(2, 3, 4, 4)
    yraw = torch.rand(2, 3, 4, 4)
    qxraw = torch.quantize_per_tensor(xraw, 0.1, 5, torch.quint8)
    qyraw = torch.quantize_per_tensor(yraw, 0.1, 5, torch.quint8)

    # Contiguous inputs.
    check_elementwise(xraw, yraw, None, None, xraw + yraw)
    check_elementwise(qxraw, qyraw, 0.1, 5, torch.ops.quantized.add(qxraw, qyraw, 0.1, 5))

    # Channels-last inputs.
    x = xraw.contiguous(memory_format=torch.channels_last)
    y = yraw.contiguous(memory_format=torch.channels_last)
    check_elementwise(x, y, None, None, x + y)
    qx = qxraw.contiguous(memory_format=torch.channels_last)
    qy = qyraw.contiguous(memory_format=torch.channels_last)
    check_elementwise(qx, qy, 0.1, 5, torch.ops.quantized.add(qx, qy, 0.1, 5))

    # Both inputs permuted the same way.
    x = xraw.permute(0, 2, 3, 1)
    y = yraw.permute(0, 2, 3, 1)
    check_elementwise(x, y, None, None, x + y)
    qx = qxraw.permute(0, 2, 3, 1)
    qy = qyraw.permute(0, 2, 3, 1)
    check_elementwise(qx, qy, 0.1, 5, torch.ops.quantized.add(qx, qy, 0.1, 5))

    # An explicit out= tensor must keep its own strides.
    x = xraw.permute(0, 2, 3, 1)
    y = yraw.permute(0, 2, 3, 1)
    out = torch.empty_like(xraw)
    out = out.permute(0, 3, 2, 1)
    strides_before = out.stride()
    check_elementwise(x, y, None, None, torch.add(x, y, out=out))
    self.assertEqual(strides_before, out.stride())

    # Inputs permuted differently from each other.
    x = xraw.permute(0, 2, 3, 1)
    y = yraw.permute(0, 3, 2, 1)
    check_elementwise(x, y, None, None, x + y)
    qx = qxraw.permute(0, 2, 3, 1)
    qy = qyraw.permute(0, 3, 2, 1)
    check_elementwise(qx, qy, 0.1, 5, torch.ops.quantized.add(qx, qy, 0.1, 5))
def test_conj_physical_meta_stride(self):
    """``conj_physical`` on a meta tensor keeps the strides of its input."""
    base = torch.zeros((5, 3, 6), dtype=torch.complex128, device='meta')
    transformed = torch._fft_c2c(base, [1], 1, True)
    conjugated = torch.conj_physical(transformed)
    self.assertEqual(transformed.stride(), conjugated.stride())
def test_dot_data_use(self):
    """A dtype swapped in through ``.data`` must still be caught by Conv2d.

    The weight's ``.data`` is replaced by a complex64 tensor and the model is
    fed a double input; the error has to name both ``Double`` and
    ``ComplexFloat``. Only the forward call sits inside the
    ``assertRaisesRegex`` context, so a failure while building the fixtures
    cannot be mistaken for the expected error.
    """
    input_ = torch.randn(1, 1, 1, 6, dtype=torch.double)
    weight = torch.zeros(1, 1, 1, 3, dtype=torch.complex64)
    model = torch.nn.Conv2d(1, 1, (1, 3), stride=1, padding=0, bias=False)
    model.weight.data = weight

    # The two lookaheads require both type names, in any order, in the message.
    with self.assertRaisesRegex(RuntimeError, '(?=.*Double)(?=.*ComplexFloat)'):
        model(input_)
def test_empty_storage_view(self):
    """In-place arithmetic on a strided view of a zero-element tensor must not raise."""
    empty_array = np.empty((0, 4))
    tensor = torch.from_numpy(empty_array)
    tensor[:, 1::2] *= 1
def test_has_storage(self):
    """Empty tensors, however they were produced, still expose a storage object."""
    empty_tensors = (
        torch.tensor([]),
        torch.empty(0),
        torch.tensor([]).clone(),
        torch.tensor([0, 0, 0]).nonzero(),
        torch.tensor([]).new(),
    )
    for tensor in empty_tensors:
        self.assertIsNotNone(tensor.storage())
def test_numel(self):
    """``nelement()`` and ``numel()`` both report the total element count."""
    byte_tensor = torch.ByteTensor(3, 100, 100)
    expected_count = 3 * 100 * 100
    self.assertEqual(byte_tensor.nelement(), expected_count)
    self.assertEqual(byte_tensor.numel(), expected_count)
def test_copy_dtypes(self):
    """``copy.deepcopy`` of a dtype returns the very same dtype object."""
    all_dtypes = all_types_and_complex_and(torch.half, torch.bfloat16, torch.bool)
    for original in all_dtypes:
        duplicate = copy.deepcopy(original)
        self.assertIs(original, duplicate)
def test_dtype_is_signed(self):
    """``dtype.is_signed`` agrees with ``torch.is_signed``; quantized dtypes raise.

    The extra dtypes are half, bfloat16 and bool. ``torch.half`` used to be
    listed twice, which left ``torch.bool`` untested.
    """
    for dtype in all_types_and_complex_and(torch.half, torch.bfloat16, torch.bool):
        self.assertEqual(dtype.is_signed, torch.is_signed(torch.tensor(0, dtype=dtype)))

    # Reading the attribute on a quantized dtype is expected to raise.
    for quantized_dtype in (torch.quint8, torch.qint8, torch.qint32):
        with self.assertRaisesRegex(RuntimeError, 'not supported for quantized'):
            quantized_dtype.is_signed
@skipIfTorchDynamo("requires pytorch torchdynamo PR 1098")
def test_RNGState(self):
    """Restoring a saved RNG state reproduces the same random draw."""
    saved_state = torch.get_rng_state()
    state_snapshot = saved_state.clone()
    first_draw = torch.rand(1000)

    # Drawing numbers must not have modified the state tensor handed out earlier.
    differing = saved_state.ne(state_snapshot).long().sum()
    self.assertEqual(differing, 0, atol=0, rtol=0)

    torch.set_rng_state(saved_state)
    second_draw = torch.rand(1000)
    self.assertEqual(first_draw, second_draw, atol=0, rtol=0)
@skipIfTorchDynamo("requires pytorch torchdynamo PR 1098")
def test_RNGStateAliasing(self):
    """A generator seeded from the global state must advance independently of it."""
    forked = torch.Generator()
    forked.set_state(torch.get_rng_state())
    self.assertEqual(forked.get_state(), torch.get_rng_state())

    expected_draw = torch.rand(1000)
    # Advance the global RNG a long way; the forked generator must be unaffected.
    _ = torch.rand(100000)
    forked_draw = torch.rand(1000, generator=forked)
    self.assertEqual(expected_draw, forked_draw, atol=0, rtol=0, msg="RNG has not forked correctly.")
@skipIfTorchDynamo("requires pytorch torchdynamo PR 1098")
def test_RNG_after_pickle(self):
    """Pickling a tensor with ``ForkingPickler`` must not consume random numbers."""
    torch.random.manual_seed(100)
    reference_draw = torch.rand(10)

    torch.random.manual_seed(100)
    buffer = io.BytesIO()
    payload = torch.tensor([1, 2, 3])
    pickler = ForkingPickler(buffer, pickle.HIGHEST_PROTOCOL)
    pickler.dump(payload)

    draw_after_pickle = torch.rand(10)
    self.assertEqual(reference_draw, draw_after_pickle, atol=0, rtol=0)
@skipIfTorchDynamo("requires pytorch torchdynamo PR 1098")
def test_boxMullerState(self):
    """Saving/restoring RNG state and re-seeding both replay ``randn`` exactly.

    An odd sample count is drawn each time.
    """
    sample_count = 101

    torch.manual_seed(123)
    from_seed = torch.randn(sample_count)

    # Snapshot the state in the middle of the stream and replay from it.
    mid_state = torch.get_rng_state()
    first_pass = torch.randn(sample_count)
    torch.set_rng_state(mid_state)
    second_pass = torch.randn(sample_count)

    torch.manual_seed(123)
    from_reseed = torch.randn(sample_count)

    self.assertEqual(first_pass, second_pass, atol=0, rtol=0,
                     msg='get_rng_state/set_rng_state not generating same sequence of normally distributed numbers')
    self.assertEqual(from_seed, from_reseed, atol=0, rtol=0,
                     msg='repeated calls to manual_seed not generating same sequence of normally distributed numbers')
@skipIfTorchDynamo("requires pytorch torchdynamo PR 1098")
def test_manual_seed(self):
    """Check ``manual_seed`` reproducibility, wrap-around and overflow errors.

    The global RNG state captured on entry is restored in a ``finally``
    block, so a failing assertion cannot leak one of the test seeds into
    the tests that run afterwards.
    """
    rng_state = torch.get_rng_state()
    try:
        torch.manual_seed(2)
        x = torch.randn(100)
        self.assertEqual(torch.initial_seed(), 2)
        torch.manual_seed(2)
        y = torch.randn(100)
        self.assertEqual(x, y)

        max_int64 = 0x7fff_ffff_ffff_ffff
        min_int64 = -max_int64 - 1
        max_uint64 = 0xffff_ffff_ffff_ffff
        # (seed passed to manual_seed, value initial_seed() must report afterwards)
        test_cases = [
            # Large positive seeds are reported unchanged.
            (max_int64, max_int64),
            (max_int64 + 1, max_int64 + 1),
            (max_uint64, max_uint64),
            (0, 0),
            # Negative seeds are reported as 0xffff_ffff_ffff_ffff + seed + 1.
            (-1, max_uint64),
            (min_int64, max_int64 + 1)
        ]
        for seed, expected_initial_seed in test_cases:
            torch.manual_seed(seed)
            actual_initial_seed = torch.initial_seed()
            msg = (f"expected initial_seed() = {expected_initial_seed:x} "
                   f"after calling manual_seed({seed:x}), but got {actual_initial_seed:x} instead")
            self.assertEqual(expected_initial_seed, actual_initial_seed, msg=msg)

        # Seeds just outside the accepted interval must be rejected.
        for invalid_seed in [min_int64 - 1, max_uint64 + 1]:
            with self.assertRaisesRegex(ValueError, r'Overflow when unpacking long long'):
                torch.manual_seed(invalid_seed)
    finally:
        torch.set_rng_state(rng_state)
def test_copy_transpose(self):
    """``copy_`` from a transposed source places every element at its transposed position."""
    def check_columns(dest):
        # After the transpose, column j holds the values j*100 .. j*100 + 99.
        self.assertEqual(dest[:, 0], range(100))
        self.assertEqual(dest[:, 40], range(4000, 4100))

    # Real source copied into float and double destinations.
    real_src = torch.arange(100 * 100, dtype=torch.float).reshape(100, 100).t()
    for dest_dtype in (torch.float, torch.double):
        dest = torch.empty(100, 100, dtype=dest_dtype)
        dest.copy_(real_src)
        check_columns(dest)

    # Complex source and destination sharing the same dtype.
    for complex_dtype in (torch.cfloat, torch.complex32):
        complex_src = torch.arange(100 * 100).reshape(100, 100).to(dtype=complex_dtype).t()
        dest = torch.empty(100, 100, dtype=complex_dtype)
        dest.copy_(complex_src)
        check_columns(dest)
def test_copy_broadcast(self):
    """``copy_`` broadcasts a (6,) source into (5, 6) but rejects a (30,) source."""
    destination = torch.zeros(5, 6)
    destination.copy_(torch.zeros(6))

    with self.assertRaises(RuntimeError):
        torch.zeros(5, 6).copy_(torch.zeros(30))
def test_copy_many_to_one(self):
    """Copying into an expanded destination must raise RuntimeError."""
    with self.assertRaises(RuntimeError):
        torch.zeros(1, 6).expand(5, 6).copy_(torch.zeros(5, 6))
def test_copy_float16(self):
    """Check ``copy_`` across float16/float32 pairs and broadcastable/non-broadcastable shapes.

    For the accepted shape pairs the CPU result is additionally compared
    with the NPU result when an NPU is available.
    """
    dtype_pairs = (
        (torch.float32, torch.float16),
        (torch.float16, torch.float32),
        (torch.float32, torch.float32),
    )
    # (out_shape, src_shape, is_ok)
    shape_cases = (
        ((1, 2, 3), (0, 2, 3), False),
        ((1, 5, 6), (4, 5, 6), False),
        (1, (0, 2, 3), False),
        ((4, 5, 6), (0, 2, 3), False),
        ((4, 5, 6), (1, 2, 3), False),
        ((4, 5, 6), (6, 5, 4), False),
        ((4, 5, 6), (1, 5, 6), True),
        ((4, 5, 6), (4, 5, 6), True),
        ((0, 2, 3), 1, True),
        ((4, 5, 6), (4, 5, 1), True),
    )
    cpu = torch.device('cpu')

    for (out_shape, src_shape, is_ok), (out_dtype, src_dtype) in itertools.product(shape_cases, dtype_pairs):
        out = torch.zeros(out_shape, dtype=out_dtype, device=cpu)
        src = torch.ones(src_shape, dtype=src_dtype, device=cpu)

        if not is_ok:
            with self.assertRaises(RuntimeError):
                out.copy_(src)
            continue

        compare_with_npu = torch_npu.npu.is_available()
        if compare_with_npu:
            # Take the device copies before the CPU destination is overwritten.
            out_npu = out.npu()
            src_npu = src.npu()
        cpu_result = out.copy_(src)
        if compare_with_npu:
            npu_result = out_npu.copy_(src_npu)
            self.assertEqual(cpu_result, npu_result)
def _test_to_with_layout(self, layout):
    """Check aliasing, dtype and device behaviour of ``Tensor.to`` for one layout.

    Args:
        layout: ``torch.sparse_csr`` switches the subject tensor to a sparse
            CSR matrix; for any other value a 0-dim dense tensor is used.
    """
    def check_copy_semantics(t, non_blocking=False):
        # A no-op conversion returns the tensor itself unless copy=True is passed.
        for target in (t, t.dtype, torch.empty_like(t)):
            self.assertIs(t, t.to(target, non_blocking=non_blocking))
            self.assertIsNot(t, t.to(target, non_blocking=non_blocking, copy=True))

        # Equivalent spellings of the tensor's own device must behave the same way.
        devices = [t.device]
        if t.device.type == 'npu':
            current = torch_npu.npu.current_device()
            if t.device.index == -1:
                devices.append(f'npu:{current}')
            elif t.device.index == current:
                devices.append('npu')
        for device in devices:
            self.assertIs(t, t.to(device, non_blocking=non_blocking))
            self.assertIs(t, t.to(device, t.dtype, non_blocking=non_blocking))
            self.assertIsNot(t, t.to(device, non_blocking=non_blocking, copy=True))
            self.assertIsNot(t, t.to(device, t.dtype, non_blocking=non_blocking, copy=True))

    is_sparse_csr = layout == torch.sparse_csr
    if is_sparse_csr:
        a = torch.tensor([[0, 1, 2], [2, 0, 3]]).to_sparse_csr()
    else:
        a = torch.tensor(5)

    check_copy_semantics(a)
    self.assertEqual(a.device, a.to('cpu').device)
    self.assertEqual(a.device, a.to('cpu', dtype=torch.float32).device)
    self.assertIs(torch.float32, a.to('cpu', dtype=torch.float32).dtype)
    self.assertEqual(a.device, a.to(torch.float32).device)
    self.assertIs(torch.float32, a.to(dtype=torch.float32).dtype)

    def check_data_ptr(getter):
        # No-op conversions share memory with ``a``; copy=True must not.
        self.assertEqual(getter(a), getter(a.to('cpu')))
        self.assertEqual(getter(a), getter(a.to(dtype=a.dtype, device=a.device, copy=False)))
        self.assertEqual(getter(a), getter(a.to('cpu', copy=False)))
        self.assertNotEqual(getter(a), getter(a.to('cpu', copy=True)))

    if is_sparse_csr:
        # The CSR tensor itself has no data pointer, so its member tensors are checked.
        with self.assertRaisesRegex(RuntimeError, "Cannot access data pointer of Tensor that doesn't have storage"):
            a.data_ptr()
        check_data_ptr(lambda tensor: tensor.values().data_ptr())
        check_data_ptr(lambda tensor: tensor.crow_indices().data_ptr())
        check_data_ptr(lambda tensor: tensor.col_indices().data_ptr())
    else:
        check_data_ptr(lambda tensor: tensor.data_ptr())

    if not torch_npu.npu.is_available():
        return

    for non_blocking in [True, False]:
        explicit_device = 'npu:0' if torch_npu.npu.device_count() == 1 else 'npu:1'
        for npu in ['npu', explicit_device]:
            b = torch.tensor(5., device=npu)
            check_copy_semantics(b, non_blocking)
            self.assertEqual(b.device, b.to(npu, non_blocking=non_blocking).device)
            self.assertEqual(a.device, b.to('cpu', non_blocking=non_blocking).device)
            self.assertEqual(b.device, a.to(npu, non_blocking=non_blocking).device)
            self.assertIs(torch.int32, b.to('cpu', dtype=torch.int32, non_blocking=non_blocking).dtype)
            self.assertEqual(a.device, b.to('cpu', dtype=torch.int32, non_blocking=non_blocking).device)
            self.assertIs(torch.int32, b.to(dtype=torch.int32).dtype)
            self.assertEqual(b.device, b.to(dtype=torch.int32).device)
def test_to(self):
self._test_to_with_layout(torch.strided)
self._test_to_with_layout(torch.sparse_csr)
def test_as_subclass(self):
    """``Tensor.as_subclass`` gives a subclass-typed alias that shares data and autograd history."""
    class SubTensor(torch.Tensor):
        member_var = object()

    bases = [
        torch.tensor(0),
        torch.tensor([1, 2]),
        torch.tensor([[3, 4], [5, 6]]),
    ]
    subs = [base.as_subclass(SubTensor) for base in bases]

    for sub in subs:
        self.assertTrue(type(sub) is SubTensor)
    for base, sub in zip(bases, subs):
        self.assertEqual(base, sub)

    # Writes through the base tensors must be visible through the subclass aliases.
    bases[0][()] = 1
    bases[1][1] = 3
    bases[2][1, 1] = 7
    for base, sub in zip(bases, subs):
        self.assertEqual(base, sub)

    # Class attributes of the subclass are reachable from the aliases.
    for sub in subs:
        self.assertTrue(sub.member_var is SubTensor.member_var)

    # Backward through the subclass alias must reach the original leaf.
    leaf = torch.tensor(5, dtype=torch.float32, requires_grad=True)
    exp_plain = torch.exp(leaf)
    exp_sub = exp_plain.as_subclass(SubTensor)
    self.assertTrue(leaf.grad is None)
    exp_sub.backward()
    self.assertTrue(leaf.grad is not None)

    class BadSubTensor:
        member_var = object()

    with self.assertRaisesRegex(
            RuntimeError,
            "Creating a Tensor subclass from a class that does not inherit from Tensor"):
        bases[0].as_subclass(BadSubTensor)
def test_slice(self):
    """Basic slicing of a 4x4 tensor: full, clamped, empty, negative and stepped ranges."""
    empty = torch.empty(0, 4)
    x = torch.arange(0., 16).view(4, 4)

    # Slices compared as tensors.
    tensor_cases = (
        (x[:], x),
        (x[:4], x),
        (x[:5], x),
        (x[2:1], empty),
        (x[2:2], empty),
        (x[10:12], empty),
    )
    for got, want in tensor_cases:
        self.assertEqual(got, want)

    # Slices compared through their nested-list form.
    list_cases = (
        (x[:1], [[0, 1, 2, 3]]),
        (x[:-3], [[0, 1, 2, 3]]),
        (x[:, -2:3], [[2], [6], [10], [14]]),
        (x[0:-1:2], [[0, 1, 2, 3], [8, 9, 10, 11]]),
    )
    for got, want in list_cases:
        self.assertEqual(got.tolist(), want)
def test_split_with_sizes_copy_out(self):
    """``torch.split_with_sizes_copy(out=...)`` fills its outputs, eagerly and under NPU graph replay."""
    if torch.npu.is_available():
        device = torch.device("npu:0")
    else:
        device = torch.device("cpu")
    x = torch.rand(30, 40, 50, device=device)
    cases = [
        (0, [3, 7, 8, 12]),
        (1, [3, 7, 10, 20]),
        (-2, [3, 7, 10, 20]),
        (2, [3, 7, 10, 12, 18]),
        (-1, [3, 7, 10, 12, 18]),
        (2, [3, 7, 10, 0, 30]),
    ]

    def assert_not_yet_filled(expects, outs):
        # Zeroed outputs must differ from the expected data (empty splits excepted).
        for expect, t in zip(expects, outs):
            if expect.numel() != 0:
                self.assertFalse(expect.eq(t).all().item())

    def assert_filled(expects, outs):
        for expect, t in zip(expects, outs):
            self.assertTrue(expect.eq(t).all().item())

    for dim, split_sizes in cases:
        views = x.split_with_sizes(split_sizes, dim=dim)
        expects = [v.clone() for v in views]

        # Eager call.
        out = [torch.zeros_like(v) for v in views]
        assert_not_yet_filled(expects, out)
        torch.split_with_sizes_copy(x, split_sizes, dim=dim, out=out)
        assert_filled(expects, out)

        # Same call captured in an NPU graph and replayed.
        if torch.npu.is_available():
            out = [torch.zeros_like(v) for v in views]
            assert_not_yet_filled(expects, out)
            g = torch.npu.NPUGraph()
            with torch.npu.graph(g):
                torch.split_with_sizes_copy(x, split_sizes, dim=dim, out=out)
            g.replay()
            assert_filled(expects, out)
def test_type(self):
    """``Tensor.type`` accepts type strings, tensor classes and dtypes for CPU and NPU targets."""
    cpu_float_specs = ('torch.FloatTensor', torch.FloatTensor)

    # Start from a CPU double tensor.
    x = torch.randn(3, 3).double()
    for spec in cpu_float_specs:
        self.assertEqual(x.type(spec).dtype, torch.float32)
    self.assertEqual(x.int().type(torch.Tensor).dtype, torch.get_default_dtype())
    self.assertEqual(x.type(torch.int32).dtype, torch.int32)
    npu_float_specs = ('torch.npu.FloatTensor', torch.npu.FloatTensor)
    for spec in npu_float_specs:
        self.assertEqual(x.type(spec).dtype, torch.float32)
    for spec in npu_float_specs:
        self.assertEqual(x.type(spec).device.type, 'npu')

    # Then from an NPU half tensor.
    x = torch.randn(3, 3).npu().half()
    for spec in cpu_float_specs:
        self.assertEqual(x.type(spec).dtype, torch.float32)
    for spec in cpu_float_specs:
        self.assertEqual(x.type(spec).device.type, 'cpu')
    self.assertEqual(x.int().type(torch.Tensor).dtype, torch.get_default_dtype())
    self.assertEqual(x.type(torch.int32).dtype, torch.int32)
    for spec in npu_float_specs:
        self.assertEqual(x.type(spec).dtype, torch.float32)
def test_qengine(self):
qengines = torch.backends.quantized.supported_engines
original_qe = torch.backends.quantized.engine
for qe in qengines:
torch.backends.quantized.engine = qe
assert torch.backends.quantized.engine == qe, 'qengine not set successfully'
torch.backends.quantized.engine = original_qe
def test_terminate_handler_on_crash(self):
    """A child that calls ``torch._C._abort()`` with TORCH_CUSTOM_TERMINATE set exits non-zero
    and its captured stdout mentions the abort handler."""
    child_code = (
        "import os; os.environ[\"TORCH_CUSTOM_TERMINATE\"] ='1'; "
        "import torch; import torch._C; torch._C._abort()"
    )
    with self.assertRaises(subprocess.CalledProcessError) as raised:
        subprocess.check_output([sys.executable, '-c', child_code], shell=False)
    failure = raised.exception
    captured = failure.stdout.decode("utf-8")
    self.assertNotEqual(failure.returncode, 0)
    self.assertNotEqual(captured, None)
    self.assertIn('Unhandled exception caught in c10/util/AbortHandler.h', captured)
@slowTest
def test_multinomial_invalid_probs(self):
    """``torch.multinomial`` rejects negative, infinite and NaN probabilities.

    Each case runs in a fresh interpreter so the failing sampling call stays
    isolated from this process.  The child builds the probabilities from its
    command-line arguments, samples two values on the CPU and prints the
    ``RuntimeError`` text, which is checked here.

    A child interpreter is used instead of ``mp.Pool`` because a worker
    defined inside this method is a local function and cannot be pickled for
    a ``spawn``-started pool.
    """
    expected = 'probability tensor contains either `inf`, `nan` or element < 0'
    # Arguments after ``-c <code>`` are passed through to sys.argv untouched,
    # so values such as ``-1.`` and ``-inf`` are not parsed as interpreter options.
    child_code = textwrap.dedent('''\
        import sys
        import torch
        probs = torch.tensor([float(v) for v in sys.argv[1:]])
        try:
            torch.multinomial(probs.to('cpu'), 2)
        except RuntimeError as e:
            print(e)
        ''')
    cases = (
        ('1.', '-1.', '1.'),
        ('1.', 'inf', '1.'),
        ('1.', '-inf', '1.'),
        ('1.', '1.', 'nan'),
    )
    for values in cases:
        with self.subTest(probs=values):
            output = subprocess.check_output(
                [sys.executable, '-c', child_code, *values], shell=False).decode('utf-8')
            # Nothing is printed if sampling unexpectedly succeeds, so this also fails then.
            self.assertIn(expected, output)
def test_to_with_tensor(self):
    """``t.to(other)`` lands on the device of ``other``, for CPU and (when available) NPU tensors."""
    a = torch.tensor(5)
    self.assertEqual(a.device, a.to(a).device)
    if not torch_npu.npu.is_available():
        return
    other_npu = 'npu:0' if torch_npu.npu.device_count() == 1 else 'npu:1'
    for non_blocking, npu in itertools.product([True, False], ['npu', other_npu]):
        b = torch.tensor(5., device=npu)
        self.assertEqual(b.device, b.to(b, non_blocking=non_blocking).device)
        self.assertEqual(a.device, b.to(a, non_blocking=non_blocking).device)
        self.assertEqual(b.device, a.to(b, non_blocking=non_blocking).device)
def test_device(self):
    """``torch.device`` construction, rejection of malformed specs, hashing and repr."""
    # (constructor arguments, expected str(), expected .type, expected .index)
    well_formed = (
        (('cpu',), 'cpu', 'cpu', None),
        (('cpu:0',), 'cpu:0', 'cpu', 0),
        (('cpu', 0), 'cpu:0', 'cpu', 0),
        (('npu',), 'npu', 'npu', None),
        (('npu:1',), 'npu:1', 'npu', 1),
        (('npu', 1), 'npu:1', 'npu', 1),
        (('npu', 90), 'npu:90', 'npu', 90),
    )
    for ctor_args, text, kind, index in well_formed:
        dev = torch.device(*ctor_args)
        self.assertEqual(text, str(dev))
        self.assertEqual(kind, dev.type)
        self.assertEqual(index, dev.index)

    malformed = (
        'cpu:-1',
        'npu:-1',
        'npu:2 ',
        'npu: 2',
        'npu:2 2',
        'npu:2.',
        'npu:2?',
        'npu:?2',
        'npu:',
        'npu:2.232',
        'npu:2 npu:3',
        'npu:2+npu:3',
        'npu:2npu:3',
        -1,
        'other',
        'other:0',
    )
    for spec in malformed:
        with self.assertRaises(RuntimeError):
            torch.device(spec)

    # Distinct device specs must hash to distinct values.
    device_set = {'cpu', 'cpu:0', 'npu', 'npu:0', 'npu:1', 'npu:10', 'npu:100'}
    device_hash_set = {hash(torch.device(name)) for name in device_set}
    self.assertEqual(len(device_set), len(device_hash_set))

    # repr() includes the index only when one was given.
    for name in device_set:
        dev = torch.device(name)
        if dev.index is None:
            expected_repr = f"device(type='{dev.type}')"
        else:
            expected_repr = f"device(type='{dev.type}', index={dev.index})"
        self.assertEqual(repr(dev), expected_repr)
@wrapDeterministicFlagAPITest
def test_deterministic_flag(self):
    """The boolean API and the debug-mode API for deterministic algorithms stay in sync."""
    # Boolean API -> debug mode (0 = off, 1 = warn only, 2 = error).
    for deterministic, warn_only in product([True, False], [True, False]):
        torch.use_deterministic_algorithms(deterministic, warn_only=warn_only)
        self.assertEqual(deterministic, torch.are_deterministic_algorithms_enabled())
        self.assertEqual(warn_only, torch.is_deterministic_algorithms_warn_only_enabled())

        if not deterministic:
            debug_mode = 0
        else:
            debug_mode = 1 if warn_only else 2
        self.assertEqual(debug_mode, torch.get_deterministic_debug_mode())

    # Integer debug mode -> boolean API.
    for debug_mode in (0, 1, 2):
        torch.set_deterministic_debug_mode(debug_mode)
        self.assertEqual(debug_mode, torch.get_deterministic_debug_mode())
        deterministic = debug_mode in [1, 2]
        warn_only = debug_mode == 1
        self.assertEqual(deterministic, torch.are_deterministic_algorithms_enabled())
        self.assertEqual(warn_only, torch.is_deterministic_algorithms_warn_only_enabled())

    # String names map onto the same integer modes.
    for debug_mode, debug_mode_str in enumerate(('default', 'warn', 'error')):
        torch.set_deterministic_debug_mode(debug_mode_str)
        self.assertEqual(debug_mode, torch.get_deterministic_debug_mode())

    # Integers are not accepted where bools are expected.
    mode_error = r"_set_deterministic_algorithms\(\): argument 'mode' \(position 1\) must be bool, not int"
    with self.assertRaisesRegex(TypeError, mode_error):
        torch.use_deterministic_algorithms(1)

    warn_only_error = r"_set_deterministic_algorithms\(\): argument 'warn_only' must be bool, not int"
    with self.assertRaisesRegex(TypeError, warn_only_error):
        torch.use_deterministic_algorithms(False, warn_only=1)
def test_deterministic_fill_uninitialized_memory(self):
    """The Python attribute and the ``torch._C`` accessors for fill_uninitialized_memory agree."""
    def assert_flag(expected):
        # Both views of the flag must report the same value.
        check = self.assertTrue if expected else self.assertFalse
        check(torch.utils.deterministic.fill_uninitialized_memory)
        check(torch._C._get_deterministic_fill_uninitialized_memory())

    with DeterministicGuard(True, fill_uninitialized_memory=False):
        assert_flag(False)

        # A nested guard overrides the flag and restores it on exit.
        with DeterministicGuard(True, fill_uninitialized_memory=True):
            assert_flag(True)
        assert_flag(False)

        # Setting through the Python attribute.
        for value in (False, True):
            torch.utils.deterministic.fill_uninitialized_memory = value
            assert_flag(value)

        # Setting through the C binding.
        for value in (False, True):
            torch._C._set_deterministic_fill_uninitialized_memory(value)
            assert_flag(value)

        with self.assertRaisesRegex(RuntimeError, r"expected a bool, but got int"):
            torch.utils.deterministic.fill_uninitialized_memory = 1
def test_type_conversion_via_dtype_name(self):
x = torch.tensor([1])
self.assertEqual(x.byte().dtype, torch.uint8)
self.assertEqual(x.bool().dtype, torch.bool)
self.assertEqual(x.char().dtype, torch.int8)
self.assertEqual(x.double().dtype, torch.float64)
self.assertEqual(x.float().dtype, torch.float32)
self.assertEqual(x.half().dtype, torch.float16)
self.assertEqual(x.int().dtype, torch.int32)
self.assertEqual(x.bfloat16().dtype, torch.bfloat16)
cfloat = x.cfloat()
self.assertEqual(cfloat.dtype, torch.complex64)
self.assertEqual(cfloat.real, x.float())
self.assertEqual(cfloat.imag, torch.zeros_like(cfloat.imag))
cdouble = x.cdouble()
self.assertEqual(cdouble.dtype, torch.complex128)
self.assertEqual(cdouble.real, x.double())
self.assertEqual(cdouble.imag, torch.zeros_like(cdouble.imag))
chalf = x.chalf()
self.assertEqual(chalf.dtype, torch.complex32)
self.assertEqual(chalf.real, x.half())
self.assertEqual(chalf.imag, torch.zeros_like(chalf.imag))
def test_type_alias(self):
type_alias_map = {torch.float64: torch.double,
torch.float32: torch.float,
torch.int32: torch.int,
torch.int64: torch.long,
torch.int16: torch.short,
torch.float16: torch.half,
torch.complex32: torch.chalf,
torch.complex64: torch.cfloat}
for dtype, alias in type_alias_map.items():
self.assertIs(alias, dtype)
def test_doc_template(self) -> None:
    """
    Check that docstrings registered through ``add_docstr`` in
    ``torch._torch_docs`` reference the shared argument templates instead of
    spelling out a common argument description verbatim.
    """
    from torch._torch_docs import (
        __file__ as doc_file,
        multi_dim_common,
        single_dim_common,
        factory_common_args,
        factory_like_common_args,
    )

    with open(doc_file, encoding="utf-8") as f:
        doc_strs = f.read()

    # Captures (function expression, docstring body) for every add_docstr call.
    add_docstr_pattern = r'add_docstr\(([^,]+?),[^"\']*?(?:"""|\'\'\')(.*?)(?:"""|\'\'\')(?:\.|,?[^,\)]*?\))'
    matches = re.findall(add_docstr_pattern, doc_strs, re.MULTILINE | re.DOTALL)
    self.assertTrue(matches)

    template_groups = [multi_dim_common, single_dim_common, factory_common_args, factory_like_common_args]
    for raw_func, raw_desc in matches:
        func = raw_func.strip()
        desc = raw_desc.strip()
        for k, v in chain.from_iterable(group.items() for group in template_groups):
            self.assertNotIn(v, desc, f'The argument description "{v}" in {func} can be '
                                      f'replaced by {{{k}}}')
def test_doc(self):
# Intended to check that public callables of a namespace carry a docstring,
# and that names on a skip list still lack one.
# NOTE(review): the helper defined below is named `_test_namespace`, but the
# calls at the end of this block use `test_namespace`, which is not defined in
# this block. Indentation is not visible here, so it cannot be told whether
# those calls sit inside the helper (and are never executed) or in the test
# body (and raise NameError) -- confirm and fix the name.
checked_types = (types.MethodType, types.FunctionType,
types.BuiltinFunctionType, types.BuiltinMethodType)
def _test_namespace(ns, *skips):
# NOTE(review): isinstance(ns, object) is true for every Python object, so the
# `else` branch is never taken and ns_name is always the class name.
if isinstance(ns, object):
ns_name = ns.__class__.__name__
else:
ns_name = ns.__name__
# Normalize skips: plain strings become exact-match regexes, anything else is
# used as given (expected to offer `.match`).
skip_regexes = []
for r in skips:
if isinstance(r, str):
skip_regexes.append(re.compile(f'^{re.escape(r)}$'))
else:
skip_regexes.append(r)
for name in dir(ns):
if name.startswith('_'):
continue
# These attribute names are read from purpose-built tensors (complex for
# real/imag, 2-D for H/mT/mH) instead of from ns itself.
if name in ['real', 'imag']:
y = torch.randn(1, dtype=torch.cfloat)
var = getattr(y, name)
elif name in ["H", "mT", "mH"]:
y = torch.randn(1, 1)
var = getattr(y, name)
else:
var = getattr(ns, name)
# Only functions and methods are checked.
if not isinstance(var, checked_types):
continue
doc = var.__doc__
has_doc = doc is not None and len(doc.strip()) > 0
full_name = ns_name + '.' + name
# Skipped names must still be undocumented; everything else must have docs.
if any(r.match(name) for r in skip_regexes):
self.assertFalse(has_doc,
f'New docs have been added for {full_name}, please remove '
'it from the skipped list in TestTorch.test_doc')
else:
self.assertTrue(has_doc, f'{full_name} is missing documentation')
# Tensor methods, with the names currently expected to be undocumented.
test_namespace(torch.randn(1),
'as_strided_',
re.compile('^clamp_(min|max)_?$'),
'is_distributed',
'is_nonzero',
'is_same_size',
'log_softmax',
'map2_',
'new',
'reinforce',
'relu',
'relu_',
'prelu',
'resize',
'resize_as',
'softmax',
'split_with_sizes',
'unsafe_split_with_sizes',
'_autocast_to_fp16',
'_autocast_to_fp32',
)
test_namespace(torch.nn)
test_namespace(torch.nn.functional, 'assert_int_or_pair')
def test_tensor_ctor_scalar(self):
x = torch.Tensor(torch.tensor(1.0))
self.assertEqual(x, torch.tensor(1.0))
def test_deepcopy_gradient(self):
    """``deepcopy`` carries ``.grad`` along (dense and sparse) and keeps aliasing with it."""
    from copy import deepcopy

    dense = torch.zeros(10)
    dense.grad = torch.ones(10)
    self.assertEqual(dense.grad, deepcopy(dense).grad)

    sparse = torch.zeros(10).to_sparse()
    sparse.grad = torch.ones(10).to_sparse()
    self.assertEqual(sparse.grad, deepcopy(sparse).grad)

    # Copying a tensor together with its grad must keep them linked in the copy.
    copied_tensor, copied_grad = deepcopy([dense, dense.grad])
    self.assertTrue(copied_tensor.grad is copied_grad)
def test_tensor_base_init(self):
self.assertRaises(RuntimeError, lambda: torch._C.TensorBase())
with self.assertRaisesRegex(RuntimeError, "Cannot subclass"):
class Tfail(torch._C.TensorBase):
pass
class T(torch.Tensor):
pass
T()
def test_storage_base_init(self):
self.assertRaises(RuntimeError, lambda: torch._C.StorageBase())
class T(torch._C.StorageBase):
pass
T()
def test_tensor_base_new(self):
class TestTensor(torch.Tensor):
@staticmethod
def __new__(cls, x, *args, **kwargs):
return super().__new__(cls, x, *args, **kwargs)
x = torch.ones(5)
TestTensor(x)
def test_storage_base_new(self):
class TestStorage(torch._C.StorageBase):
@staticmethod
def __new__(cls, x, *args, **kwargs):
return super().__new__(cls, x, *args, **kwargs)
x = torch.UntypedStorage(5)
TestStorage(x)
def test_pyobj_preserved(self):
# A Python attribute set on a tensor must survive while the tensor is reachable
# only through another tensor's .grad.
x = torch.empty(2)
x.foo = 2
y = torch.empty(2)
y.grad = x
# Drop the local name; the tensor is now reached only via y.grad.
del x
self.assertEqual(y.grad.foo, 2)
# Take and drop another Python reference, then check the attribute again.
z = y.grad
del z
self.assertEqual(y.grad.foo, 2)
def test_subclass_preserved(self):
# The Python subclass of a tensor must survive while the tensor is reachable
# only through another tensor's .grad.
class MyTensor(torch.Tensor):
pass
x = MyTensor(torch.empty(2))
y = torch.empty(2)
y.grad = x
# Drop the local name; the tensor is now reached only via y.grad.
del x
self.assertEqual(type(y.grad), MyTensor)
# Take and drop another Python reference, then check the type again.
z = y.grad
del z
self.assertEqual(type(y.grad), MyTensor)
@skipIfTorchDynamo("Tracker hook does not work in TorchDynamo")
def test_storage_dealloc(self):
# An attribute attached to an UntypedStorage must stay alive until the last
# Python reference to that storage is dropped.
# Tracker is defined outside this chunk; the assertions treat m[0] as a flag
# that becomes true once the tracker object t has been destroyed.
m, t = Tracker.make()
s0 = torch.UntypedStorage(10)
# s1 is a second name for the same storage object.
s1 = s0
s0._tracker = t
del t
self.assertFalse(m[0])
del s0
# Still referenced through s1, so the tracker must not have been released yet.
self.assertFalse(m[0])
del s1
self.assertTrue(m[0])
@skipIfTorchDynamo("Tracker hook does not work in TorchDynamo")
def test_storage_from_tensor_dealloc(self):
# The storage object obtained from a tensor keeps its attributes after the
# tensor is deleted, until the last Python reference to the storage goes away.
# Tracker is defined outside this chunk; the assertions treat m[0] as a flag
# that becomes true once the tracker object t has been destroyed.
m, t = Tracker.make()
a = torch.randn(10)
s0 = a.untyped_storage()
s0._tracker = t
del t
# A second lookup must return the same Python object, attribute included.
s1 = a.untyped_storage()
self.assertTrue(s0 is s1)
self.assertTrue(hasattr(s1, '_tracker'))
# Tensor goes first; the storage is still referenced by s0 and s1.
del a
self.assertFalse(m[0])
del s0
self.assertFalse(m[0])
del s1
self.assertTrue(m[0])
@skipIfTorchDynamo("Tracker hook does not work in TorchDynamo")
def test_storage_from_tensor_dealloc_zombie(self):
# Dropping every Python reference to a tensor's storage object must not release
# the storage's attributes while the tensor itself is still alive.
# Tracker is defined outside this chunk; the assertions treat m[0] as a flag
# that becomes true once the tracker object t has been destroyed.
m, t = Tracker.make()
a = torch.randn(10)
s0 = a.untyped_storage()
s0._tracker = t
del t
s1 = a.untyped_storage()
self.assertTrue(s0 is s1)
self.assertTrue(hasattr(s1, '_tracker'))
self.assertFalse(m[0])
del s0
self.assertFalse(m[0])
# No Python name refers to the storage any more, but tensor `a` still exists.
del s1
self.assertFalse(m[0])
# Only deleting the tensor is expected to release the tracker.
del a
self.assertTrue(m[0])
@skipIfTorchDynamo("Tracker hook does not work in TorchDynamo")
def test_storage_from_tensor_dealloc_resurrected(self):
# After all Python references to a tensor's storage object are dropped, asking
# the tensor for its storage again must give a usable UntypedStorage, and the
# tracker must be released only when that last reference is deleted.
# Tracker is defined outside this chunk; the assertions treat m[0] as a flag
# that becomes true once the tracker object t has been destroyed.
m, t = Tracker.make()
a = torch.randn(10)
s0 = a.untyped_storage()
s0._tracker = t
del t
s1 = a.untyped_storage()
self.assertTrue(s0 is s1)
self.assertTrue(hasattr(s1, '_tracker'))
self.assertFalse(m[0])
del s0
self.assertFalse(m[0])
del s1
self.assertFalse(m[0])
# Obtain a Python handle to the storage again while the tensor is still alive.
s0 = a.untyped_storage()
self.assertTrue(isinstance(s0, torch.UntypedStorage))
del a
self.assertFalse(m[0])
del s0
self.assertTrue(m[0])
@skipIfTorchDynamo("Tracker hook does not work in TorchDynamo")
def test_storage_dealloc_resurrected(self):
# A storage created first and then wrapped by torch.tensor(s) must keep its
# attribute while the tensor lives, and remain retrievable from the tensor.
# Tracker is defined outside this chunk; the assertions treat m[0] as a flag
# that becomes true once the tracker object t has been destroyed.
m, t = Tracker.make()
s = torch.UntypedStorage(10)
s._tracker = t
del t
a = torch.tensor(s)
self.assertFalse(m[0])
# Drop the Python name for the storage; tensor `a` was built from it.
del s
self.assertFalse(m[0])
# Fetch a Python handle to the storage again through the tensor.
s = a.untyped_storage()
self.assertTrue(isinstance(s, torch.UntypedStorage))
del a
self.assertFalse(m[0])
del s
self.assertTrue(m[0])
@skipIfTorchDynamo("Tracker hook does not work in TorchDynamo")
def test_storage_dealloc_subclass_zombie(self):
# For an UntypedStorage subclass with __del__: the finalizer must not run when
# the Python name is dropped while a tensor built from the storage is alive,
# and must run exactly once after that tensor is deleted.
# Tracker is defined outside this chunk; the assertions treat m[0] as a flag
# that becomes true once the tracker object t has been destroyed.
class MyStorage(torch.UntypedStorage):
# Class-level counter of __del__ invocations.
finalized_count = 0
def __del__(self):
MyStorage.finalized_count += 1
m, t = Tracker.make()
s = MyStorage(10)
s._tracker = t
del t
a = torch.tensor(s)
self.assertFalse(m[0])
del s
self.assertEqual(MyStorage.finalized_count, 0)
self.assertFalse(m[0])
del a
self.assertEqual(MyStorage.finalized_count, 1)
self.assertTrue(m[0])
@skipIfTorchDynamo("Tracker hook does not work in TorchDynamo")
def test_storage_dealloc_subclass_resurrected(self):
# For an UntypedStorage subclass with __del__: after the Python name is dropped
# and the storage is fetched again from the tensor, it must still be a
# MyStorage, and the finalizer must run exactly once, at the final deletion.
# Tracker is defined outside this chunk; the assertions treat m[0] as a flag
# that becomes true once the tracker object t has been destroyed.
class MyStorage(torch.UntypedStorage):
# Class-level counter of __del__ invocations.
finalized_count = 0
def __del__(self):
MyStorage.finalized_count += 1
m, t = Tracker.make()
s = MyStorage(10)
s._tracker = t
del t
a = torch.tensor(s)
self.assertFalse(m[0])
del s
self.assertEqual(MyStorage.finalized_count, 0)
self.assertFalse(m[0])
# Fetch the storage again through the tensor, then drop the tensor.
s = a.untyped_storage()
del a
self.assertFalse(m[0])
self.assertEqual(MyStorage.finalized_count, 0)
self.assertTrue(isinstance(s, MyStorage))
del s
self.assertEqual(MyStorage.finalized_count, 1)
self.assertTrue(m[0])
def test_tensor_ressurecting_clear(self):
# A tensor that sits in a reference cycle and is also saved by autograd must
# keep its Python attributes after the cycle is garbage-collected.
t = torch.rand(2, requires_grad=True).clone()
t.foo = 2
# l contains itself, forming a cycle that also references t.
l = []
l.append(l)
l.append(t)
# t ** 2 saves t as the grad_fn's `_saved_self` (asserted on the next line).
t2 = t ** 2
self.assertIs(t2.grad_fn._saved_self, t)
del t, l
# Collect the cycle; the tensor is still reachable through t2.grad_fn.
gc.collect()
self.assertTrue(hasattr(t2.grad_fn._saved_self, "foo"))
def test_tensor_slot_dealloc(self):
# Objects stored in __slots__ of a Tensor subclass (including slots declared on
# a parent subclass) must be released when the tensor is deleted.
# Tracker is defined outside this chunk; the assertions treat m1[0]/m2[0] as
# flags that become true once the corresponding tracker object is destroyed.
class SlotTensor1(torch.Tensor):
__slots__ = ['slot1']
class SlotTensor2(SlotTensor1):
__slots__ = ['slot2']
m1, t1 = Tracker.make()
m2, t2 = Tracker.make()
slot_tensor = SlotTensor2(torch.empty(2))
slot_tensor.slot1 = t1
slot_tensor.slot2 = t2
del t1
del t2
# The slots still hold the trackers.
self.assertFalse(m1[0])
self.assertFalse(m2[0])
del slot_tensor
self.assertTrue(m1[0])
self.assertTrue(m2[0])
def test_storage_slot_dealloc(self):
# Objects stored in __slots__ of a StorageBase subclass (including slots
# declared on a parent subclass) must be released when the storage is deleted.
# Tracker is defined outside this chunk; the assertions treat m1[0]/m2[0] as
# flags that become true once the corresponding tracker object is destroyed.
class SlotStorage1(torch._C.StorageBase):
__slots__ = ['slot1']
class SlotStorage2(SlotStorage1):
__slots__ = ['slot2']
m1, t1 = Tracker.make()
m2, t2 = Tracker.make()
slot_storage = SlotStorage2(torch.UntypedStorage(2))
slot_storage.slot1 = t1
slot_storage.slot2 = t2
del t1
del t2
# The slots still hold the trackers.
self.assertFalse(m1[0])
self.assertFalse(m2[0])
del slot_storage
self.assertTrue(m1[0])
self.assertTrue(m2[0])
@skipIfTorchDynamo("Not a suitable test for TorchDynamo")
def test_tensor_dict_dealloc(self):
# An object stored as an ordinary attribute of a tensor must be released when
# the tensor is deleted.
# Tracker is defined outside this chunk; the assertions treat m[0] as a flag
# that becomes true once the tracker object t has been destroyed.
m, t = Tracker.make()
x = torch.empty(2)
x.arf = t
del t
self.assertFalse(m[0])
del x
self.assertTrue(m[0])
@skipIfTorchDynamo("Not a suitable test for TorchDynamo")
def test_storage_dict_dealloc(self):
# An object stored as an ordinary attribute of an UntypedStorage must be
# released when the storage is deleted.
# Tracker is defined outside this chunk; the assertions treat m[0] as a flag
# that becomes true once the tracker object t has been destroyed.
m, t = Tracker.make()
x = torch.UntypedStorage(2)
x.arf = t
del t
self.assertFalse(m[0])
del x
self.assertTrue(m[0])
def test_tensor_finalizer_dealloc(self):
# __del__ defined on a Tensor subclass must run when the tensor is deleted.
# m is a one-element list used as a mutable flag that __del__ sets.
m = [False]
class FinalizerTensor(torch.Tensor):
def __del__(self):
m[0] = True
fin_tensor = FinalizerTensor(torch.empty(2))
self.assertFalse(m[0])
del fin_tensor
self.assertTrue(m[0])
def test_storage_finalizer_dealloc(self):
# __del__ defined on a StorageBase subclass must run when the storage is deleted.
# m is a one-element list used as a mutable flag that __del__ sets.
m = [False]
class FinalizerStorage(torch._C.StorageBase):
def __del__(self):
m[0] = True
fin_storage = FinalizerStorage(torch.UntypedStorage(2))
self.assertFalse(m[0])
del fin_storage
self.assertTrue(m[0])
@skipIfTorchDynamo("see pytorch torchdynamo issues 1993")
def test_tensor_weakref_dealloc(self):
# Deleting a tensor must fire the callback of a weak reference to it and leave
# the weak reference dead.
x = torch.empty(2)
# One-element list used as a mutable flag set by the weakref callback.
m = [False]
def cb(r):
m[0] = True
wref = weakref.ref(x, cb)
del x
self.assertTrue(m[0])
self.assertEqual(wref(), None)
@skipIfTorchDynamo("pytorch torchdynamo issues 1993")
def test_storage_weakref_dealloc(self):
# Deleting an UntypedStorage must fire the callback of a weak reference to it
# and leave the weak reference dead.
x = torch.UntypedStorage(2)
# One-element list used as a mutable flag set by the weakref callback.
m = [False]
def cb(r):
m[0] = True
wref = weakref.ref(x, cb)
del x
self.assertTrue(m[0])
self.assertEqual(wref(), None)
@skipIfTorchDynamo("Not a suitable test for TorchDynamo")
def test_tensor_cycle_via_dict(self):
# Two tensors that reference each other through ordinary attributes form a
# cycle; it must stay alive while a third tensor references it via .grad, and
# must be reclaimed by gc.collect() once that reference is gone.
# Tracker and disable_gc are defined outside this chunk; the assertions treat
# m1[0]/m2[0] as flags that become true once the tracker object is destroyed.
m1, t1 = Tracker.make()
x = torch.empty(2)
x._tracker = t1
del t1
m2, t2 = Tracker.make()
y = torch.empty(2)
y._tracker = t2
del t2
# Build the x <-> y cycle.
x._loop = y
y._loop = x
# z keeps the cycle reachable from outside through its .grad.
z = torch.empty(2)
z.grad = x
del x
del y
gc.collect()
self.assertFalse(m1[0])
self.assertFalse(m2[0])
# presumably disable_gc() suspends automatic collection so that deleting z
# alone cannot reclaim the cycle -- confirm against its definition.
with disable_gc():
del z
self.assertFalse(m1[0])
self.assertFalse(m2[0])
gc.collect()
self.assertTrue(m1[0])
self.assertTrue(m2[0])
@skipIfTorchDynamo("Not a suitable test for TorchDynamo")
def test_storage_cycle_via_dict(self):
# Two storages that reference each other through ordinary attributes form a
# cycle; it must stay alive while a third storage references it through an
# attribute, and must be reclaimed by gc.collect() once that reference is gone.
# Tracker and disable_gc are defined outside this chunk; the assertions treat
# m1[0]/m2[0] as flags that become true once the tracker object is destroyed.
m1, t1 = Tracker.make()
x = torch.UntypedStorage(2)
x._tracker = t1
del t1
m2, t2 = Tracker.make()
y = torch.UntypedStorage(2)
y._tracker = t2
del t2
# Build the x <-> y cycle.
x._loop = y
y._loop = x
# z keeps the cycle reachable from outside; `grad` here is a plain attribute
# set on the storage object.
z = torch.UntypedStorage(2)
z.grad = x
del x
del y
gc.collect()
self.assertFalse(m1[0])
self.assertFalse(m2[0])
# presumably disable_gc() suspends automatic collection so that deleting z
# alone cannot reclaim the cycle -- confirm against its definition.
with disable_gc():
del z
self.assertFalse(m1[0])
self.assertFalse(m2[0])
gc.collect()
self.assertTrue(m1[0])
self.assertTrue(m2[0])
def test_tensor_cycle_via_slots(self):
# Two Tensor-subclass instances that reference each other through __slots__
# form a cycle; it must be reclaimed by gc.collect(), running both finalizers.
# disable_gc is defined outside this chunk.
# One-element lists used as mutable flags set by the finalizers below.
m1 = [False]
m2 = [False]
class SlotTensor1(torch.Tensor):
__slots__ = ['slot1']
def __del__(self):
m1[0] = True
class SlotTensor2(SlotTensor1):
__slots__ = ['slot2']
def __del__(self):
m2[0] = True
x = SlotTensor1(torch.empty(2))
x_ref = weakref.ref(x)
y = SlotTensor2(torch.empty(2))
# Build the x <-> y cycle through the slots.
x.slot1 = y
y.slot2 = x
del x
# presumably disable_gc() suspends automatic collection so the cycle cannot be
# reclaimed before the explicit gc.collect() -- confirm against its definition.
with disable_gc():
del y
self.assertFalse(m1[0])
self.assertFalse(m2[0])
gc.collect()
self.assertTrue(m1[0])
self.assertTrue(m2[0])
# Nothing of either subclass may remain tracked by the collector.
self.assertIsNone(x_ref())
self.assertFalse(any(isinstance(o, SlotTensor1) for o in gc.get_objects()))
def test_storage_cycle_via_slots(self):
# Two StorageBase-subclass instances that reference each other through
# __slots__ form a cycle; it must be reclaimed by gc.collect(), running both
# finalizers. disable_gc is defined outside this chunk.
# One-element lists used as mutable flags set by the finalizers below.
m1 = [False]
m2 = [False]
class SlotStorage1(torch._C.StorageBase):
__slots__ = ['slot1']
def __del__(self):
m1[0] = True
class SlotStorage2(SlotStorage1):
__slots__ = ['slot2']
def __del__(self):
m2[0] = True
x = SlotStorage1(torch.UntypedStorage(2))
y = SlotStorage2(torch.UntypedStorage(2))
# Build the x <-> y cycle through the slots.
x.slot1 = y
y.slot2 = x
del x
# presumably disable_gc() suspends automatic collection so the cycle cannot be
# reclaimed before the explicit gc.collect() -- confirm against its definition.
with disable_gc():
del y
self.assertFalse(m1[0])
self.assertFalse(m2[0])
gc.collect()
self.assertTrue(m1[0])
self.assertTrue(m2[0])
@skipIfTorchDynamo("Not a suitable test for TorchDynamo")
def test_storage_preserve_nonhermetic_in_hermetic_context(self):
# Drops the last Python name of a storage object from inside a custom operator
# implemented in Python, and checks that the storage's attribute is released
# only later, when the tensor and the re-fetched storage are deleted.
# Tracker is defined outside this chunk; the assertions treat m[0] as a flag
# that becomes true once the tracker object t has been destroyed.
from torch.library import Library, impl
# Module-level name so the operator implementation can delete it via `global`.
global _my_storage
my_lib = Library("my_lib", "DEF")
my_lib.define('my_func() -> None')
a = torch.tensor([1.])
_my_storage = a.untyped_storage()
m, t = Tracker.make()
_my_storage._tracker = t
del t
@impl(my_lib, 'my_func', '')
def my_func():
global _my_storage
del _my_storage
self.assertFalse(m[0])
# Running the op deletes the global name; tensor `a` still exists.
torch.ops.my_lib.my_func()
self.assertFalse(m[0])
s = a.untyped_storage()
del a
del s
self.assertTrue(m[0])
@skipIfTorchDynamo("TorchDynamo does not work well with hooks")
def test_backward_hooks_traverse(self):
# Two tensors referencing each other through `_backward_hooks` form a cycle
# that gc.collect() must be able to find and reclaim.
# Tracker and disable_gc are defined outside this chunk; the assertions treat
# m1[0]/m2[0] as flags that become true once the tracker object is destroyed.
m1, t1 = Tracker.make()
m2, t2 = Tracker.make()
x = torch.empty(2, requires_grad=True)
x._tracker = t1
y = torch.empty(2, requires_grad=True)
y._tracker = t2
del t1
del t2
# Build the x <-> y cycle through the _backward_hooks attribute.
x._backward_hooks = y
y._backward_hooks = x
del x
# presumably disable_gc() suspends automatic collection so the cycle cannot be
# reclaimed before the explicit gc.collect() -- confirm against its definition.
with disable_gc():
del y
self.assertFalse(m1[0])
self.assertFalse(m2[0])
gc.collect()
self.assertTrue(m1[0])
self.assertTrue(m2[0])
@skipIfTorchDynamo("see pytorch torchdynamo issue 1993")
def test_tensor_dead_weak_ref(self):
# Re-obtains a tensor through a weak reference after its Python name was
# dropped, then deletes the tensor that referenced it via .grad; using the
# re-obtained object afterwards is expected to raise RuntimeError.
x = torch.empty(2)
w_x = weakref.ref(x)
y = torch.empty(2)
y.grad = x
del x
# Dereference the weak reference to get a Python handle again.
x = w_x()
del y
self.assertRaises(RuntimeError, lambda: x.sigmoid())
@skipIfTorchDynamo("pytorch/torchdynamo/issues/1993")
def test_storage_dead_weak_ref(self):
# Re-obtains a storage through a weak reference after its Python name was
# dropped, then deletes the tensor built from it; using the re-obtained object
# afterwards is expected to raise "Got a null Storage".
x = torch.UntypedStorage(2)
w_x = weakref.ref(x)
y = torch.tensor(x)
del x
# Dereference the weak reference to get a Python handle again.
x = w_x()
del y
self.assertRaisesRegex(RuntimeError, "Got a null Storage", lambda: x[0])
self.assertRaisesRegex(RuntimeError, "Got a null Storage", lambda: x.float())
def test_tensor_resurrected_weak_ref(self):
# Same sequence as the dead-weak-ref tensor test, except that _fix_weakref() is
# called on the re-obtained tensor before its other owner is deleted; the
# final sigmoid() call must then succeed.
x = torch.empty(2)
w_x = weakref.ref(x)
y = torch.empty(2)
y.grad = x
del x
# Dereference the weak reference to get a Python handle again.
x = w_x()
x._fix_weakref()
del y
x.sigmoid()
def test_storage_resurrected_weak_ref(self):
# Same sequence as the dead-weak-ref storage test, except that _fix_weakref()
# is called on the re-obtained storage before the tensor is deleted; the final
# float() call must then succeed.
x = torch.UntypedStorage(2)
w_x = weakref.ref(x)
y = torch.tensor(x)
del x
# Dereference the weak reference to get a Python handle again.
x = w_x()
x._fix_weakref()
del y
x.float()
@skipIfTorchDynamo("see pytorch torchdynamo issue 1993")
def test_tensor_fix_weakref_no_leak(self):
# Calling _fix_weakref() on a tensor must not keep it alive: deleting the
# tensor afterwards still has to fire the weak-reference callback.
import weakref
called = False
a = torch.randn(1)
def callback(w):
nonlocal called
called = True
# wa must stay bound so that the weak reference (and its callback) exists.
wa = weakref.ref(a, callback)
a._fix_weakref()
del a
self.assertTrue(called)
@skipIfTorchDynamo("pytorch torchdynamo issues 1993")
def test_storage_fix_weakref_no_leak(self):
# Calling _fix_weakref() on a storage must not keep it alive: deleting the
# storage afterwards still has to fire the weak-reference callback.
import weakref
called = False
a = torch.UntypedStorage(1)
def callback(w):
nonlocal called
called = True
# wa must stay bound so that the weak reference (and its callback) exists.
wa = weakref.ref(a, callback)
a._fix_weakref()
del a
self.assertTrue(called)
@torch.inference_mode()
def test_bmm_multithreaded(self):
    """CPU ``torch.bmm`` with four intra-op threads matches a NumPy matmul reference.

    Inputs cover permuted memory layouts, expanded (broadcast) operands and
    zero-sized dimensions; the ``out=`` variant is checked for every memory
    layout of the output.  The previous thread count is restored afterwards.
    """
    device = 'cpu'
    num_threads = torch.get_num_threads()
    torch.set_num_threads(4)
    M, N, O = 23, 8, 12
    dtype = torch.float32
    numpy_dtype = dtype
    axes = (0, 1, 2)

    def invert_perm(p):
        # For each axis, the position where p placed it: the permutation undoing p.
        return tuple(p.index(axis) for axis in axes)

    def rand(shape):
        return make_tensor(shape, dtype=dtype, device=device, low=-1, high=1)

    def generate_inputs(num_batches):
        # Operands with every combination of permuted memory layouts.
        for perm1, perm2 in itertools.product(itertools.permutations(axes), repeat=2):
            lhs = rand((num_batches, M, N))
            rhs = rand((num_batches, N, O))
            lhs = lhs.permute(perm1).contiguous().permute(invert_perm(perm1))
            rhs = rhs.permute(perm2).contiguous().permute(invert_perm(perm2))
            yield lhs, rhs
        # Operands expanded from size-1 dimensions.
        for keep in itertools.product((True, False), repeat=6):
            shape1 = (num_batches if keep[0] else 1, M if keep[1] else 1, N if keep[2] else 1)
            shape2 = (num_batches if keep[3] else 1, N if keep[4] else 1, O if keep[5] else 1)
            lhs = rand(shape1).expand(num_batches, M, N)
            rhs = rand(shape2).expand(num_batches, N, O)
            yield lhs, rhs
        # Operands with zero-sized dimensions.
        for z1, z2, z3, z4 in itertools.product((True, False), repeat=4):
            shape1 = (num_batches if z1 else 0, M if z2 else 0, N if z3 else 0)
            shape2 = (num_batches if z1 else 0, N if z3 else 0, O if z4 else 0)
            lhs = torch.randn(shape1, dtype=dtype, device=device)
            rhs = torch.randn(shape2, dtype=dtype, device=device)
            yield lhs, rhs

    try:
        for num_batches in (1, 10):
            all_cases = itertools.product(generate_inputs(num_batches), itertools.permutations(axes))
            for (b1, b2), perm3 in all_cases:
                res1 = torch.bmm(b1, b2)
                # NaN-filled output buffer laid out according to perm3.
                res2 = torch.full((num_batches, M, O), math.nan, dtype=dtype, device=device)
                res2 = res2.permute(perm3).contiguous().permute(invert_perm(perm3))
                torch.bmm(b1, b2, out=res2)
                reference = b1.to(numpy_dtype).cpu().numpy() @ b2.to(numpy_dtype).cpu().numpy()
                expect = torch.from_numpy(reference).to(device=device, dtype=dtype)
                self.assertEqual(expect, res1)
                self.assertEqual(expect, res2)
    finally:
        torch.set_num_threads(num_threads)
def test_conj_neg_tolist(self):
    """A ``conj()`` result and its ``.imag`` compare equal to the ``tolist()`` of the
    ``conj_physical()`` result."""
    x = torch.randn(2, dtype=torch.cfloat)
    lazy_conj = x.conj()
    materialized = x.conj_physical()
    lazy_imag = lazy_conj.imag
    self.assertEqual(lazy_conj, materialized.tolist())
    self.assertEqual(lazy_imag, materialized.imag.tolist())
def test_no_NPU_monkeypatch(self):
    """Instantiating the NPU Stream, Event and NPUGraph classes raises the dummy-base-class error."""
    dummy_classes = (
        ("Tried to instantiate dummy base class Stream", lambda: torch_npu.npu.Stream()),
        ("Tried to instantiate dummy base class Event", lambda: torch_npu.npu.Event()),
        ("Tried to instantiate dummy base class NPUGraph", lambda: torch_npu.npu.graphs.NPUGraph()),
    )
    for message, instantiate in dummy_classes:
        with self.assertRaisesRegex(RuntimeError, message):
            instantiate()
def test_tensor_where_scalar(self):
    """``torch.where`` and ``Tensor.where`` agree when the fallback value is a Python scalar."""
    a = torch.arange(4.0)
    not_zero = 0.001
    via_function = torch.where(a != 0, a, not_zero)
    via_method = a.where(a != 0, not_zero)
    self.assertEqual(via_function, via_method)
def test_data_ptr_of_empty_tensor_with_storage(self):
t = torch.empty((2, 2))
self.assertNotEqual(t.data_ptr(), 0)
t.resize_((0, 2))
self.assertEqual(t.data_ptr(), 0)
def test_data_ptr_of_empty_view_with_storage(self):
t = torch.empty((2, 2))
self.assertNotEqual(t.data_ptr(), 0)
t2 = t[0:0].view(0, 1)
self.assertEqual(t2.data_ptr(), 0)
def test_size_stride(self) -> None:
t = torch.rand(2, 3, dtype=torch.float32)
self.assertEqual(t.size(0), 2)
self.assertEqual(t.size(dim=None), torch.Size([2, 3]))
self.assertEqual(t.stride(dim=None), torch.Size([3, 1]))
self.assertEqual(t.t().stride(), torch.Size([1, 3]))
def test_invalid_arg_error_handling(self) -> None:
""" Tests that errors from old TH functions are propagated back """
for invalid_val in [-1, 2 ** 65]:
self.assertRaises((ValueError, RuntimeError), lambda: torch.set_num_threads(invalid_val))
self.assertRaises((ValueError, RuntimeError), lambda: torch.set_num_interop_threads(invalid_val))
def _get_tensor_prop(self, t):
    """Snapshot the properties of ``t`` that ``_checked_swap`` compares around a swap.

    Returns:
        A pair ``(preserved, moved)``:
        ``preserved`` is ``(id(t), refcount)``, where the refcount is replaced
        by 0 when TEST_WITH_TORCHDYNAMO is set;
        ``moved`` is ``(slot names, id of t.__dict__, tuple of __dict__ keys,
        list of slot values)``, with missing slot attributes reported as None.
    """
    if TEST_WITH_TORCHDYNAMO:
        refcount = 0
    else:
        refcount = sys.getrefcount(t)
    preserved = (id(t), refcount)

    slot_names = copyreg._slotnames(t.__class__)
    instance_dict = t.__dict__
    slot_values = [getattr(t, slot, None) for slot in slot_names]
    moved = (
        slot_names,
        id(instance_dict),
        tuple(instance_dict.keys()),
        slot_values,
    )
    return preserved, moved
def _checked_swap(self, t1, t2):
    """Swap ``t1`` and ``t2`` with ``torch.utils.swap_tensors`` and verify the outcome.

    Using the snapshots from ``_get_tensor_prop``: the "preserved" part of
    each object must be unchanged, the "moved" parts must have traded places,
    and an in-place ``fill_`` on each object must still return that object.
    """
    kept_1, swapped_1 = self._get_tensor_prop(t1)
    kept_2, swapped_2 = self._get_tensor_prop(t2)

    torch.utils.swap_tensors(t1, t2)

    kept_1_after, swapped_1_after = self._get_tensor_prop(t1)
    kept_2_after, swapped_2_after = self._get_tensor_prop(t2)

    # Preserved properties stay with the same Python object
    self.assertEqual(kept_1, kept_1_after)
    self.assertEqual(kept_2, kept_2_after)

    # Moved properties are found on the other object afterwards
    self.assertEqual(swapped_1, swapped_2_after)
    self.assertEqual(swapped_2, swapped_1_after)

    # In-place ops on the swapped objects still return the object itself
    for tensor in (t1, t2):
        self.assertEqual(id(tensor.fill_(0.5)), id(tensor))
@unittest.skipIf(TEST_WITH_TORCHDYNAMO, "Dynamo adds weakrefs")
def test_swap_basic(self):
    """Swap clones of every pair of sample tensors and check the follow-up error cases.

    For each pair: the swap goes through ``_checked_swap``; a list holding
    ``t1`` must still hold the same object, which now carries the ``foo``
    attribute set on ``t2``. For floating-point ``t1``, backward through a
    graph built before a swap must raise, and swapping a tensor that has a
    weak reference must raise.
    """
    samples = [
        torch.rand(2),
        torch.rand(3, 3),
        torch.empty(3, dtype=torch.int),
        TwoTensor(torch.rand(4), torch.rand(4)),
    ]
    for first, second in itertools.combinations(samples, 2):
        t1 = first.clone()
        t2 = second.clone()
        t2.foo = "bar"
        holder = [t1]

        self._checked_swap(t1, t2)

        self.assertIs(holder[0], t1)
        self.assertEqual(t1.foo, "bar")

        if t1.is_floating_point():
            t3 = t1.detach().clone().requires_grad_(True)
            out = t3 * 2
            torch.utils.swap_tensors(t3, t2)
            with self.assertRaisesRegex(RuntimeError, "AccumulateGrad node that was poisoned by swap_tensors"):
                out.sum().backward()

        # Bind the weak reference to a name so it is still alive during the swap attempt
        wr = weakref.ref(t1)
        with self.assertRaisesRegex(RuntimeError, "has weakref"):
            torch.utils.swap_tensors(t1, t2)
@unittest.skipIf(TEST_WITH_TORCHDYNAMO, "Dynamo adds weakrefs")
def test_swap_fail_slots(self):
    """Check which ``__slots__`` combinations ``torch.utils.swap_tensors`` accepts.

    Pairs that are expected to swap go through ``_checked_swap``; pairs that
    are expected to be rejected must raise the "different slots" RuntimeError.
    """
    class MyTwoTensor(TwoTensor):
        __slots__ = ("a", "b")

    class MyTwoTensor2(TwoTensor):
        __slots__ = ("b", "a")

    class MyTwoTensor3(TwoTensor):
        __slots__ = ("a", "b", "c", "d")

    class MyTwoTensor4(TwoTensor):
        __slots__ = ("a", "c")

    slots_error = "Cannot swap t1 and t2 if they have different slots"

    t1 = torch.rand(4)
    t2 = TwoTensor(torch.rand(4), torch.rand(4))
    t3 = MyTwoTensor(torch.rand(4), torch.rand(4))
    t4 = MyTwoTensor(torch.rand(4), torch.rand(4))
    t5 = MyTwoTensor2(torch.rand(4), torch.rand(4))
    t6 = MyTwoTensor3(torch.rand(4), torch.rand(4))
    t7 = MyTwoTensor3(torch.rand(4), torch.rand(4))
    t8 = MyTwoTensor4(torch.rand(4), torch.rand(4))

    self._checked_swap(t1, t2)

    # These pairs are expected to be rejected, in this order
    for lhs, rhs in ((t1, t3), (t2, t3), (t2, t8)):
        with self.assertRaisesRegex(RuntimeError, slots_error):
            torch.utils.swap_tensors(lhs, rhs)

    self._checked_swap(t3, t4)
    self._checked_swap(t3, t5)

    with self.assertRaisesRegex(RuntimeError, slots_error):
        torch.utils.swap_tensors(t3, t6)

    # Extra attributes set before the swap must be found on the other object afterwards
    t3.c = "foo"
    t4.d = "bar"
    self._checked_swap(t3, t4)
    self.assertEqual(t4.c, "foo")
    self.assertEqual(t3.d, "bar")

    t6.c = "cat"
    t7.d = "dog"
    self._checked_swap(t6, t7)
@unittest.skipIf(torch.npu.is_available(), "Test specific for CPU")
def test_bf16_supported_on_cpu(self):
    """Expect ``torch.npu.is_bf16_supported()`` to be false; skipped when an NPU is available."""
    bf16_supported = torch.npu.is_bf16_supported()
    self.assertFalse(bf16_supported)
def test_tensor_with_grad_to_scalar_warning(self) -> None:
    """Expect exactly one UserWarning when ``math.pow`` is given a tensor with ``requires_grad=True``."""
    expected_text = "Converting a tensor with requires_grad=True to a scalar may lead to unexpected behavior."
    with (warnings.catch_warnings(record=True) as caught,
          set_warn_always_context(True)):
        warnings.simplefilter("always")

        tensor = torch.tensor(2.0, requires_grad=True)
        math.pow(tensor, 3)

        self.assertEqual(len(caught), 1)
        recorded = caught[0]
        self.assertTrue(issubclass(recorded.category, UserWarning))
        self.assertIn(expected_text, str(recorded.message))
def test_tensor_item_no_warning(self):
    """Expect no recorded warnings from ``max(x, 3)`` and ``x.item()`` on a tensor with ``requires_grad=True``."""
    with (warnings.catch_warnings(record=True) as caught,
          set_warn_always_context(True)):
        warnings.simplefilter("always")

        tensor = torch.tensor(2.0, requires_grad=True)
        # Neither call is expected to add an entry to the recorded warnings
        max(tensor, 3)
        tensor.item()

        self.assertEqual(len(caught), 0)
# Call-style markers for the generated negative-dim tests; make_neg_dim_test
# checks membership of these values in its types_ argument.
METHOD = 1  # call the op as a tensor method: getattr(x, name)(*args)
INPLACE_METHOD = 2  # call the method named name + '_' on a clone of x
FUNCTIONAL = 4  # call the op as getattr(torch, name)(x, *args)
# Placeholder for a dimension argument: make_neg_dim_test replaces every DIM_ARG
# entry of an argument list with a dim index, and with index - ndim in the copy
# used for the negative-dim call.
DIM_ARG: None = None
def make_neg_dim_test(name, tensor_arg, arg_constr, types_, extra_dim=0):
    """Build a test checking that op ``name`` gives equal results for a dim and its negative alias.

    Args:
        name: op name; looked up on the input tensor (METHOD), on a clone with
            a trailing underscore (INPLACE_METHOD) and on ``torch`` (FUNCTIONAL).
        tensor_arg: a shape for ``torch.randn``, or a list of shapes, in which
            case the input is a list of tensors and only FUNCTIONAL is allowed.
        arg_constr: zero-argument callable returning a fresh argument list in
            which DIM_ARG marks every position that takes a dimension.
        types_: collection of METHOD / INPLACE_METHOD / FUNCTIONAL markers.
        extra_dim: added to the input rank to get the number of dims to test.

    Returns:
        A function taking ``self`` that can be attached to a test class.
    """
    def neg_dim_test(self):
        if isinstance(tensor_arg, list):
            assert METHOD not in types_ and INPLACE_METHOD not in types_
            x = [torch.randn(shape) for shape in tensor_arg]
            ndim = len(tensor_arg[-1]) + extra_dim
        else:
            x = torch.randn(*tensor_arg)
            ndim = len(tensor_arg) + extra_dim

        # Number of dimension placeholders in one argument list
        placeholder_count = sum(1 for entry in arg_constr() if entry is DIM_ARG)

        for dims_val in combinations(range(ndim), placeholder_count):
            pos_args = arg_constr()
            neg_args = copy.deepcopy(pos_args)

            # Fill placeholders left to right: non-negative dim in one list,
            # the equivalent negative dim in the other
            remaining_dims = iter(dims_val)
            for slot, value in enumerate(pos_args):
                if value is not DIM_ARG:
                    continue
                dim = next(remaining_dims)
                pos_args[slot] = dim
                neg_args[slot] = dim - ndim

            if METHOD in types_:
                expected = getattr(x, name)(*pos_args)
                actual = getattr(x, name)(*neg_args)
                self.assertEqual(expected, actual)

            if INPLACE_METHOD in types_:
                inplace_name = name + '_'
                expected = x.clone()
                getattr(expected, inplace_name)(*pos_args)
                actual = x.clone()
                getattr(actual, inplace_name)(*neg_args)
                self.assertEqual(expected, actual)

            if FUNCTIONAL in types_:
                functional = getattr(torch, name)
                expected = functional(x, *pos_args)
                actual = functional(x, *neg_args)
                self.assertEqual(expected, actual)

    return neg_dim_test
def idx_tensor(size, max_val):
    """Return a ``torch.LongTensor`` of shape ``size`` filled in place by ``random_(0, max_val - 1)``."""
    upper = max_val - 1
    indices = torch.LongTensor(*size)
    indices.random_(0, upper)
    return indices
def add_neg_dim_tests():
    """Generate a ``test_<op>_neg_dim`` method for every table entry and attach it to TestTorch.

    Each entry is ``(name, tensor_arg, arg_constr, types_)`` with an optional
    fifth ``extra_dim`` field (0 when omitted); the fields are forwarded to
    ``make_neg_dim_test``. An already existing test name trips an assertion.
    """
    neg_dim_tests = [
        ('narrow', (10, 20, 30), lambda: [DIM_ARG, 0, 5], [METHOD]),
        ('transpose', (10, 20, 30), lambda: [DIM_ARG, DIM_ARG], [METHOD, INPLACE_METHOD, FUNCTIONAL]),
        ('size', (10, 20, 30), lambda: [DIM_ARG], [METHOD]),
        ('cat', [(2, 3, 4), (2, 3, 4)], lambda: [DIM_ARG], [FUNCTIONAL]),
        ('chunk', (10, 20, 30), lambda: [5, DIM_ARG], [METHOD, FUNCTIONAL]),
        ('gather', (10, 20), lambda: [DIM_ARG, idx_tensor((10, 20), 10)], [METHOD, FUNCTIONAL]),
        ('index_select', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10)], [METHOD, FUNCTIONAL]),
        ('split', (10, 20), lambda: [5, DIM_ARG], [METHOD, FUNCTIONAL]),
        ('squeeze', (10, 1, 20, 1), lambda: [DIM_ARG], [METHOD, INPLACE_METHOD, FUNCTIONAL]),
        ('unbind', (2, 3, 4), lambda: [DIM_ARG], [FUNCTIONAL]),
        ('unsqueeze', (10, 20), lambda: [DIM_ARG], [METHOD, INPLACE_METHOD, FUNCTIONAL], 1),
        ('logcumsumexp', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('cumprod', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('cumsum', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('cummax', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('cummin', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('mean', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('median', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('nanmedian', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('mode', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('norm', (10, 20), lambda: [2, DIM_ARG], [METHOD, FUNCTIONAL]),
        ('prod', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('std', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('sum', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('var', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('kthvalue', (10, 20), lambda: [3, DIM_ARG], [METHOD, FUNCTIONAL]),
        ('max', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('min', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('sort', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('topk', (10, 20), lambda: [5, DIM_ARG], [METHOD, FUNCTIONAL]),
        ('renorm', (10, 20), lambda: [2, DIM_ARG, 1], [METHOD, INPLACE_METHOD, FUNCTIONAL]),
        ('index_add', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10), torch.randn(10, 10)], [INPLACE_METHOD]),
        ('index_copy', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10), torch.randn(10, 10)], [INPLACE_METHOD]),
        ('index_fill', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10), 12], [INPLACE_METHOD]),
        ('scatter', (10, 10), lambda: [DIM_ARG, idx_tensor((10, 10), 10), torch.randn(10, 10)], [INPLACE_METHOD]),
        ('select', (10, 20), lambda: [DIM_ARG, 3], [METHOD]),
        ('unfold', (10, 20), lambda: [DIM_ARG, 5, 2], [METHOD]),
    ]

    for decl in neg_dim_tests:
        # Entries have four fields, plus an optional fifth one holding extra_dim
        name, tensor_arg, arg_constr, types_, *optional = decl
        extra_dim = optional[0] if optional else 0

        test_name = 'test_' + name + '_neg_dim'
        assert not hasattr(TestTorch, test_name), "Duplicated test name: " + test_name

        generated = make_neg_dim_test(name, tensor_arg, arg_constr, types_, extra_dim)
        setattr(TestTorch, test_name, generated)
class TestViewOps(TestCase):
    """Generic test class with an empty body; it is passed to instantiate_device_type_tests at module level."""
class TestTensorDeviceOps(TestCase):
    """Generic test class with an empty body; it is passed to instantiate_device_type_tests at module level."""
# Attach the generated test_<op>_neg_dim methods to TestTorch.
add_neg_dim_tests()
# Hand each generic test class and this module's globals() to
# instantiate_device_type_tests, restricted to the 'privateuse1' device type.
instantiate_device_type_tests(TestViewOps, globals(), only_for='privateuse1')
instantiate_device_type_tests(TestVitalSignsNpu, globals(), only_for='privateuse1')
instantiate_device_type_tests(TestTensorDeviceOps, globals(), only_for='privateuse1')
instantiate_device_type_tests(TestTorchDeviceType, globals(), only_for='privateuse1')
instantiate_device_type_tests(TestDevicePrecision, globals(), only_for='privateuse1')
if __name__ == '__main__':
    # The settings below are applied only when this file is run as a script.
    # NOTE(review): presumably these disable NPU internal formats and JIT
    # compilation for the run; confirm against the torch_npu documentation.
    torch.npu.config.allow_internal_format = False
    torch.npu.set_compile_mode(jit_compile=False)
    # NOTE(review): flag name suggests it turns on a default-dtype check in TestCase; verify.
    TestCase._default_dtype_check_enabled = True
    run_tests()