import sys
import torch
from torch.testing._internal.common_utils import NoTest, run_tests, TestCase
# Every test in this module needs a live accelerator.  When none is present,
# report that on stderr and terminate the process before any test class is
# defined.
if not torch.accelerator.is_available():
    sys.stderr.write("No available accelerator detected, skipping tests\n")
    # Rebind the base class to the NoTest placeholder before exiting.
    TestCase = NoTest  # noqa: F811
    raise SystemExit
class TestAccelerator(TestCase):
def test_current_accelerator(self):
self.assertTrue(torch.accelerator.is_available())
accelerators = ["cuda", "xpu", "mps", "npu"]
for accelerator in accelerators:
if torch.get_device_module(accelerator).is_available():
self.assertEqual(
torch.accelerator.current_accelerator().type, accelerator
)
self.assertIsNone(torch.accelerator.current_accelerator().index)
with self.assertRaisesRegex(
ValueError, "doesn't match the current accelerator"
):
torch.accelerator.set_device_index("cpu")
def test_generic_stream_behavior(self):
s1 = torch.Stream()
s2 = torch.Stream()
torch.accelerator.set_stream(s1)
self.assertEqual(torch.accelerator.current_stream(), s1)
event = torch.Event()
a = torch.randn(1000)
b = torch.randn(1000)
c = a + b
torch.accelerator.set_stream(s2)
self.assertEqual(torch.accelerator.current_stream(), s2)
a_acc = a.to(torch.accelerator.current_accelerator(), non_blocking=True)
b_acc = b.to(torch.accelerator.current_accelerator(), non_blocking=True)
torch.accelerator.set_stream(s1)
self.assertEqual(torch.accelerator.current_stream(), s1)
event.record(s2)
event.synchronize()
c_acc = a_acc + b_acc
event.record(s2)
torch.accelerator.synchronize()
self.assertTrue(event.query())
self.assertEqual(c_acc.cpu(), c)
def test_current_stream_query(self):
s = torch.accelerator.current_stream()
self.assertEqual(torch.accelerator.current_stream(s.device), s)
self.assertEqual(torch.accelerator.current_stream(s.device.index), s)
self.assertEqual(torch.accelerator.current_stream(str(s.device)), s)
other_device = torch.device("cpu")
with self.assertRaisesRegex(
ValueError, "doesn't match the current accelerator"
):
torch.accelerator.current_stream(other_device)
def test_stream_context_manager(self):
prev_stream = torch.accelerator.current_stream()
with torch.Stream() as s:
self.assertEqual(torch.accelerator.current_stream(), s)
self.assertEqual(torch.accelerator.current_stream(), prev_stream)
def test_pin_memory_on_non_blocking_copy(self):
t_acc = torch.randn(100).to(torch.accelerator.current_accelerator())
t_host = t_acc.to("cpu", non_blocking=True)
torch.accelerator.synchronize()
self.assertTrue(t_host.is_pinned())
self.assertEqual(t_acc.cpu(), t_host)
def test_generic_event_behavior(self):
event1 = torch.Event(enable_timing=False)
event2 = torch.Event(enable_timing=False)
with self.assertRaisesRegex(
ValueError,
"Both events must be created with argument 'enable_timing=True'",
):
event1.elapsed_time(event2)
event1 = torch.Event(enable_timing=True)
event2 = torch.Event(enable_timing=True)
with self.assertRaisesRegex(
ValueError,
"Both events must be recorded before calculating elapsed time",
):
event1.elapsed_time(event2)
event1 = torch.Event()
event2 = torch.Event()
with self.assertRaisesRegex(
ValueError,
"Both events must be created with argument 'enable_timing=True'",
):
event1.elapsed_time(event2)
def test_event_elapsed_time(self):
start_event = torch.Event(enable_timing=True)
end_event = torch.Event(enable_timing=True)
start_event.record()
x = torch.randn(1000, 1000, device='npu')
y = torch.randn(1000, 1000, device='npu')
z = torch.matmul(x, y)
end_event.record()
torch.npu.synchronize()
ms = start_event.elapsed_time(end_event)
self.assertGreater(ms, 0)
# Launch the test runner only when this file is executed as a script;
# importing the module must not start a test run.
if __name__ == "__main__":
    run_tests()