"""pytorch_prof.py
"""
import torch
import torch.optim as optim
import torch.nn as nn
import time
import argparse
from models import networks
def build_model():
model = networks.define_G(3, 3, 64, 'unet_256', 'instance',
True, 'normal', 0.02, '[0]')
return model
def get_raw_data():
input_tensor = torch.randn(1, 3, 256, 256)
return input_tensor
def criterion(x):
base_func = nn.CrossEntropyLoss()
shape_list = x.shape
N = shape_list[0]
R = 1
if len(shape_list) > 1:
for r in shape_list[1:]:
R *= r
T = torch.randint(0,R, size=(N,)).to(x.device)
if str(T.device).startswith('npu'):
T = T.int()
return base_func(x.reshape(N, -1), T)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='PyTorch Prof')
parser.add_argument('--device', type=str, default='cpu',
help='set which type of device used. Support cuda:0(device_id), npu:0(device_id).')
parser.add_argument('--amp', default=False, action='store_true',
help='use amp during prof')
parser.add_argument('--loss-scale', default=64.0, type=float,
help='loss scale using in amp, default 64.0, -1 means dynamic')
parser.add_argument('--opt-level', default='O2', type=str,
help='opt-level using in amp, default O2')
parser.add_argument('--FusedSGD', default=False, action='store_true',
help='use FusedSGD during prof')
args = parser.parse_args()
if args.device.startswith('cuda'):
torch.cuda.set_device(args.device)
prof_kwargs = {'use_cuda': True}
elif args.device.startswith('npu'):
torch.npu.set_device(args.device)
prof_kwargs = {'use_npu': True}
else:
prof_kwargs = {}
model = build_model()
if args.FusedSGD:
from apex.optimizers import NpuFusedSGD
optimizer = NpuFusedSGD(model.parameters(), lr=0.01)
model = model.to(args.device)
if args.amp:
from apex import amp
model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level,
loss_scale=None if args.loss_scale == -1 else args.loss_scale,
combine_grad=True)
else:
optimizer = optim.SGD(model.parameters(), lr=0.01)
model = model.to(args.device)
if args.amp:
from apex import amp
model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level,
loss_scale=None if args.loss_scale == -1 else args.loss_scale)
input_tensor = get_raw_data()
input_tensor = input_tensor.to(args.device)
def run():
output_tensor = model(input_tensor)
loss = criterion(output_tensor)
optimizer.zero_grad()
if args.amp:
with amp.scale_loss(loss, optimizer) as scaled_loss:
scaled_loss.backward()
else:
loss.backward()
optimizer.step()
return loss
for i in range(5):
start_time = time.time()
loss = run()
print('iter: %d, loss: %.2f, time: %.2f'%(i, loss, (time.time() - start_time)*1000))
with torch.autograd.profiler.profile(**prof_kwargs) as prof:
run()
print(prof.key_averages().table())
prof.export_chrome_trace("pytorch_prof_%s.prof" % args.device)