import sys
import time
import numpy as np
import openvino as ov

# Number of iterations used for both the warmup loop and the timed loop.
infer_cnt = 1000

# Exit with a usage hint rather than a bare IndexError when the model path
# argument is missing.
if len(sys.argv) < 2:
    sys.exit("usage: python <script> <model_path>")

core = ov.Core()

# Read the model from the path given as the first command-line argument and
# compile it for the "NPU" device with the NPU_USE_NPUW option set to "YES".
model = core.read_model(model=sys.argv[1])
model = core.compile_model(model, "NPU", config={"NPU_USE_NPUW": "YES"})

# Constant all-ones float32 input of shape (1, 3, 224, 224).
# NOTE(review): presumably an NCHW image batch; confirm against the model.
# Named `input_tensor` so the builtin `input` is not shadowed.
input_tensor = np.ones((1, 3, 224, 224), dtype=np.float32)

# A single infer request is created once and reused for every run below.
request = model.create_infer_request()

# Run one inference before the "inference start" marker is printed.
# NOTE(review): presumably this absorbs first-run setup cost; confirm.
request.start_async(input_tensor)
request.wait()

print("inference start", file=sys.stderr, flush=True)

request.start_async(input_tensor)
request.wait()

print("inference done", file=sys.stderr, flush=True)

# Report the index of the largest value in the output tensor.
output = request.get_output_tensor().data
predicted_idx = np.argmax(output)
print(f'predicted class: {predicted_idx}', file=sys.stderr, flush=True)

print("warmup...", file=sys.stderr, flush=True)
for _ in range(infer_cnt):
    request.start_async(input_tensor)
    request.wait()

print("benchmark...", file=sys.stderr, flush=True)
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
for _ in range(infer_cnt):
    request.start_async(input_tensor)
    request.wait()
end = time.perf_counter()
# Average latency per inference, converted from seconds to milliseconds.
print(f"inference time: {(end - start) * 1000 / infer_cnt:.2f} ms", file=sys.stderr, flush=True)

# Report the predicted index again from the last benchmark iteration's output.
output = request.get_output_tensor().data
predicted_idx = np.argmax(output)
print(f'predicted class: {predicted_idx}', file=sys.stderr, flush=True)