import os
import time
import argparse
import torch
import numpy as np
from openmind import pipeline, is_torch_npu_available
from openmind import AutoTokenizer, AutoModelForCausalLM
from openmind_hub import snapshot_download
def parse_args(argv=None):
    """Parse the command-line options of the benchmark script.

    Args:
        argv: Optional sequence of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            the behaviour of a plain ``parse_args()`` call.

    Returns:
        argparse.Namespace with a single attribute ``model_name_or_path``
        (``str``, or ``None`` when the option is not supplied).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name_or_path",
        "-m",
        type=str,
        help="Path to model",
        default=None,
    )
    return parser.parse_args(argv)
def model_npu_inference(model_path: str):
    """Benchmark text-generation latency of a model via an openmind pipeline.

    Builds a ``text-generation`` pipeline for ``model_path``, runs a fixed
    sentiment-classification prompt 10 times with ``max_new_tokens=50``,
    prints the first generation, and finally prints the mean and standard
    deviation of the per-run wall-clock times together with the raw list.

    Args:
        model_path: Value forwarded to ``pipeline`` as ``model``. ``None``
            (the CLI default) is forwarded unchanged; in that case the report
            header uses the placeholder name ``"default"``.

    Returns:
        None. All results are written to stdout.

    Note:
        Any exception raised while building the pipeline or running inference
        is caught and reported on stdout rather than propagated.
    """
    use_npu = is_torch_npu_available()
    if use_npu:
        print("NPU available, use device_map='auto'.")
        device_map = "auto"
    else:
        print("NPU not available, use device_map='cpu'.")
        device_map = "cpu"

    try:
        task_pipeline = pipeline(
            task="text-generation",
            model=model_path,
            device_map=device_map,
            framework="pt",
            truncation=True,
        )

        # os.path.abspath(None) raises TypeError, so only derive a display
        # name from the path when one was actually given.
        if model_path is None:
            model_name = "default"
        else:
            model_name = os.path.basename(os.path.abspath(model_path))

        prompts = [
            "Classify the text into neutral, negative or positive.\n"
            "Text: This movie is definitely one of my favorite movies of its kind. "
            "The interaction between respectable and morally strong characters is "
            "an ode to chivalry and the honor code amongst thieves and policemen.\n"
            "Sentiment:\n"
        ]
        inference_times = []
        num_runs = 10

        print(f"\n=== NPU {model_name} 性能测试 ===")
        for run_idx in range(num_runs):
            input_text = prompts[run_idx % len(prompts)]

            # perf_counter is monotonic and high-resolution, unlike time.time,
            # which can jump if the system clock is adjusted.
            start_time = time.perf_counter()
            results = task_pipeline(input_text, max_new_tokens=50)
            if use_npu:
                # Wait for queued NPU work so the measurement covers the whole
                # generation. Skipped on the CPU fallback, where torch.npu may
                # not be usable at all.
                torch.npu.synchronize()
            inference_times.append(time.perf_counter() - start_time)

            if run_idx == 0:
                print(f"输入文本: {input_text}")
                print("生成结果:")
                print(f" {results[0]['generated_text']}")

        # NOTE: the first run is included in the statistics, so any one-off
        # warm-up cost shows up in both the mean and the standard deviation.
        avg_time = np.mean(inference_times)
        std_time = np.std(inference_times)

        print("\n性能分析:")
        print(f"NPU平均推理时间: {avg_time:.4f} 秒")
        print(f"NPU推理时间标准差: {std_time:.4f} 秒")
        print("推理时间列表:", inference_times)
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and return.
        print(f"NPU 推理发生错误: {e}")
def main():
    """Script entry point: parse the CLI options and run the benchmark."""
    cli_options = parse_args()
    model_npu_inference(cli_options.model_name_or_path)
# Run the benchmark only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()