# solar-merge-v1.0/examples/inference.py - code preview - solar-merge-v1.0: a Mixture-of-Experts (MoE) based model project - AtomGit
#
# Hhuangjingwang: Upload folder using openMind hub
# Commit 08fad021, created on 2024-11-28 (commit history)
import os
import time
import argparse
import torch
import numpy as np
from openmind import pipeline, is_torch_npu_available
from openmind import AutoTokenizer, AutoModelForCausalLM
from openmind_hub import snapshot_download

def parse_args():
    """Parse the command-line options of this example script.

    Returns:
        argparse.Namespace: carries ``model_name_or_path``, which is the
        string given via ``--model_name_or_path`` / ``-m`` or ``None`` when
        the option is omitted.
    """
    cli = argparse.ArgumentParser()
    # Single option: where to find the model to benchmark.
    model_option = {"type": str, "default": None, "help": "Path to model"}
    cli.add_argument("--model_name_or_path", "-m", **model_option)
    return cli.parse_args()

def model_npu_inference(model_path: str):
    """Benchmark text-generation inference for the model at ``model_path``.

    Builds an openMind ``text-generation`` pipeline, runs a fixed sample
    prompt ``num_runs`` times, prints the first generation, and reports the
    mean and standard deviation of the measured wall-clock latency.

    Args:
        model_path: Model path or identifier handed to ``pipeline``. May be
            ``None`` when the CLI option is omitted; the name shown in the
            report then falls back to ``"default"``.

    Returns:
        None. All results are printed. Any exception raised while building
        the pipeline or running inference is caught and reported with a
        printed message instead of propagating (best-effort example script).
    """
    # Pick the device map once and remember the decision: the NPU-specific
    # synchronisation below must only run when an NPU is actually present.
    use_npu = is_torch_npu_available()
    if use_npu:
        print("NPU available, use device_map='auto'.")
        device_map = "auto"
    else:
        print("NPU not available, use device_map='cpu'.")
        device_map = "cpu"

    try:
        # Create the text-generation pipeline on the selected device.
        task_pipeline = pipeline(
            task="text-generation",
            model=model_path,
            device_map=device_map,
            framework="pt",
            truncation=True,
        )

        # Display name for the report: last component of the model path.
        # os.path.abspath(None) raises TypeError, so guard the missing-path case.
        # NOTE(review): assumes pipeline() accepts model=None — confirm.
        if model_path is None:
            model_name = "default"
        else:
            model_name = os.path.basename(os.path.abspath(model_path))

        # Sample prompts used for the benchmark.
        prompt = [
            """Classify the text into neutral, negative or positive. 
Text: This movie is definitely one of my favorite movies of its kind. The interaction between respectable and morally strong characters is an ode to chivalry and the honor code amongst thieves and policemen.
Sentiment:
"""
        ]

        # Latency measurement.
        inference_times = []
        num_runs = 10

        print(f"\n=== NPU {model_name} 性能测试 ===")

        for run_idx in range(num_runs):
            # Cycle through the prompts in order.
            input_text = prompt[run_idx % len(prompt)]

            # perf_counter is monotonic and high-resolution, unlike time.time.
            start_time = time.perf_counter()
            results = task_pipeline(input_text, max_new_tokens=50)
            if use_npu:
                # Wait for queued NPU work so the timing covers the full run.
                # Skipped on CPU, where torch.npu may not exist at all.
                torch.npu.synchronize()

            inference_times.append(time.perf_counter() - start_time)

            # Show the generated text for the first run only.
            if run_idx == 0:
                print(f"输入文本: {input_text}")
                print("生成结果：")
                print(f"  {results[0]['generated_text']}")

        # Summary statistics over all runs.
        avg_time = np.mean(inference_times)
        std_time = np.std(inference_times)

        print("\n性能分析:")
        print(f"NPU平均推理时间: {avg_time:.4f} 秒")
        print(f"NPU推理时间标准差: {std_time:.4f} 秒")
        print("推理时间列表:", inference_times)

    except Exception as e:
        # Deliberate best-effort: report the failure rather than crash.
        print(f"NPU 推理发生错误: {e}")

def main():
    """Script entry point: read the CLI arguments and run the benchmark."""
    cli_args = parse_args()
    # --model_name_or_path selects the model handed to the benchmark.
    model_npu_inference(cli_args.model_name_or_path)


# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()