// MindIE-LLM/tests/dlt/ut/llm_manager/test_llm_manager_response_stub.cpp - code preview - MindIE-LLM: large language model inference acceleration suite for Ascend hardware - AtomGit

// Commit 40fb0630, created on 2025-12-22 (commit history)
/**
 * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
 * MindIE is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "test_llm_manager_adapter.h"
#include "memory_utils.h"

namespace mindie_llm {

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubInputIds()
{
    std::vector<std::shared_ptr<InferRequest>> requests;

    InferRequestId requestId("req_1");
    auto req = std::make_shared<InferRequest>(requestId);

    std::vector<int64_t> inferTokens = {1, 2, 3};
    std::vector<int64_t> shape = {1, static_cast<int64_t>(inferTokens.size())};
    auto tensor = std::make_shared<InferTensor>("INPUT_IDS", InferDataType::TYPE_INT64, shape);
    tensor->Allocate(inferTokens.size() * sizeof(int64_t));
    std::copy(inferTokens.begin(), inferTokens.end(), static_cast<int64_t *>(tensor->data));
    req->AddTensor("INPUT_IDS", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubLoraId()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    auto req = std::make_shared<InferRequest>(requestId);
    std::string loraId = "test_lora_123";
    std::vector<int64_t> shape = {1, static_cast<int64_t>(loraId.size())};
    auto tensor = std::make_shared<InferTensor>("LORA_ID", InferDataType::TYPE_STRING, shape);
    tensor->Allocate((loraId.size() + 1) * sizeof(char));
    auto *buffer = static_cast<char *>(tensor->data);
    auto ret = strcpy_s(buffer, loraId.size() + 1, loraId.c_str());
    if (ret != 0) {
        std::cout << "Failed to strcpy_s data for lora ids" << std::endl;
        return requests;
    }
    req->AddTensor("LORA_ID", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubIgnoreEos()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    bool ignoreEos = true;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("IGNORE_EOS", InferDataType::TYPE_BOOL, shape);
    tensor->Allocate(paramNum * sizeof(bool));
    auto *buffer = static_cast<bool *>(tensor->data);
    buffer[0u] = ignoreEos;
    req->AddTensor("IGNORE_EOS", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubStopStrings()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    auto req = std::make_shared<InferRequest>(requestId);
    std::string stopStrings = "test_stopStrings_123";
    std::vector<int64_t> shape = {1, static_cast<int64_t>(stopStrings.size())};
    auto tensor = std::make_shared<InferTensor>("STOP_STRINGS", InferDataType::TYPE_STRING, shape);
    tensor->Allocate((stopStrings.size() + 1) * sizeof(char));
    auto *buffer = static_cast<char *>(tensor->data);
    auto ret = strcpy_s(buffer, stopStrings.size() + 1, stopStrings.c_str());
    if (ret != 0) {
        std::cout << "Failed to strcpy_s data for STOP_STRINGS" << std::endl;
        return requests;
    }
    req->AddTensor("STOP_STRINGS", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubLogProbs()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    bool logProbs = true;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("LOGPROBS", InferDataType::TYPE_BOOL, shape);
    tensor->Allocate(paramNum * sizeof(bool));
    auto *buffer = static_cast<bool *>(tensor->data);
    buffer[0u] = logProbs;
    req->AddTensor("LOGPROBS", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubTopLogProbs()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    uint32_t topLogProbs = 123;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("TOP_LOGPROBS", InferDataType::TYPE_UINT32, shape);
    tensor->Allocate(paramNum * sizeof(uint32_t));
    auto *buffer = static_cast<uint32_t *>(tensor->data);
    buffer[0u] = topLogProbs;
    req->AddTensor("TOP_LOGPROBS", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubTemperature()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    float temperature = 123.123f;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("TEMPERATURE", InferDataType::TYPE_FP32, shape);
    tensor->Allocate(paramNum * sizeof(float));
    auto *buffer = static_cast<float *>(tensor->data);
    buffer[0u] = temperature;
    req->AddTensor("TEMPERATURE", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubTopK()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    int32_t topK = 12;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("TOP_K", InferDataType::TYPE_INT32, shape);
    tensor->Allocate(paramNum * sizeof(int32_t));
    auto *buffer = static_cast<int32_t *>(tensor->data);
    buffer[0u] = topK;
    req->AddTensor("TOP_K", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubTopP()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    float topP = 12.12f;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("TOP_P", InferDataType::TYPE_FP32, shape);
    tensor->Allocate(paramNum * sizeof(float));
    auto *buffer = static_cast<float *>(tensor->data);
    buffer[0u] = topP;
    req->AddTensor("TOP_P", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubTypicalP()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    float typicalP = 1.1f;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("TYPICAL_P", InferDataType::TYPE_FP32, shape);
    tensor->Allocate(paramNum * sizeof(float));
    auto *buffer = static_cast<float *>(tensor->data);
    buffer[0u] = typicalP;
    req->AddTensor("TYPICAL_P", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubDoSample()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    bool doSample = true;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("DO_SAMPLE", InferDataType::TYPE_BOOL, shape);
    tensor->Allocate(paramNum * sizeof(bool));
    auto *buffer = static_cast<bool *>(tensor->data);
    buffer[0u] = doSample;
    req->AddTensor("DO_SAMPLE", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubSeed()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    uint64_t seed = 123;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("SEED", InferDataType::TYPE_UINT64, shape);
    tensor->Allocate(paramNum * sizeof(uint64_t));
    auto *buffer = static_cast<uint64_t *>(tensor->data);
    buffer[0u] = seed;
    req->AddTensor("SEED", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubRepetitionPenalty()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    float repetitionPenalty = 1.2f;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("REPETITION_PENALTY", InferDataType::TYPE_FP32, shape);
    tensor->Allocate(paramNum * sizeof(float));
    auto *buffer = static_cast<float *>(tensor->data);
    buffer[0u] = repetitionPenalty;
    req->AddTensor("REPETITION_PENALTY", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubFrequencyPenalty()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    float frequencyPenalty = 1.3f;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("FREQUENCY_PENALTY", InferDataType::TYPE_FP32, shape);
    tensor->Allocate(paramNum * sizeof(float));
    auto *buffer = static_cast<float *>(tensor->data);
    buffer[0u] = frequencyPenalty;
    req->AddTensor("FREQUENCY_PENALTY", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubPresencyPenalty()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    float presencyPenalty = 1.4f;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("PRESENCE_PENALTY", InferDataType::TYPE_FP32, shape);
    tensor->Allocate(paramNum * sizeof(float));
    auto *buffer = static_cast<float *>(tensor->data);
    buffer[0u] = presencyPenalty;
    req->AddTensor("PRESENCE_PENALTY", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubIncludeStopStrInOutput()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    bool includeStopStrInOutput = true;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("INCLUDE_STOP_STR_IN_OUTPUT", InferDataType::TYPE_BOOL, shape);
    tensor->Allocate(paramNum * sizeof(bool));
    auto *buffer = static_cast<bool *>(tensor->data);
    buffer[0u] = includeStopStrInOutput;
    req->AddTensor("INCLUDE_STOP_STR_IN_OUTPUT", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubWatermark()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    bool watermark = true;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("WATERMARK", InferDataType::TYPE_BOOL, shape);
    tensor->Allocate(paramNum * sizeof(bool));
    auto *buffer = static_cast<bool *>(tensor->data);
    buffer[0u] = watermark;
    req->AddTensor("WATERMARK", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubN()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    uint32_t n = 1234;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("N", InferDataType::TYPE_UINT32, shape);
    tensor->Allocate(paramNum * sizeof(uint32_t));
    auto *buffer = static_cast<uint32_t *>(tensor->data);
    buffer[0u] = n;
    req->AddTensor("N", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubBestOf()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    uint32_t bestOf = 1234;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("BEST_OF", InferDataType::TYPE_UINT32, shape);
    tensor->Allocate(paramNum * sizeof(uint32_t));
    auto *buffer = static_cast<uint32_t *>(tensor->data);
    buffer[0u] = bestOf;
    req->AddTensor("BEST_OF", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubUseBeamSearch()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    bool useBeamSearch = true;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("USE_BEAM_SEARCH", InferDataType::TYPE_BOOL, shape);
    tensor->Allocate(paramNum * sizeof(bool));
    auto *buffer = static_cast<bool *>(tensor->data);
    buffer[0u] = useBeamSearch;
    req->AddTensor("USE_BEAM_SEARCH", tensor);

    requests.emplace_back(req);
    return requests;
}

std::vector<std::shared_ptr<InferRequest>> GetRequestsStubLengthPenalty()
{
    std::vector<std::shared_ptr<InferRequest>> requests;
    InferRequestId requestId("req_1");
    const int64_t paramNum = 1;
    auto req = std::make_shared<InferRequest>(requestId);
    float lengthPenalty = 1.5f;
    std::vector<int64_t> shape = {1, paramNum};
    auto tensor = std::make_shared<InferTensor>("LENGTH_PENALTY", InferDataType::TYPE_FP32, shape);
    tensor->Allocate(paramNum * sizeof(float));
    auto *buffer = static_cast<float *>(tensor->data);
    buffer[0u] = lengthPenalty;
    req->AddTensor("LENGTH_PENALTY", tensor);

    requests.emplace_back(req);
    return requests;
}

}