* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
* MindIE is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
*/
#ifndef SEQUENCE_GROUP_META_DATA_H
#define SEQUENCE_GROUP_META_DATA_H
#include "basic_types.h"
#include "sampling.h"
#include "sequence.h"
namespace mindie_llm {
/// Pair of step counters attached to a sequence group.
///
/// Both counters carry a default member initializer of zero, so a
/// default-initialized object (`SequenceGroupState s;`) never exposes
/// indeterminate values. Aggregate initialization (`SequenceGroupState{a, b}`)
/// keeps working and overrides the defaults.
struct SequenceGroupState {
    // NOTE(review): presumably the total number of steps planned for the group — confirm with callers.
    size_t numSteps_{0};
    // NOTE(review): presumably the number of steps already executed — confirm with callers.
    size_t currentSteps_{0};
};
/// Per-sequence-group metadata record.
///
/// Plain aggregate: it has no invariants of its own and no member functions.
/// Every scalar member carries a default member initializer, so a
/// default-initialized object (`SequenceGroupMetaData m;`) holds well-defined
/// values (false / 0) instead of indeterminate ones. Containers and optionals
/// start empty, and the shared pointer starts null.
///
/// Member order is part of the interface (aggregate initialization, layout)
/// and must not be changed.
struct SequenceGroupMetaData {
    // --- Request identity -------------------------------------------------
    RequestId requestId_;
    std::string serverid_;

    // --- Sampling ---------------------------------------------------------
    // Shared ownership of the sampling parameters; null until assigned.
    std::shared_ptr<SamplingParams> samplingParams_;
    bool doSample_{false};

    // --- Scheduling state -------------------------------------------------
    size_t tokenChunkSize_{0};
    // Value-initialized so the counters are zero regardless of how
    // SequenceGroupState declares its members.
    SequenceGroupState state_{};

    // --- Per-sequence data ------------------------------------------------
    // NOTE(review): these vectors look like parallel arrays indexed per
    // sequence — confirm against the code that fills them.
    std::vector<SequenceId> seqIds_;
    std::vector<size_t> promptLens_;
    std::vector<TokenId> tokenIds_;
    std::vector<BlockIds> blockIds_;

    uint64_t dpInstanceId_{0};

    std::vector<BlockIds> srcBlockIds_;
    std::vector<size_t> computedLens_;
    std::vector<size_t> remoteComputedLens_;

    // --- Optional per-request overrides (empty means "not specified") ------
    std::optional<bool> skipSpecialTokens_;
    std::optional<bool> ignoreEos_;
    std::optional<std::string> loraId_;

    // --- Feature flags ----------------------------------------------------
    // NOTE(review): presumably sequence-parallel / context-parallel /
    // multi-token-prediction switches — confirm.
    bool isSp_{false};
    bool isCp_{false};
    bool isMtp_{false};

    // --- Rank / block placement -------------------------------------------
    size_t spRankId_{0};
    bool isAppendBlock_{false};
    size_t appendBlockRankId_{0};
    std::vector<size_t> spRankPromptTokenNum_;
    std::vector<size_t> spRankBlockNum_;
    std::vector<size_t> prefillBlockRankId_;

    std::vector<SequenceId> reservedSeqIds_;

    // --- Chunking / split information -------------------------------------
    // Kept as std::vector<bool> because the type is part of the interface.
    std::vector<bool> isReqPrefill_;
    std::vector<bool> isReqLastChunk_;
    std::vector<size_t> splitStartPos_;
    std::vector<size_t> splitEndPos_;

    int32_t requestGap_{0};

    // --- "lwdCloud" counterparts of the rank / block fields -----------------
    size_t lwdCloudSpRankId_{0};
    size_t lwdCloudAppendBlockRankId_{0};
    std::vector<size_t> lwdCloudSpRankPromptTokenNum_;
    std::vector<size_t> lwdCloudSpRankBlockNum_;
    std::vector<BlockId> lwdCloudBlockIds_;

    // --- Output shaping ---------------------------------------------------
    std::optional<std::string> responseFormat_;
    std::vector<TokenId> predictedTokenIds_;
};
/// Batch-level bundle: a list of per-group metadata records together with
/// sequence-length lists and two 64-bit bounds.
///
/// Both bounds default to 0; the two vectors start empty.
struct SequenceGroupMetaDatas {
    // One metadata record per sequence group.
    std::vector<SequenceGroupMetaData> metaList;

    // Nested list of sequence lengths.
    // NOTE(review): outer-index meaning is not visible here — confirm with the producer.
    std::vector<std::vector<int64_t>> seqLenList;

    int64_t maxBatchSize{0};
    int64_t maxSeqLen{0};
};
}
#endif