* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
* MindIE is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
*/
#ifndef MINDIE_LLM_LORA_MANAGER_H
#define MINDIE_LLM_LORA_MANAGER_H
#include <mutex>
#include <string>
#include "executor/executor_interface.h"
#include "lora/ilora_manager.h"
namespace mindie_llm {
/**
 * Status codes describing the outcome of a LoRA operation; this is the
 * return type of LoraManager::GetLoraStatus.
 *
 * Every enumerator carries an explicit numeric value, so adding a new entry
 * never renumbers the existing ones.
 *
 * NOTE(review): the precise meaning of each code is not defined in this
 * header and is only implied by the enumerator names — confirm against the
 * call sites before relying on it.
 */
enum class LoraStatus {
    LOAD_SUCCESS              = 0,
    DUPLICATED_LORA_ID        = 1,
    UNLOADING                 = 2,
    INVALID_LORA_ID           = 3,
    INVALID_LORA_PATH         = 4,
    INVALID_LORA_RANK         = 5,
    SLOTS_FULL                = 6,
    SLOTS_FULL_WITH_UNLOADING = 7,
    UNLOAD_SUCCESS            = 8,
    LORA_NOT_FOUND            = 9,
    UNSUPPORT_CMD             = 10,
};
/**
 * Concrete ILoraManager. Each object is constructed from one executor and a
 * maxLoras limit; the class also owns a static vector of shared instances.
 *
 * NOTE(review): the comments in this class are derived from the declarations
 * only — the definitions live elsewhere, so anything marked "presumably"
 * must be confirmed against the implementation.
 * NOTE(review): this header names std::vector, std::shared_ptr and
 * std::optional but directly includes only <mutex> and <string>; presumably
 * the project headers pull the rest in — consider including them directly.
 */
class LoraManager : public ILoraManager {
public:
    // ---- Static entry points ------------------------------------------------
    // Presumably Initialize() populates instances_ (guarded by initFlag_) and
    // GetInstance() returns the entry selected by localDPRank — TODO confirm.
    static void Initialize(std::vector<IExecutorSPtr> executors, uint32_t maxLoras);
    static std::shared_ptr<LoraManager> GetInstance(size_t localDPRank);

    // ---- Construction / destruction -----------------------------------------
    LoraManager(IExecutorSPtr executor, uint32_t maxLoras);
    ~LoraManager() override = default;

    // ---- ILoraManager overrides: load / unload ------------------------------
    Status Load(const LoraParamSPtr loraInfo) override;
    Status StartToUnload(const std::string &loraName) override;
    void TryUnLoadWaiting() override;

    // ---- ILoraManager overrides: inspection ---------------------------------
    // loraInfo is a non-const reference; presumably filled in as the output.
    Status GetLoadedLoras(std::vector<LoraParamSPtr> &loraInfo) override;
    bool ValidateLoraId(const std::optional<std::string> &loraId) override;

    // ---- ILoraManager overrides: start-up state -----------------------------
    void InitLoadedLoras(const std::vector<ModelParam> &modelParamVec) override;

    // ---- ILoraManager overrides: reference counting -------------------------
    // loraId is optional; handling of an empty optional is not visible here.
    void IncLoraRef(const std::optional<std::string> &loraId) override;
    void DecLoraRef(const std::optional<std::string> &loraId) override;

    // ---- LoraManager-specific API (declared without `override`) -------------
    // Returns a LoraStatus code; loraIsInvalid is a non-const reference and is
    // presumably written as a secondary result — TODO confirm.
    LoraStatus GetLoraStatus(const LoraParamSPtr loraInfo, bool &loraIsInvalid);

private:
    // String-keyed maps. Judging by the names: adapters currently loaded,
    // adapters waiting to be unloaded, and a per-id reference count.
    ConcurrentMap<std::string, LoraParamSPtr> loaded_;
    ConcurrentMap<std::string, LoraParamSPtr> wait2Unloaded_;
    ConcurrentMap<std::string, uint32_t> loraIdRef_;

    // Executor handed to the constructor (presumably stored here as-is).
    IExecutorSPtr executor_;
    // Defaults to 0 until set; presumably the cap on concurrently loaded LoRAs.
    uint32_t maxLoras_ = 0;

    // Class-wide state shared by all instances.
    static std::once_flag initFlag_;
    static std::vector<std::shared_ptr<LoraManager>> instances_;
};
// Shared-ownership handle to a LoraManager (same type that LoraManager::GetInstance returns).
using LlmLoraPtr = std::shared_ptr<LoraManager>;
}
#endif