/**
* Copyright (c) Huawei Technologies Co., Ltd. 2026. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Description: Device manager factory with runtime device detection.
* Both NPU and GPU backends are compiled in (when USE_NPU/USE_GPU defined).
* The actual backend is selected at runtime by probing /dev/davinci* or /dev/nvidia*.
*/
#ifndef DATASYSTEM_COMMON_DEVICE_DEVICE_MANAGER_FACTORY_H
#define DATASYSTEM_COMMON_DEVICE_DEVICE_MANAGER_FACTORY_H
#include "datasystem/common/device/device_manager_base.h"
#include "datasystem/common/log/log.h"
#if defined(USE_NPU) || defined(USE_GPU)
#include "datasystem/common/util/file_util.h"
#endif
#if defined(USE_NPU) || defined(WITH_TESTS)
#include "datasystem/common/device/ascend/acl_device_manager.h"
#endif
#ifdef USE_GPU
#include "datasystem/common/device/nvidia/cuda_device_manager.h"
#endif
#if defined(USE_NPU) || defined(USE_GPU) || defined(WITH_TESTS)
#include <glob.h>
#include <cstdlib>
#endif
namespace datasystem {
/** Accelerator backend kinds that the device manager factory can report or select. */
enum class DeviceBackend {
    NPU,     // Ascend NPU; the factory maps this to the ACL device manager.
    GPU,     // NVIDIA GPU; the factory maps this to the CUDA device manager.
    UNKNOWN  // No supported accelerator was detected.
};
/**
* @brief Device Manager Factory with runtime device detection.
*
* When both USE_NPU and USE_GPU are defined (universal wheel build),
* the factory probes /dev/davinci* and /dev/nvidia* at runtime to
* decide which backend to use. Only the selected backend's plugin
* is dlopen'd.
*
* Usage:
* DeviceManagerBase* mgr = DeviceManagerFactory::GetDeviceManager();
*/
class DeviceManagerFactory {
public:
    /**
     * @brief Get the device manager for the backend selected at runtime.
     *
     * In test builds (WITH_TESTS) the environment override is evaluated on every call and,
     * when set, takes precedence over the cached result. Otherwise the backend is detected
     * once, on the first call, and the same pointer is returned afterwards.
     *
     * @return The selected backend's manager, or nullptr when no supported accelerator
     *         was detected (or its backend was not compiled in).
     */
    static DeviceManagerBase *GetDeviceManager()
    {
        if (auto *overrideMgr = DetectTestOverrideManager(); overrideMgr != nullptr) {
            return overrideMgr;
        }
        // Function-local static: Detect() runs exactly once, on first use.
        static DeviceManagerBase *inst = Detect();
        return inst;
    }

    /**
     * @brief Report which backend the factory would use, honouring the test override.
     * @return DeviceBackend::NPU when the Ascend mock is forced in a test build, otherwise
     *         the result of ProbePhysicalBackend().
     */
    static DeviceBackend ProbeBackend()
    {
#ifdef WITH_TESTS
        if (IsTestAscendMockForced()) {
            return DeviceBackend::NPU;
        }
#endif
        return ProbePhysicalBackend();
    }

    /**
     * @brief Report which accelerator is physically present, ignoring any test override.
     * @return The backend found by the first probe; the result is cached for later calls.
     */
    static DeviceBackend ProbePhysicalBackend()
    {
        static DeviceBackend backend = DetectPhysicalBackend();
        return backend;
    }

private:
    // Pure static utility: instances are never created.
    DeviceManagerFactory() = delete;
    ~DeviceManagerFactory() = delete;

    // Glob patterns for the numbered device nodes. Kept in one place so that the probe and
    // the "nothing found" diagnostic always report the same pattern.
    static constexpr const char *NPU_DEV_NODE_PATTERN = "/dev/davinci[0-9]*";
    static constexpr const char *GPU_DEV_NODE_PATTERN = "/dev/nvidia[0-9]*";

    /**
     * @brief Return the Ascend manager when a test build forces the Ascend mock.
     * @return acl::AclDeviceManager::Instance() if forced, nullptr otherwise (always nullptr
     *         outside WITH_TESTS builds).
     */
    static DeviceManagerBase *DetectTestOverrideManager()
    {
#ifdef WITH_TESTS
        if (IsTestAscendMockForced()) {
            return acl::AclDeviceManager::Instance();
        }
#endif
        return nullptr;
    }

    /**
     * @brief Probe the host for NPU and GPU devices and pick a backend.
     * @return NPU if an NPU is present (also when a GPU is present too), GPU if only a GPU
     *         is present, UNKNOWN if neither is found.
     */
    static DeviceBackend DetectPhysicalBackend()
    {
        const bool hasNpu = HasNpuDevice();
        const bool hasGpu = HasGpuDevice();
        if (hasNpu && hasGpu) {
            LOG(WARNING) << "Both NPU (/dev/davinci*) and GPU (/dev/nvidia*) devices detected. "
                            "By policy, NPU backend is preferred.";
            return DeviceBackend::NPU;
        }
        if (hasNpu) {
            return DeviceBackend::NPU;
        }
        if (hasGpu) {
            return DeviceBackend::GPU;
        }
        return DeviceBackend::UNKNOWN;
    }

    /**
     * @brief Check whether a numbered Ascend device node exists.
     * @return true if a matching node is found; always false when USE_NPU is not defined.
     */
    static bool HasNpuDevice()
    {
#ifdef USE_NPU
        // A bracket expression matches ONE character, so the former "[0-16]" was the set
        // {0, 1, 6} and missed hosts/containers exposing only e.g. /dev/davinci2.
        // "[0-9]" accepts any leading digit; the trailing '*' covers multi-digit ids.
        return HasDevNode(NPU_DEV_NODE_PATTERN);
#else
        return false;
#endif
    }

    /**
     * @brief Check whether an NVIDIA GPU is available.
     * @return true if a numbered /dev/nvidia node exists or nvidia-smi succeeds; always
     *         false when USE_GPU is not defined.
     */
    static bool HasGpuDevice()
    {
#ifdef USE_GPU
        // Short-circuit: nvidia-smi is only spawned when no device node matched.
        return HasDevNode(GPU_DEV_NODE_PATTERN) || HasNvidiaSmi();
#else
        return false;
#endif
    }

    /**
     * @brief Check whether at least one path matches the given glob pattern.
     * @param[in] pattern Glob pattern passed to the project's Glob() helper
     *                    (assumed to follow glob(3) semantics - TODO confirm).
     * @return true if Glob() succeeds with a non-empty result; false on error, on no match,
     *         or when neither USE_NPU nor USE_GPU is defined.
     */
    static bool HasDevNode(const char *pattern)
    {
#if defined(USE_NPU) || defined(USE_GPU)
        std::vector<std::string> paths;
        Status rc = Glob(pattern, paths);
        if (rc.IsError()) {
            // Best effort: a failed probe is treated as "device not present".
            LOG(WARNING) << "Glob failed for pattern " << pattern << ", rc: " << rc.ToString();
            return false;
        }
        return !paths.empty();
#else
        (void)pattern;
        return false;
#endif
    }

    /**
     * @brief Fallback GPU probe: run "nvidia-smi -L" and check its exit status.
     * @return true if the command's status is 0; always false when USE_GPU is not defined.
     */
    static bool HasNvidiaSmi()
    {
#ifdef USE_GPU
        // NOTE(review): this spawns a shell and resolves nvidia-smi through PATH. The command
        // string is a fixed literal, so there is no injection from input here.
        const int ret = std::system("nvidia-smi -L > /dev/null 2>&1");
        return (ret == 0);
#else
        return false;
#endif
    }

    /**
     * @brief Tell whether the test environment forces the Ascend device manager.
     * @return true in WITH_TESTS builds when DS_TEST_FORCE_ASCEND_DEVICE_MANAGER is set to
     *         a non-empty value whose first character is not '0'; false otherwise.
     */
    static bool IsTestAscendMockForced()
    {
#ifdef WITH_TESTS
        const char *forceAscendMock = std::getenv("DS_TEST_FORCE_ASCEND_DEVICE_MANAGER");
        return forceAscendMock != nullptr && forceAscendMock[0] != '\0' && forceAscendMock[0] != '0';
#else
        return false;
#endif
    }

    /**
     * @brief Log which probes were attempted when no accelerator was found.
     */
    static void LogNoAcceleratorDetected()
    {
        LOG(INFO) << "No accelerator device detected. Checked:"
#ifdef USE_NPU
                  << " " << NPU_DEV_NODE_PATTERN
#endif
#ifdef USE_GPU
                  << " " << GPU_DEV_NODE_PATTERN << " and nvidia-smi"
#endif
                  << ". Ensure the device driver is installed and "
                     "the device node exists.";
    }

    /**
     * @brief Map a detected backend to its device manager.
     * @param[in] backend Backend reported by the probe.
     * @return The matching manager's Instance(), or nullptr when the backend is UNKNOWN or
     *         its support was not compiled in.
     */
    static DeviceManagerBase *SelectManager(DeviceBackend backend)
    {
        switch (backend) {
            case DeviceBackend::NPU:
#if defined(USE_NPU) || defined(WITH_TESTS)
                LOG(INFO) << "Detected NPU device (/dev/davinci*), using Ascend ACL backend.";
                return acl::AclDeviceManager::Instance();
#endif
                break;
            case DeviceBackend::GPU:
#ifdef USE_GPU
                LOG(INFO) << "Detected GPU device (/dev/nvidia*), using CUDA backend.";
                return cuda::CudaDeviceManager::Instance();
#endif
                break;
            case DeviceBackend::UNKNOWN:
            default:
                LogNoAcceleratorDetected();
                break;
        }
        return nullptr;
    }

    /**
     * @brief One-time backend detection used to initialise the cached manager.
     * @return The test override manager if forced; the Ascend manager in test builds
     *         without BUILD_HETERO; otherwise the manager for the physically detected
     *         backend (possibly nullptr).
     */
    static DeviceManagerBase *Detect()
    {
        if (auto *overrideMgr = DetectTestOverrideManager(); overrideMgr != nullptr) {
            return overrideMgr;
        }
#if defined(WITH_TESTS) && !defined(BUILD_HETERO)
        LOG(INFO) << "BUILD_HETERO is OFF in test build, fallback to Ascend device manager for mock-based tests.";
        return acl::AclDeviceManager::Instance();
#else
        return SelectManager(ProbePhysicalBackend());
#endif
    }
};
}
#endif