#include "services/on_device_model/ml/performance_class.h"
#include "base/compiler_specific.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_functions.h"
#include "base/strings/strcat.h"
#include "base/system/sys_info.h"
#include "components/optimization_guide/core/optimization_guide_features.h"
#include "services/on_device_model/ml/gpu_blocklist.h"
namespace ml {
namespace {
// Number of bytes in one MB (2^20). Used to convert byte counts reported by
// the benchmark into MB.
constexpr uint64_t kBytesPerMb = 1024 * 1024;

// The parameters below all hang off the kOnDeviceModelPerformanceParams
// feature; the last constructor argument is the default value.

// Devices whose (possibly adjusted) device heap size, in MB, is below this
// value are classified as kVeryLow.
const base::FeatureParam<int> kLowRAMThreshold{
&optimization_guide::features::kOnDeviceModelPerformanceParams,
"on_device_low_ram_threshold_mb", 3000};
// Devices whose device heap size, in MB, is below this value are capped at
// kMedium even when their input speed would qualify for a higher class.
const base::FeatureParam<int> kHighRAMThreshold{
&optimization_guide::features::kOnDeviceModelPerformanceParams,
"on_device_high_ram_threshold_mb", 5500};
// Devices whose benchmarked output speed is below this value are classified
// as kVeryLow, regardless of input speed.
const base::FeatureParam<int> kLowOutputThreshold{
&optimization_guide::features::kOnDeviceModelPerformanceParams,
"on_device_low_output_threshold", 5};
// Input-speed cut-offs. With the thresholds below, input speed maps to:
//   [0, low)            -> kVeryLow
//   [low, medium)       -> kLow
//   [medium, high)      -> kMedium
//   [high, very_high)   -> kHigh (kMedium if below the high RAM threshold)
//   [very_high, inf)    -> kVeryHigh (kMedium if below the high RAM threshold)
const base::FeatureParam<int> kLowThreshold{
&optimization_guide::features::kOnDeviceModelPerformanceParams,
"on_device_low_threshold", 50};
const base::FeatureParam<int> kMediumThreshold{
&optimization_guide::features::kOnDeviceModelPerformanceParams,
"on_device_medium_threshold", 75};
const base::FeatureParam<int> kHighThreshold{
&optimization_guide::features::kOnDeviceModelPerformanceParams,
"on_device_high_threshold", 150};
const base::FeatureParam<int> kVeryHighThreshold{
&optimization_guide::features::kOnDeviceModelPerformanceParams,
"on_device_very_high_threshold", 500};
// Why a device was classified as kVeryLow. The values are recorded in the
// "OnDeviceModel.BenchmarkVeryLowReason" histogram, so existing entries
// should keep their numeric values and kMaxValue must track the last entry.
enum class VeryLowPerformanceReason {
// Device heap size was below the low RAM threshold.
kLowRAM = 0,
// Benchmarked output speed was below the low output threshold.
kSlowOutput = 1,
// Benchmarked input speed was below the low input threshold.
kSlowInput = 2,
kMaxValue = kSlowInput,
};
// Records which check caused the device to be classified as kVeryLow.
void LogVeryLowReason(VeryLowPerformanceReason reason) {
  static constexpr char kVeryLowReasonHistogram[] =
      "OnDeviceModel.BenchmarkVeryLowReason";
  base::UmaHistogramEnumeration(kVeryLowReasonHistogram, reason);
}
}
// Returns the configured low RAM threshold, in MB, as an unsigned value.
// Devices below this threshold are classified as kVeryLow.
COMPONENT_EXPORT(ON_DEVICE_MODEL_ML)
uint64_t GetLowRamThresholdMb() {
  const int low_threshold_mb = kLowRAMThreshold.Get();
  return static_cast<uint64_t>(low_threshold_mb);
}
// Returns the configured high RAM threshold, in MB, as an unsigned value.
// Devices below this threshold are capped at kMedium.
COMPONENT_EXPORT(ON_DEVICE_MODEL_ML)
uint64_t GetHighRamThresholdMb() {
  const int high_threshold_mb = kHighRAMThreshold.Get();
  return static_cast<uint64_t>(high_threshold_mb);
}
// Queries GPU information and runs the performance benchmark through
// |chrome_ml|, then classifies the device.
//
// Returns a pair of:
//   - DevicePerformanceInfo: the performance class and the VRAM estimate (MB)
//     used for classification. vram_mb is 0 when the GPU is blocked or the
//     benchmark fails.
//   - DeviceInfo: vendor id, device id, driver version and fp16 support copied
//     from QueryDeviceInfo(). When the GPU is blocked these fields are left as
//     created by DeviceInfo::New().
//
// Classification, first match wins:
//   1. GPU blocked                              -> kGpuBlocked
//   2. Benchmark failed                         -> kError
//   3. VRAM estimate below low RAM threshold    -> kVeryLow
//   4. Output speed below low output threshold  -> kVeryLow
//   5. Input speed below low threshold          -> kVeryLow
//   6. Input speed below medium threshold       -> kLow
//   7. Input speed below high threshold, or
//      VRAM estimate below high RAM threshold   -> kMedium
//   8. Input speed below very-high threshold    -> kHigh
//   9. Otherwise                                -> kVeryHigh
//
// NOTE(review): DISABLE_CFI_DLSYM is presumably required because the
// chrome_ml.api() entry points are resolved from a dynamically loaded library
// — confirm against ChromeML.
DISABLE_CFI_DLSYM
COMPONENT_EXPORT(ON_DEVICE_MODEL_ML)
std::pair<on_device_model::mojom::DevicePerformanceInfoPtr,
          on_device_model::mojom::DeviceInfoPtr>
GetDeviceAndPerformanceInfo(const ChromeML& chrome_ml) {
  auto perf_info = on_device_model::mojom::DevicePerformanceInfo::New();
  auto device_info = on_device_model::mojom::DeviceInfo::New();

  // A blocked GPU short-circuits everything: the benchmark is not run and
  // |device_info| is returned without the GPU identifiers filled in.
  ml::DeviceInfo query_device_info =
      ml::QueryDeviceInfo(chrome_ml.api(), true);
  if (query_device_info.gpu_blocked_reason != GpuBlockedReason::kNotBlocked) {
    perf_info->performance_class =
        on_device_model::mojom::PerformanceClass::kGpuBlocked;
    perf_info->vram_mb = 0ul;
    return std::make_pair(std::move(perf_info), std::move(device_info));
  }

  device_info->vendor_id = query_device_info.vendor_id;
  device_info->device_id = query_device_info.device_id;
  device_info->driver_version = query_device_info.driver_version;
  device_info->supports_fp16 = query_device_info.supports_fp16;

  // Run the benchmark and record whether it produced a result.
  ChromeMLPerformanceInfo info;
  bool success = chrome_ml.api().GetEstimatedPerformance(&info);
  base::UmaHistogramBoolean("OnDeviceModel.BenchmarkSuccess", success);
  if (!success) {
    perf_info->performance_class =
        on_device_model::mojom::PerformanceClass::kError;
    perf_info->vram_mb = 0ul;
    return std::make_pair(std::move(perf_info), std::move(device_info));
  }

  const float input_speed = info.input_speed;
  const float output_speed = info.output_speed;
  const bool is_integrated_gpu = info.is_integrated_gpu;
  // Memory histograms are split by GPU type via this suffix.
  const char* const gpu_type_suffix =
      is_integrated_gpu ? "Integrated" : "Discrete";

  int system_ram = base::SysInfo::AmountOfPhysicalMemory().InMiB();
  base::UmaHistogramMemoryLargeMB(
      base::StrCat({"OnDeviceModel.SystemRAM.", gpu_type_suffix}), system_ram);

  uint64_t device_heap_mb = info.device_heap_size / kBytesPerMb;
  base::UmaHistogramMemoryLargeMB(
      base::StrCat({"OnDeviceModel.DeviceHeapSize.", gpu_type_suffix}),
      device_heap_mb);

  // A zero max buffer size is not reported.
  if (info.max_buffer_size) {
    // The histogram expects MB; convert from the raw value the same way
    // device_heap_size is converted above.
    base::UmaHistogramMemoryLargeMB(
        base::StrCat({"OnDeviceModel.MaxBufferSize.", gpu_type_suffix}),
        info.max_buffer_size / kBytesPerMb);
  }

  base::UmaHistogramCounts10000(
      "OnDeviceModel.BenchmarkEstimatedTokensPerSecond.Input", input_speed);
  base::UmaHistogramCounts1000(
      "OnDeviceModel.BenchmarkEstimatedTokensPerSecond.Output", output_speed);

  // For integrated GPUs, use at least half of system RAM as the VRAM
  // estimate (presumably because they share system memory, so the reported
  // heap size can understate what is usable).
  if (is_integrated_gpu) {
    device_heap_mb =
        std::max(static_cast<uint64_t>(system_ram / 2), device_heap_mb);
  }
  perf_info->vram_mb = device_heap_mb;

  // Too little memory: very low regardless of benchmark speed.
  if (device_heap_mb < GetLowRamThresholdMb()) {
    LogVeryLowReason(VeryLowPerformanceReason::kLowRAM);
    perf_info->performance_class =
        on_device_model::mojom::PerformanceClass::kVeryLow;
    return std::make_pair(std::move(perf_info), std::move(device_info));
  }

  // Too slow at producing output: very low regardless of input speed.
  if (output_speed < kLowOutputThreshold.Get()) {
    LogVeryLowReason(VeryLowPerformanceReason::kSlowOutput);
    perf_info->performance_class =
        on_device_model::mojom::PerformanceClass::kVeryLow;
    return std::make_pair(std::move(perf_info), std::move(device_info));
  }

  // Bucket by input speed. Devices below the high RAM threshold cannot rank
  // above kMedium.
  if (input_speed < kLowThreshold.Get()) {
    LogVeryLowReason(VeryLowPerformanceReason::kSlowInput);
    perf_info->performance_class =
        on_device_model::mojom::PerformanceClass::kVeryLow;
  } else if (input_speed < kMediumThreshold.Get()) {
    perf_info->performance_class =
        on_device_model::mojom::PerformanceClass::kLow;
  } else if (input_speed < kHighThreshold.Get() ||
             device_heap_mb < GetHighRamThresholdMb()) {
    perf_info->performance_class =
        on_device_model::mojom::PerformanceClass::kMedium;
  } else if (input_speed < kVeryHighThreshold.Get()) {
    perf_info->performance_class =
        on_device_model::mojom::PerformanceClass::kHigh;
  } else {
    perf_info->performance_class =
        on_device_model::mojom::PerformanceClass::kVeryHigh;
  }
  return std::make_pair(std::move(perf_info), std::move(device_info));
}
}