#include "base/sampling_heap_profiler/poisson_allocation_sampler.h"
#include <algorithm>
#include <atomic>
#include <cmath>
#include <cstdint>
#include <memory>
#include <utility>
#include "base/allocator/dispatcher/reentry_guard.h"
#include "base/allocator/dispatcher/tls.h"
#include "base/check.h"
#include "base/compiler_specific.h"
#include "base/no_destructor.h"
#include "base/rand_util.h"
#include "build/build_config.h"
namespace base {
namespace {
using ::base::allocator::dispatcher::ReentryGuard;
// Initial value of the mean sampling interval: 128 KiB.
constexpr size_t kDefaultSamplingIntervalBytes = size_t{128} * 1024;
// Amount (2^29) subtracted from a thread's accumulated-bytes counter by
// ScopedMuteThreadSamples while samples are muted on that thread.
constexpr intptr_t kAccumulatedBytesOffset = intptr_t{1} << 29;
// When true, GetNextSampleInterval() returns its argument unchanged instead of
// drawing a random interval. Toggled by ScopedSuppressRandomnessForTesting.
bool g_deterministic = false;
// Pointer to the LockFreeAddressHashSet currently in use. Starts out null, is
// set by the PoissonAllocationSampler constructor and is replaced by
// BalanceAddressesHashSet() when a larger set is created.
constinit std::atomic<LockFreeAddressHashSet*> g_sampled_addresses_set{nullptr};
// Mean number of bytes between samples. Written by SetSamplingInterval() and
// read by SamplingInterval() and DoRecordAllocation().
constinit std::atomic_size_t g_sampling_interval{kDefaultSamplingIntervalBytes};
// Per-thread bookkeeping for the sampler. All fields start zeroed/false.
struct ThreadLocalData {
  // Running byte counter. DoRecordAllocation() adds every allocation size to
  // it and only considers taking a sample once it is non-negative.
  intptr_t accumulated_bytes{0};
  // Copy of `accumulated_bytes` saved when ScopedMuteThreadSamples mutes the
  // thread and written back when the outermost mute scope ends.
  intptr_t accumulated_bytes_snapshot{0};
  // True while a ScopedMuteThreadSamples is active on this thread; this is
  // the value reported by ScopedMuteThreadSamples::IsMuted().
  bool internal_reentry_guard{false};
  // False until DoRecordAllocation() has drawn the first sampling interval
  // for this thread; cleared again when it observes that sampling is not
  // running.
  bool sampling_interval_initialized{false};
};
// Returns the calling thread's ThreadLocalData record.
//
// When USE_LOCAL_TLS_EMULATION() is enabled the record comes from a
// ThreadLocalStorage object that is created on first use; otherwise a plain
// `thread_local` variable is used.
ThreadLocalData* GetThreadLocalData() {
#if USE_LOCAL_TLS_EMULATION()
  using EmulatedTls =
      base::allocator::dispatcher::ThreadLocalStorage<ThreadLocalData>;
  static base::NoDestructor<EmulatedTls> emulated_tls(
      "poisson_allocation_sampler");
  return emulated_tls->GetThreadLocalData();
#else
  thread_local ThreadLocalData per_thread_data;
  return &per_thread_data;
#endif
}
}
// Builds a stats snapshot by storing every argument in the member of the same
// name. `address_cache_bucket_stats` is taken by value and moved into place;
// all other arguments are copied.
PoissonAllocationSamplerStats::PoissonAllocationSamplerStats(
size_t address_cache_hits,
size_t address_cache_misses,
size_t address_cache_max_size,
float address_cache_max_load_factor,
AddressCacheBucketStats address_cache_bucket_stats,
size_t bloom_filter_hits,
size_t bloom_filter_misses,
size_t bloom_filter_max_saturation)
: address_cache_hits(address_cache_hits),
address_cache_misses(address_cache_misses),
address_cache_max_size(address_cache_max_size),
address_cache_max_load_factor(address_cache_max_load_factor),
address_cache_bucket_stats(std::move(address_cache_bucket_stats)),
bloom_filter_hits(bloom_filter_hits),
bloom_filter_misses(bloom_filter_misses),
bloom_filter_max_saturation(bloom_filter_max_saturation) {}
// The destructor, copy constructor and copy assignment operator are all
// compiler-generated; they are defined here, out of line, in this file.
PoissonAllocationSamplerStats::~PoissonAllocationSamplerStats() = default;
PoissonAllocationSamplerStats::PoissonAllocationSamplerStats(
const PoissonAllocationSamplerStats&) = default;
PoissonAllocationSamplerStats& PoissonAllocationSamplerStats::operator=(
const PoissonAllocationSamplerStats&) = default;
// Mutes sample reporting on the calling thread for the lifetime of this
// object. Scopes may nest; only the outermost one touches the byte counter.
PoissonAllocationSampler::ScopedMuteThreadSamples::ScopedMuteThreadSamples() {
  ThreadLocalData* const tls = GetThreadLocalData();

  // Remember whether an enclosing scope had already muted this thread, then
  // raise the guard.
  was_muted_ = tls->internal_reentry_guard;
  tls->internal_reentry_guard = true;
  if (was_muted_) {
    return;
  }

  // Outermost scope: save the counter so the destructor can restore it, then
  // push it down by kAccumulatedBytesOffset. DoRecordAllocation() returns
  // early while the counter is negative.
  tls->accumulated_bytes_snapshot = tls->accumulated_bytes;
  tls->accumulated_bytes -= kAccumulatedBytesOffset;
}
// Ends the mute scope: puts the guard back to what it was before this object
// was created and, for the outermost scope, restores the saved byte counter.
PoissonAllocationSampler::ScopedMuteThreadSamples::~ScopedMuteThreadSamples() {
  ThreadLocalData* const tls = GetThreadLocalData();
  DCHECK(tls->internal_reentry_guard);

  tls->internal_reentry_guard = was_muted_;
  if (was_muted_) {
    // Nested scope: the enclosing scope owns the snapshot.
    return;
  }

  // Bytes accumulated while muted are discarded by restoring the snapshot.
  tls->accumulated_bytes = tls->accumulated_bytes_snapshot;
}
bool PoissonAllocationSampler::ScopedMuteThreadSamples::IsMuted() {
ThreadLocalData* const thread_local_data = GetThreadLocalData();
return thread_local_data->internal_reentry_guard;
}
// Switches the sampler into deterministic mode (GetNextSampleInterval() then
// returns the mean interval as-is). Only one instance may exist at a time.
PoissonAllocationSampler::ScopedSuppressRandomnessForTesting::
    ScopedSuppressRandomnessForTesting() {
  DCHECK(!g_deterministic);
  g_deterministic = true;

  // Start the calling thread from a zeroed byte counter so whatever value was
  // left in it does not influence the next recorded allocation.
  GetThreadLocalData()->accumulated_bytes = 0;
}
// Leaves deterministic mode again. DCHECKs that the mode was still enabled.
PoissonAllocationSampler::ScopedSuppressRandomnessForTesting::
~ScopedSuppressRandomnessForTesting() {
DCHECK(g_deterministic);
g_deterministic = false;
}
// Returns true while deterministic mode is enabled, i.e. while a
// ScopedSuppressRandomnessForTesting object is alive.
bool PoissonAllocationSampler::ScopedSuppressRandomnessForTesting::
IsSuppressed() {
return g_deterministic;
}
// Sets the kHookedSamplesMutedForTesting profiling-state flag and zeroes the
// calling thread's byte counter, remembering the old counter so the
// destructor can put it back.
PoissonAllocationSampler::ScopedMuteHookedSamplesForTesting::
    ScopedMuteHookedSamplesForTesting() {
  SetProfilingStateFlag(ProfilingStateFlag::kHookedSamplesMutedForTesting);

  ThreadLocalData* const tls = GetThreadLocalData();
  // Save the current counter and reset it to zero in one step.
  accumulated_bytes_snapshot_ = std::exchange(tls->accumulated_bytes, 0);
}
// Restores the byte counter saved by the constructor on the calling thread,
// then clears the kHookedSamplesMutedForTesting flag.
PoissonAllocationSampler::ScopedMuteHookedSamplesForTesting::
    ~ScopedMuteHookedSamplesForTesting() {
  GetThreadLocalData()->accumulated_bytes = accumulated_bytes_snapshot_;
  ResetProfilingStateFlag(ProfilingStateFlag::kHookedSamplesMutedForTesting);
}
// Move construction and move assignment are compiler-generated.
// NOTE(review): the destructor in this file restores the counter and resets
// the flag unconditionally, so it also does so for a moved-from object --
// confirm that this is what callers that move instances expect.
PoissonAllocationSampler::ScopedMuteHookedSamplesForTesting::
ScopedMuteHookedSamplesForTesting(ScopedMuteHookedSamplesForTesting&&) =
default;
PoissonAllocationSampler::ScopedMuteHookedSamplesForTesting&
PoissonAllocationSampler::ScopedMuteHookedSamplesForTesting::operator=(
ScopedMuteHookedSamplesForTesting&&) = default;
// Definition of the static, atomic bitmask of ProfilingStateFlag values. It
// starts with no flag set and is modified by SetProfilingStateFlag() and
// ResetProfilingStateFlag().
constinit std::atomic<PoissonAllocationSampler::ProfilingStateFlagMask>
PoissonAllocationSampler::profiling_state_{0};
// Runs the one-time initialization and publishes the initial sampled-address
// set, created with 64 buckets and bound to `mutex_`.
PoissonAllocationSampler::PoissonAllocationSampler() {
  Init();
  // The set is heap-allocated and only referenced through the global atomic
  // pointer; the release store pairs with the acquire load in
  // sampled_addresses_set().
  g_sampled_addresses_set.store(new LockFreeAddressHashSet(64, mutex_),
                                std::memory_order_release);
}
// One-time setup. The function-local static is initialized by the lambda on
// the first call only; later calls do nothing.
void PoissonAllocationSampler::Init() {
[[maybe_unused]] static bool init_once = [] {
// Touch the thread-local data once so it is set up on the initializing
// thread, then initialize the TLS slot used by ReentryGuard.
GetThreadLocalData();
ReentryGuard::InitTLSSlot();
return true;
}();
}
// Sets the mean number of bytes between samples.
//
// `sampling_interval_bytes` is stored with relaxed ordering and picked up by
// subsequent calls to DoRecordAllocation(). A value of zero is raised to one
// byte: DoRecordAllocation() divides (and takes a remainder) by the stored
// interval, so a stored zero would be a division by zero there.
void PoissonAllocationSampler::SetSamplingInterval(
    size_t sampling_interval_bytes) {
  constexpr size_t kMinSamplingIntervalBytes = 1;
  g_sampling_interval.store(
      std::max(sampling_interval_bytes, kMinSamplingIntervalBytes),
      std::memory_order_relaxed);
}
// Returns the currently configured mean sampling interval in bytes (relaxed
// load of the global value written by SetSamplingInterval()).
size_t PoissonAllocationSampler::SamplingInterval() const {
return g_sampling_interval.load(std::memory_order_relaxed);
}
// Sets the load factor at which BalanceAddressesHashSet() creates a larger
// sampled-address set. An empty `load_factor` selects 1.0. The value is
// updated while holding `mutex_`.
void PoissonAllocationSampler::SetTargetHashSetLoadFactor(
    std::optional<float> load_factor) {
  AutoLock auto_lock(mutex_);
  if (load_factor.has_value()) {
    address_cache_target_load_factor_ = *load_factor;
  } else {
    address_cache_target_load_factor_ = 1.0;
  }
}
// Returns the statistics gathered since the previous call and resets every
// counter and high-water mark to zero. The bucket statistics are read from
// the current sampled-address set and are not reset here.
//
// Runs with samples muted on the calling thread and with `mutex_` held.
PoissonAllocationSamplerStats PoissonAllocationSampler::GetAndResetStats() {
  ScopedMuteThreadSamples mute_scope;
  AutoLock auto_lock(mutex_);

  // Atomic counters are swapped with zero; the plain members are exchanged
  // directly since `mutex_` is held.
  const auto cache_hits =
      address_cache_hits_.exchange(0, std::memory_order_relaxed);
  const auto cache_misses =
      address_cache_misses_.exchange(0, std::memory_order_relaxed);
  const auto cache_max_size = std::exchange(address_cache_max_size_, 0);
  const auto cache_max_load_factor =
      std::exchange(address_cache_max_load_factor_, 0.0);
  const auto filter_hits =
      bloom_filter_hits_.exchange(0, std::memory_order_relaxed);
  const auto filter_misses =
      bloom_filter_misses_.exchange(0, std::memory_order_relaxed);
  const auto filter_max_saturation =
      std::exchange(bloom_filter_max_saturation_, 0);

  return PoissonAllocationSamplerStats(
      cache_hits, cache_misses, cache_max_size, cache_max_load_factor,
      sampled_addresses_set().GetBucketStats(), filter_hits, filter_misses,
      filter_max_saturation);
}
// Returns the number of bytes until the next sample, given the mean
// `interval`.
//
// In deterministic (testing) mode the mean is returned unchanged. Otherwise
// the result is -ln(1 - u) * interval, i.e. a draw from an exponential
// distribution with mean `interval`, where u comes from
// internal::RandDoubleAvoidAllocation() (presumably uniform in [0, 1) --
// confirm against that helper). The draw is limited to the range
// [sizeof(intptr_t), 20 * interval]; the lower bound is checked first.
size_t PoissonAllocationSampler::GetNextSampleInterval(size_t interval) {
  if (g_deterministic) [[unlikely]] {
    return interval;
  }

  const double random_unit = internal::RandDoubleAvoidAllocation();
  const double drawn_bytes = -std::log(1 - random_unit) * interval;

  constexpr size_t kLowerBound = sizeof(intptr_t);
  if (drawn_bytes < kLowerBound) [[unlikely]] {
    return kLowerBound;
  }

  // Cap the draw so a single interval cannot become arbitrarily long.
  const size_t upper_bound = interval * 20;
  if (drawn_bytes > upper_bound) [[unlikely]] {
    return upper_bound;
  }

  return static_cast<size_t>(drawn_bytes);
}
// Accounts for an allocation of `size` bytes at `address` and, when the
// calling thread's byte counter reaches a sample point while `state` has
// kIsRunning set, records `address` in the sampled-address set and reports
// it to every registered observer.
//
// `state` is the profiling-state mask supplied by the caller. `type` and
// `context` are forwarded unchanged to SamplesObserver::SampleAdded().
void PoissonAllocationSampler::DoRecordAllocation(
const ProfilingStateFlagMask state,
void* address,
size_t size,
base::allocator::dispatcher::AllocationSubsystem type,
const char* context) {
ThreadLocalData* const thread_local_data = GetThreadLocalData();
// Charge the allocation to the per-thread counter. The sampling code below
// always leaves the counter negative, so a negative value means the next
// sample point has not been reached yet.
thread_local_data->accumulated_bytes += size;
intptr_t accumulated_bytes = thread_local_data->accumulated_bytes;
if (accumulated_bytes < 0) [[likely]] {
return;
}
// Sampling is not running: reset this thread's sampling state so that a
// later run draws a fresh first interval.
if (!(state & ProfilingStateFlag::kIsRunning)) [[unlikely]] {
thread_local_data->sampling_interval_initialized = false;
thread_local_data->accumulated_bytes = 0;
return;
}
// A null address is never sampled. The bytes added above stay in the
// counter.
if (!address) [[unlikely]] {
return;
}
size_t mean_interval = g_sampling_interval.load(std::memory_order_relaxed);
// First sample candidate on this thread since the flag was last cleared:
// draw an initial interval and continue only if the accumulated bytes
// actually reach it.
if (!thread_local_data->sampling_interval_initialized) [[unlikely]] {
thread_local_data->sampling_interval_initialized = true;
accumulated_bytes -= GetNextSampleInterval(mean_interval);
if (accumulated_bytes < 0) {
thread_local_data->accumulated_bytes = accumulated_bytes;
return;
}
}
// `accumulated_bytes` is non-negative here, so the cast to size_t keeps its
// value. Count one sample per whole mean interval covered and keep only the
// remainder in the counter.
size_t samples = static_cast<size_t>(accumulated_bytes) / mean_interval;
accumulated_bytes %= mean_interval;
// Subtract freshly drawn intervals until the counter is negative again.
// Each draw counts as one more sample, so at least one is always counted.
do {
accumulated_bytes -= GetNextSampleInterval(mean_interval);
++samples;
} while (accumulated_bytes >= 0);
// Store the new, negative counter before anything below can allocate.
thread_local_data->accumulated_bytes = accumulated_bytes;
// Nothing is reported while a ScopedMuteThreadSamples is active on this
// thread.
if (ScopedMuteThreadSamples::IsMuted()) [[unlikely]] {
return;
}
// Mute this thread for the rest of the function so that allocations made
// below are not reported as samples themselves.
ScopedMuteThreadSamples no_reentrancy_scope;
// The observer list is copied under the lock; observers are notified after
// the lock has been released.
std::vector<SamplesObserver*> observers_copy;
{
AutoLock lock(mutex_);
LockFreeAddressHashSet& address_cache = sampled_addresses_set();
// The address is already tracked: nothing to insert and nothing to report.
if (address_cache.Contains(address) ==
LockFreeAddressHashSet::ContainsResult::kFound) {
return;
}
address_cache.Insert(address);
// May replace the global set with a larger copy, hence the re-read below.
BalanceAddressesHashSet();
const LockFreeAddressHashSet& balanced_address_cache =
sampled_addresses_set();
// Update the high-water marks reported by GetAndResetStats().
address_cache_max_size_ =
std::max(address_cache_max_size_, balanced_address_cache.size());
address_cache_max_load_factor_ = std::max(
address_cache_max_load_factor_, balanced_address_cache.load_factor());
if (balanced_address_cache.HasBloomFilter()) {
bloom_filter_max_saturation_ =
std::max(bloom_filter_max_saturation_,
balanced_address_cache.MaxBloomFilterSaturation());
}
observers_copy = observers_;
}
// Bytes attributed to this sample: one mean interval per counted sample.
size_t total_allocated = mean_interval * samples;
for (base::PoissonAllocationSampler::SamplesObserver* observer :
observers_copy) {
observer->SampleAdded(address, size, total_allocated, type, context);
}
}
void PoissonAllocationSampler::DoRecordFree(void* address) {
ScopedMuteThreadSamples no_reentrancy_scope;
std::vector<SamplesObserver*> observers_copy;
{
AutoLock lock(mutex_);
observers_copy = observers_;
sampled_addresses_set().Remove(address);
}
for (base::PoissonAllocationSampler::SamplesObserver* observer :
observers_copy) {
observer->SampleRemoved(address);
}
}
void PoissonAllocationSampler::BalanceAddressesHashSet() {
LockFreeAddressHashSet& current_set = sampled_addresses_set();
if (current_set.load_factor() < address_cache_target_load_factor_) {
return;
}
auto new_set = std::make_unique<LockFreeAddressHashSet>(
current_set.buckets_count() * 2, mutex_);
new_set->Copy(current_set);
g_sampled_addresses_set.store(new_set.release(), std::memory_order_release);
}
// Returns the sampled-address set that is currently published. The acquire
// load pairs with the release stores made when a set is installed. The
// pointer must already have been set; it is dereferenced unchecked.
LockFreeAddressHashSet& PoissonAllocationSampler::sampled_addresses_set() {
  LockFreeAddressHashSet* const published_set =
      g_sampled_addresses_set.load(std::memory_order_acquire);
  return *published_set;
}
// Returns the process-wide sampler instance. It lives in a function-local
// static NoDestructor and is therefore constructed on the first call
// (presumably never destroyed, as the wrapper's name suggests).
PoissonAllocationSampler* PoissonAllocationSampler::Get() {
static NoDestructor<PoissonAllocationSampler> instance;
return instance.get();
}
intptr_t PoissonAllocationSampler::GetAccumulatedBytesForTesting() {
return GetThreadLocalData()->accumulated_bytes;
}
// Sets `flag` in the global profiling state. Setting kIsRunning also sets
// kWasStarted, which ResetProfilingStateFlag() refuses to clear. DCHECKs that
// `flag` was not already set.
void PoissonAllocationSampler::SetProfilingStateFlag(ProfilingStateFlag flag) {
  ProfilingStateFlagMask bits_to_set = flag;
  if (flag == ProfilingStateFlag::kIsRunning) {
    bits_to_set |= ProfilingStateFlag::kWasStarted;
  }

  const ProfilingStateFlagMask previous_state =
      profiling_state_.fetch_or(bits_to_set, std::memory_order_relaxed);
  DCHECK(!(previous_state & flag));
}
// Clears `flag` in the global profiling state. kWasStarted may not be passed
// here, and `flag` is expected to be set on entry (both DCHECKed).
void PoissonAllocationSampler::ResetProfilingStateFlag(
    ProfilingStateFlag flag) {
  DCHECK_NE(flag, kWasStarted);

  const ProfilingStateFlagMask previous_state =
      profiling_state_.fetch_and(~flag, std::memory_order_relaxed);
  DCHECK(previous_state & flag);
}
// Registers `observer`, which must not be registered already (DCHECKed).
// Registering the first observer sets the kIsRunning flag.
//
// Holds a ReentryGuard, mutes samples on the calling thread and takes
// `mutex_` for the whole call.
void PoissonAllocationSampler::AddSamplesObserver(SamplesObserver* observer) {
  ReentryGuard reentry_guard;
  ScopedMuteThreadSamples mute_scope;
  AutoLock auto_lock(mutex_);

  DCHECK(std::ranges::find(observers_, observer) == observers_.end());
  const bool is_first_observer = observers_.empty();
  observers_.push_back(observer);

  // The sampled-address set must exist before sampling can be switched on.
  DCHECK(g_sampled_addresses_set.load(std::memory_order_relaxed));

  // The flag is only changed while `mutex_` is held, based on whether the
  // observer list was empty.
  if (is_first_observer) {
    SetProfilingStateFlag(ProfilingStateFlag::kIsRunning);
  }
  DCHECK(profiling_state_.load(std::memory_order_relaxed) &
         ProfilingStateFlag::kIsRunning);
}
// Unregisters `observer`, which must currently be registered (CHECKed).
// Removing the last observer clears the kIsRunning flag.
//
// Holds a ReentryGuard, mutes samples on the calling thread and takes
// `mutex_` for the whole call.
void PoissonAllocationSampler::RemoveSamplesObserver(
    SamplesObserver* observer) {
  ReentryGuard reentry_guard;
  ScopedMuteThreadSamples mute_scope;
  AutoLock auto_lock(mutex_);

  const auto position = std::ranges::find(observers_, observer);
  CHECK(position != observers_.end());
  observers_.erase(position);

  // At least one observer was registered until now, so sampling must have
  // been running.
  DCHECK(profiling_state_.load(std::memory_order_relaxed) &
         ProfilingStateFlag::kIsRunning);
  if (observers_.empty()) {
    ResetProfilingStateFlag(ProfilingStateFlag::kIsRunning);
  }
}
}