#ifndef BASE_ALLOCATOR_PARTITION_ALLOCATOR_THREAD_CACHE_H_
#define BASE_ALLOCATOR_PARTITION_ALLOCATOR_THREAD_CACHE_H_
#include <atomic>
#include <cstdint>
#include <limits>
#include <memory>
#include "base/allocator/partition_allocator/partition_alloc-inl.h"
#include "base/allocator/partition_allocator/partition_alloc_base/compiler_specific.h"
#include "base/allocator/partition_allocator/partition_alloc_base/component_export.h"
#include "base/allocator/partition_allocator/partition_alloc_base/debug/debugging_buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc_base/gtest_prod_util.h"
#include "base/allocator/partition_allocator/partition_alloc_base/thread_annotations.h"
#include "base/allocator/partition_allocator/partition_alloc_base/time/time.h"
#include "base/allocator/partition_allocator/partition_alloc_buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc_config.h"
#include "base/allocator/partition_allocator/partition_alloc_forward.h"
#include "base/allocator/partition_allocator/partition_bucket_lookup.h"
#include "base/allocator/partition_allocator/partition_freelist_entry.h"
#include "base/allocator/partition_allocator/partition_lock.h"
#include "base/allocator/partition_allocator/partition_stats.h"
#include "base/allocator/partition_allocator/partition_tls.h"
#include "build/build_config.h"
#if defined(ARCH_CPU_X86_64) && BUILDFLAG(HAS_64_BIT_POINTERS)
#include <algorithm>
#endif
namespace partition_alloc {
class ThreadCache;
namespace tools {
#if BUILDFLAG(HAS_64_BIT_POINTERS)
constexpr uintptr_t kNeedle1 = 0xe69e32f3ad9ea63;
constexpr uintptr_t kNeedle2 = 0x9615ee1c5eb14caf;
#else
constexpr uintptr_t kNeedle1 = 0xe69e32f3;
constexpr uintptr_t kNeedle2 = 0x9615ee1c;
#endif
constexpr size_t kThreadCacheNeedleArraySize = 4;
extern uintptr_t kThreadCacheNeedleArray[kThreadCacheNeedleArraySize];
class HeapDumper;
class ThreadCacheInspector;
}
namespace internal {
extern PA_COMPONENT_EXPORT(PARTITION_ALLOC) PartitionTlsKey g_thread_cache_key;
#if PA_CONFIG(THREAD_CACHE_FAST_TLS)
extern PA_COMPONENT_EXPORT(
PARTITION_ALLOC) thread_local ThreadCache* g_thread_cache;
#endif
}
struct ThreadCacheLimits {
static constexpr size_t kDefaultSizeThreshold = 512;
static constexpr size_t kLargeSizeThreshold = 1 << 15;
static_assert(kLargeSizeThreshold <= std::numeric_limits<uint16_t>::max(),
"");
};
class PA_COMPONENT_EXPORT(PARTITION_ALLOC) ThreadCacheRegistry {
public:
static ThreadCacheRegistry& Instance();
inline constexpr ThreadCacheRegistry();
void RegisterThreadCache(ThreadCache* cache);
void UnregisterThreadCache(ThreadCache* cache);
void DumpStats(bool my_thread_only, ThreadCacheStats* stats);
void PurgeAll();
void RunPeriodicPurge();
int64_t GetPeriodicPurgeNextIntervalInMicroseconds() const;
void SetThreadCacheMultiplier(float multiplier);
void SetLargestActiveBucketIndex(uint8_t largest_active_bucket_index);
static internal::Lock& GetLock() { return Instance().lock_; }
void ForcePurgeAllThreadAfterForkUnsafe();
void ResetForTesting();
static constexpr internal::base::TimeDelta kMinPurgeInterval =
internal::base::Seconds(1);
static constexpr internal::base::TimeDelta kMaxPurgeInterval =
internal::base::Minutes(1);
static constexpr internal::base::TimeDelta kDefaultPurgeInterval =
2 * kMinPurgeInterval;
static constexpr size_t kMinCachedMemoryForPurging = 500 * 1024;
private:
friend class tools::ThreadCacheInspector;
friend class tools::HeapDumper;
internal::Lock lock_;
ThreadCache* list_head_ PA_GUARDED_BY(GetLock()) = nullptr;
bool periodic_purge_is_initialized_ = false;
internal::base::TimeDelta periodic_purge_next_interval_ =
kDefaultPurgeInterval;
uint8_t largest_active_bucket_index_ = internal::BucketIndexLookup::GetIndex(
ThreadCacheLimits::kDefaultSizeThreshold);
};
constexpr ThreadCacheRegistry::ThreadCacheRegistry() = default;
#if PA_CONFIG(THREAD_CACHE_ENABLE_STATISTICS)
#define PA_INCREMENT_COUNTER(counter) ++counter
#else
#define PA_INCREMENT_COUNTER(counter) \
do { \
} while (0)
#endif
#if BUILDFLAG(PA_DCHECK_IS_ON)
namespace internal {
class ReentrancyGuard {
public:
explicit ReentrancyGuard(bool& flag) : flag_(flag) {
PA_CHECK(!flag_);
flag_ = true;
}
~ReentrancyGuard() { flag_ = false; }
private:
bool& flag_;
};
}
#define PA_REENTRANCY_GUARD(x) \
internal::ReentrancyGuard guard { \
x \
}
#else
#define PA_REENTRANCY_GUARD(x) \
do { \
} while (0)
#endif
class PA_COMPONENT_EXPORT(PARTITION_ALLOC) ThreadCache {
public:
static void Init(PartitionRoot<>* root);
static void DeleteForTesting(ThreadCache* tcache);
static void SwapForTesting(PartitionRoot<>* root);
static void RemoveTombstoneForTesting();
static void EnsureThreadSpecificDataInitialized();
static ThreadCache* Get() {
#if PA_CONFIG(THREAD_CACHE_FAST_TLS)
return internal::g_thread_cache;
#else
return reinterpret_cast<ThreadCache*>(
internal::PartitionTlsGet(internal::g_thread_cache_key));
#endif
}
static bool IsValid(ThreadCache* tcache) {
#if BUILDFLAG(IS_OHOS)
if (reinterpret_cast<uintptr_t>(tcache) < kInvalidPointer) {
return false;
}
#endif
return reinterpret_cast<uintptr_t>(tcache) & kTombstoneMask;
}
static bool IsTombstone(ThreadCache* tcache) {
return reinterpret_cast<uintptr_t>(tcache) == kTombstone;
}
static ThreadCache* Create(PartitionRoot<>* root);
~ThreadCache();
ThreadCache(const ThreadCache&) = delete;
ThreadCache(const ThreadCache&&) = delete;
ThreadCache& operator=(const ThreadCache&) = delete;
PA_ALWAYS_INLINE bool MaybePutInCache(uintptr_t slot_start,
size_t bucket_index,
size_t* slot_size);
PA_ALWAYS_INLINE uintptr_t GetFromCache(size_t bucket_index,
size_t* slot_size);
void SetShouldPurge();
void Purge();
void TryPurge();
size_t CachedMemory() const;
void AccumulateStats(ThreadCacheStats* stats) const;
static void PurgeCurrentThread();
const ThreadAllocStats& thread_alloc_stats() const {
return thread_alloc_stats_;
}
size_t bucket_count_for_testing(size_t index) const {
return buckets_[index].count;
}
internal::base::PlatformThreadId thread_id() const { return thread_id_; }
static void SetLargestCachedSize(size_t size);
PA_ALWAYS_INLINE void RecordAllocation(size_t size);
PA_ALWAYS_INLINE void RecordDeallocation(size_t size);
void ResetPerThreadAllocationStatsForTesting();
static constexpr uint16_t kBatchFillRatio = 8;
static constexpr float kDefaultMultiplier = 2.;
static constexpr uint8_t kSmallBucketBaseCount = 64;
static constexpr size_t kDefaultSizeThreshold =
ThreadCacheLimits::kDefaultSizeThreshold;
static constexpr size_t kLargeSizeThreshold =
ThreadCacheLimits::kLargeSizeThreshold;
const ThreadCache* prev_for_testing() const
PA_EXCLUSIVE_LOCKS_REQUIRED(ThreadCacheRegistry::GetLock()) {
return prev_;
}
const ThreadCache* next_for_testing() const
PA_EXCLUSIVE_LOCKS_REQUIRED(ThreadCacheRegistry::GetLock()) {
return next_;
}
private:
friend class tools::HeapDumper;
friend class tools::ThreadCacheInspector;
struct Bucket {
internal::PartitionFreelistEntry* freelist_head = nullptr;
uint8_t count = 0;
std::atomic<uint8_t> limit{};
uint16_t slot_size = 0;
Bucket();
};
static_assert(sizeof(Bucket) <= 2 * sizeof(void*), "Keep Bucket small.");
explicit ThreadCache(PartitionRoot<>* root);
static void Delete(void* thread_cache_ptr);
static void* operator new(size_t count);
static void operator delete(void* ptr);
void PurgeInternal();
template <bool crash_on_corruption>
void PurgeInternalHelper();
void FillBucket(size_t bucket_index);
template <bool crash_on_corruption>
void ClearBucketHelper(Bucket& bucket, size_t limit);
void ClearBucket(Bucket& bucket, size_t limit);
PA_ALWAYS_INLINE void PutInBucket(Bucket& bucket, uintptr_t slot_start);
void ResetForTesting();
template <bool crash_on_corruption>
void FreeAfter(internal::PartitionFreelistEntry* head, size_t slot_size);
static void SetGlobalLimits(PartitionRoot<>* root, float multiplier);
static constexpr uint16_t kBucketCount =
internal::BucketIndexLookup::GetIndex(ThreadCache::kLargeSizeThreshold) +
1;
static_assert(
kBucketCount < internal::kNumBuckets,
"Cannot have more cached buckets than what the allocator supports");
static constexpr uintptr_t kTombstone = 0x1;
static constexpr uintptr_t kTombstoneMask = ~kTombstone;
#if BUILDFLAG(IS_OHOS)
static constexpr uintptr_t kInvalidPointer = 0x1000;
#endif
static uint8_t global_limits_[kBucketCount];
static uint16_t largest_active_bucket_index_;
uint32_t cached_memory_ = 0;
std::atomic<bool> should_purge_;
ThreadCacheStats stats_;
ThreadAllocStats thread_alloc_stats_;
Bucket buckets_[kBucketCount];
PartitionRoot<>* const root_;
const internal::base::PlatformThreadId thread_id_;
#if BUILDFLAG(PA_DCHECK_IS_ON)
bool is_in_thread_cache_ = false;
#endif
ThreadCache* next_ PA_GUARDED_BY(ThreadCacheRegistry::GetLock());
ThreadCache* prev_ PA_GUARDED_BY(ThreadCacheRegistry::GetLock());
friend class ThreadCacheRegistry;
friend class PartitionAllocThreadCacheTest;
friend class tools::ThreadCacheInspector;
PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest, Simple);
PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
MultipleObjectsCachedPerBucket);
PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
LargeAllocationsAreNotCached);
PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
MultipleThreadCaches);
PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest, RecordStats);
PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
ThreadCacheRegistry);
PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
MultipleThreadCachesAccounting);
PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
DynamicCountPerBucket);
PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
DynamicCountPerBucketClamping);
PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
DynamicCountPerBucketMultipleThreads);
PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
DynamicSizeThreshold);
PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
DynamicSizeThresholdPurge);
PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest, ClearFromTail);
};
PA_ALWAYS_INLINE bool ThreadCache::MaybePutInCache(uintptr_t slot_start,
size_t bucket_index,
size_t* slot_size) {
PA_REENTRANCY_GUARD(is_in_thread_cache_);
PA_INCREMENT_COUNTER(stats_.cache_fill_count);
if (PA_UNLIKELY(bucket_index > largest_active_bucket_index_)) {
PA_INCREMENT_COUNTER(stats_.cache_fill_misses);
return false;
}
auto& bucket = buckets_[bucket_index];
PA_DCHECK(bucket.count != 0 || bucket.freelist_head == nullptr);
PutInBucket(bucket, slot_start);
cached_memory_ += bucket.slot_size;
PA_INCREMENT_COUNTER(stats_.cache_fill_hits);
uint8_t limit = bucket.limit.load(std::memory_order_relaxed);
if (PA_UNLIKELY(bucket.count > limit)) {
ClearBucket(bucket, limit / 2);
}
if (PA_UNLIKELY(should_purge_.load(std::memory_order_relaxed))) {
PurgeInternal();
}
*slot_size = bucket.slot_size;
return true;
}
PA_ALWAYS_INLINE uintptr_t ThreadCache::GetFromCache(size_t bucket_index,
size_t* slot_size) {
#if PA_CONFIG(THREAD_CACHE_ALLOC_STATS)
stats_.allocs_per_bucket_[bucket_index]++;
#endif
PA_REENTRANCY_GUARD(is_in_thread_cache_);
PA_INCREMENT_COUNTER(stats_.alloc_count);
if (PA_UNLIKELY(bucket_index > largest_active_bucket_index_)) {
PA_INCREMENT_COUNTER(stats_.alloc_miss_too_large);
PA_INCREMENT_COUNTER(stats_.alloc_misses);
return 0;
}
auto& bucket = buckets_[bucket_index];
if (PA_LIKELY(bucket.freelist_head)) {
PA_INCREMENT_COUNTER(stats_.alloc_hits);
} else {
PA_DCHECK(bucket.count == 0);
PA_INCREMENT_COUNTER(stats_.alloc_miss_empty);
PA_INCREMENT_COUNTER(stats_.alloc_misses);
FillBucket(bucket_index);
if (PA_UNLIKELY(!bucket.freelist_head)) {
return 0;
}
}
PA_DCHECK(bucket.count != 0);
internal::PartitionFreelistEntry* entry = bucket.freelist_head;
#if BUILDFLAG(IS_CHROMEOS) && defined(ARCH_CPU_X86_64) && \
BUILDFLAG(HAS_64_BIT_POINTERS)
constexpr uintptr_t kCanonicalPointerMask = (1ULL << 48) - 1;
PA_CHECK(!(reinterpret_cast<uintptr_t>(entry) & ~kCanonicalPointerMask));
#endif
internal::PartitionFreelistEntry* next =
entry->GetNextForThreadCache<true>(bucket.slot_size);
PA_DCHECK(entry != next);
bucket.count--;
PA_DCHECK(bucket.count != 0 || !next);
bucket.freelist_head = next;
*slot_size = bucket.slot_size;
PA_DCHECK(cached_memory_ >= bucket.slot_size);
cached_memory_ -= bucket.slot_size;
return internal::SlotStartPtr2Addr(entry);
}
PA_ALWAYS_INLINE void ThreadCache::PutInBucket(Bucket& bucket,
uintptr_t slot_start) {
#if PA_CONFIG(HAS_FREELIST_SHADOW_ENTRY) && defined(ARCH_CPU_X86_64) && \
BUILDFLAG(HAS_64_BIT_POINTERS)
static_assert(internal::kAlignment == 16, "");
static_assert(
internal::kPartitionCachelineSize == 64,
"The computation below assumes that cache lines are 64 bytes long.");
int distance_to_next_cacheline_in_16_bytes = 4 - ((slot_start >> 4) & 3);
int slot_size_remaining_in_16_bytes =
#if BUILDFLAG(PUT_REF_COUNT_IN_PREVIOUS_SLOT)
(bucket.slot_size - sizeof(PartitionRefCount)) / 16;
#else
bucket.slot_size / 16;
#endif
slot_size_remaining_in_16_bytes = std::min(
slot_size_remaining_in_16_bytes, distance_to_next_cacheline_in_16_bytes);
static const uint32_t poison_16_bytes[4] = {0xbadbad00, 0xbadbad00,
0xbadbad00, 0xbadbad00};
#if PA_HAS_BUILTIN(__builtin_assume_aligned)
void* slot_start_tagged = __builtin_assume_aligned(
internal::SlotStartAddr2Ptr(slot_start), internal::kAlignment);
#else
void* slot_start_tagged = internal::SlotStartAddr2Ptr(slot_start);
#endif
uint32_t* address_aligned = static_cast<uint32_t*>(slot_start_tagged);
for (int i = 0; i < slot_size_remaining_in_16_bytes; i++) {
memcpy(address_aligned, poison_16_bytes, sizeof(poison_16_bytes));
address_aligned += 4;
}
#endif
auto* entry = internal::PartitionFreelistEntry::EmplaceAndInitForThreadCache(
slot_start, bucket.freelist_head);
bucket.freelist_head = entry;
bucket.count++;
}
PA_ALWAYS_INLINE void ThreadCache::RecordAllocation(size_t size) {
thread_alloc_stats_.alloc_count++;
thread_alloc_stats_.alloc_total_size += size;
}
PA_ALWAYS_INLINE void ThreadCache::RecordDeallocation(size_t size) {
thread_alloc_stats_.dealloc_count++;
thread_alloc_stats_.dealloc_total_size += size;
}
}
#endif