1 //===-- sanitizer_allocator_size_class_map.h --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Part of the Sanitizer Allocator. 10 // 11 //===----------------------------------------------------------------------===// 12 #ifndef SANITIZER_ALLOCATOR_H 13 #error This file must be included inside sanitizer_allocator.h 14 #endif 15 16 // SizeClassMap maps allocation sizes into size classes and back. 17 // Class 0 always corresponds to size 0. 18 // The other sizes are controlled by the template parameters: 19 // kMinSizeLog: defines the class 1 as 2^kMinSizeLog. 20 // kMaxSizeLog: defines the last class as 2^kMaxSizeLog. 21 // kMidSizeLog: the classes starting from 1 increase with step 22 // 2^kMinSizeLog until 2^kMidSizeLog. 23 // kNumBits: the number of non-zero bits in sizes after 2^kMidSizeLog. 24 // E.g. with kNumBits==3 all size classes after 2^kMidSizeLog 25 // look like 0b1xx0..0, where x is either 0 or 1. 26 // 27 // Example: kNumBits=3, kMidSizeLog=4, kMidSizeLog=8, kMaxSizeLog=17: 28 // 29 // Classes 1 - 16 correspond to sizes 16 to 256 (size = class_id * 16). 30 // Next 4 classes: 256 + i * 64 (i = 1 to 4). 31 // Next 4 classes: 512 + i * 128 (i = 1 to 4). 32 // ... 33 // Next 4 classes: 2^k + i * 2^(k-2) (i = 1 to 4). 34 // Last class corresponds to kMaxSize = 1 << kMaxSizeLog. 35 // 36 // This structure of the size class map gives us: 37 // - Efficient table-free class-to-size and size-to-class functions. 38 // - Difference between two consequent size classes is between 14% and 25% 39 // 40 // This class also gives a hint to a thread-caching allocator about the amount 41 // of chunks that need to be cached per-thread: 42 // - kMaxNumCachedHint is a hint for maximal number of chunks per size class. 43 // The actual number is computed in TransferBatch. 44 // - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class. 45 // 46 // Part of output of SizeClassMap::Print(): 47 // c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0 48 // c01 => s: 16 diff: +16 00% l 4 cached: 256 4096; id 1 49 // c02 => s: 32 diff: +16 100% l 5 cached: 256 8192; id 2 50 // c03 => s: 48 diff: +16 50% l 5 cached: 256 12288; id 3 51 // c04 => s: 64 diff: +16 33% l 6 cached: 256 16384; id 4 52 // c05 => s: 80 diff: +16 25% l 6 cached: 256 20480; id 5 53 // c06 => s: 96 diff: +16 20% l 6 cached: 256 24576; id 6 54 // c07 => s: 112 diff: +16 16% l 6 cached: 256 28672; id 7 55 // 56 // c08 => s: 128 diff: +16 14% l 7 cached: 256 32768; id 8 57 // c09 => s: 144 diff: +16 12% l 7 cached: 256 36864; id 9 58 // c10 => s: 160 diff: +16 11% l 7 cached: 256 40960; id 10 59 // c11 => s: 176 diff: +16 10% l 7 cached: 256 45056; id 11 60 // c12 => s: 192 diff: +16 09% l 7 cached: 256 49152; id 12 61 // c13 => s: 208 diff: +16 08% l 7 cached: 256 53248; id 13 62 // c14 => s: 224 diff: +16 07% l 7 cached: 256 57344; id 14 63 // c15 => s: 240 diff: +16 07% l 7 cached: 256 61440; id 15 64 // 65 // c16 => s: 256 diff: +16 06% l 8 cached: 256 65536; id 16 66 // c17 => s: 320 diff: +64 25% l 8 cached: 204 65280; id 17 67 // c18 => s: 384 diff: +64 20% l 8 cached: 170 65280; id 18 68 // c19 => s: 448 diff: +64 16% l 8 cached: 146 65408; id 19 69 // 70 // c20 => s: 512 diff: +64 14% l 9 cached: 128 65536; id 20 71 // c21 => s: 640 diff: +128 25% l 9 cached: 102 65280; id 21 72 // c22 => s: 768 diff: +128 20% l 9 cached: 85 65280; id 22 73 // c23 => s: 896 diff: +128 16% l 9 cached: 73 65408; id 23 74 // 75 // c24 => s: 1024 diff: +128 14% l 10 cached: 64 65536; id 24 76 // c25 => s: 1280 diff: +256 25% l 10 cached: 51 65280; id 25 77 // c26 => s: 1536 diff: +256 20% l 10 cached: 42 64512; id 26 78 // c27 => s: 1792 diff: +256 16% l 10 cached: 36 64512; id 27 79 // 80 // ... 81 // 82 // c48 => s: 65536 diff: +8192 14% l 16 cached: 1 65536; id 48 83 // c49 => s: 81920 diff: +16384 25% l 16 cached: 1 81920; id 49 84 // c50 => s: 98304 diff: +16384 20% l 16 cached: 1 98304; id 50 85 // c51 => s: 114688 diff: +16384 16% l 16 cached: 1 114688; id 51 86 // 87 // c52 => s: 131072 diff: +16384 14% l 17 cached: 1 131072; id 52 88 // 89 // 90 // Another example (kNumBits=2): 91 // c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0 92 // c01 => s: 32 diff: +32 00% l 5 cached: 64 2048; id 1 93 // c02 => s: 64 diff: +32 100% l 6 cached: 64 4096; id 2 94 // c03 => s: 96 diff: +32 50% l 6 cached: 64 6144; id 3 95 // c04 => s: 128 diff: +32 33% l 7 cached: 64 8192; id 4 96 // c05 => s: 160 diff: +32 25% l 7 cached: 64 10240; id 5 97 // c06 => s: 192 diff: +32 20% l 7 cached: 64 12288; id 6 98 // c07 => s: 224 diff: +32 16% l 7 cached: 64 14336; id 7 99 // c08 => s: 256 diff: +32 14% l 8 cached: 64 16384; id 8 100 // c09 => s: 384 diff: +128 50% l 8 cached: 42 16128; id 9 101 // c10 => s: 512 diff: +128 33% l 9 cached: 32 16384; id 10 102 // c11 => s: 768 diff: +256 50% l 9 cached: 21 16128; id 11 103 // c12 => s: 1024 diff: +256 33% l 10 cached: 16 16384; id 12 104 // c13 => s: 1536 diff: +512 50% l 10 cached: 10 15360; id 13 105 // c14 => s: 2048 diff: +512 33% l 11 cached: 8 16384; id 14 106 // c15 => s: 3072 diff: +1024 50% l 11 cached: 5 15360; id 15 107 // c16 => s: 4096 diff: +1024 33% l 12 cached: 4 16384; id 16 108 // c17 => s: 6144 diff: +2048 50% l 12 cached: 2 12288; id 17 109 // c18 => s: 8192 diff: +2048 33% l 13 cached: 2 16384; id 18 110 // c19 => s: 12288 diff: +4096 50% l 13 cached: 1 12288; id 19 111 // c20 => s: 16384 diff: +4096 33% l 14 cached: 1 16384; id 20 112 // c21 => s: 24576 diff: +8192 50% l 14 cached: 1 24576; id 21 113 // c22 => s: 32768 diff: +8192 33% l 15 cached: 1 32768; id 22 114 // c23 => s: 49152 diff: +16384 50% l 15 cached: 1 49152; id 23 115 // c24 => s: 65536 diff: +16384 33% l 16 cached: 1 65536; id 24 116 // c25 => s: 98304 diff: +32768 50% l 16 cached: 1 98304; id 25 117 // c26 => s: 131072 diff: +32768 33% l 17 cached: 1 131072; id 26 118 119 template <uptr kNumBits, uptr kMinSizeLog, uptr kMidSizeLog, uptr kMaxSizeLog, 120 uptr kMaxNumCachedHintT, uptr kMaxBytesCachedLog> 121 class SizeClassMap { 122 static const uptr kMinSize = 1 << kMinSizeLog; 123 static const uptr kMidSize = 1 << kMidSizeLog; 124 static const uptr kMidClass = kMidSize / kMinSize; 125 static const uptr S = kNumBits - 1; 126 static const uptr M = (1 << S) - 1; 127 128 public: 129 // kMaxNumCachedHintT is a power of two. It serves as a hint 130 // for the size of TransferBatch, the actual size could be a bit smaller. 131 static const uptr kMaxNumCachedHint = kMaxNumCachedHintT; 132 COMPILER_CHECK((kMaxNumCachedHint & (kMaxNumCachedHint - 1)) == 0); 133 134 static const uptr kMaxSize = 1UL << kMaxSizeLog; 135 static const uptr kNumClasses = 136 kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1 + 1; 137 static const uptr kLargestClassID = kNumClasses - 2; 138 static const uptr kBatchClassID = kNumClasses - 1; 139 COMPILER_CHECK(kNumClasses >= 16 && kNumClasses <= 256); 140 static const uptr kNumClassesRounded = 141 kNumClasses <= 32 ? 32 : 142 kNumClasses <= 64 ? 64 : 143 kNumClasses <= 128 ? 128 : 256; 144 Size(uptr class_id)145 static uptr Size(uptr class_id) { 146 // Estimate the result for kBatchClassID because this class does not know 147 // the exact size of TransferBatch. It's OK since we are using the actual 148 // sizeof(TransferBatch) where it matters. 149 if (UNLIKELY(class_id == kBatchClassID)) 150 return kMaxNumCachedHint * sizeof(uptr); 151 if (class_id <= kMidClass) 152 return kMinSize * class_id; 153 class_id -= kMidClass; 154 uptr t = kMidSize << (class_id >> S); 155 return t + (t >> S) * (class_id & M); 156 } 157 ClassID(uptr size)158 static uptr ClassID(uptr size) { 159 if (UNLIKELY(size > kMaxSize)) 160 return 0; 161 if (size <= kMidSize) 162 return (size + kMinSize - 1) >> kMinSizeLog; 163 const uptr l = MostSignificantSetBitIndex(size); 164 const uptr hbits = (size >> (l - S)) & M; 165 const uptr lbits = size & ((1U << (l - S)) - 1); 166 const uptr l1 = l - kMidSizeLog; 167 return kMidClass + (l1 << S) + hbits + (lbits > 0); 168 } 169 MaxCachedHint(uptr size)170 static uptr MaxCachedHint(uptr size) { 171 DCHECK_LE(size, kMaxSize); 172 if (UNLIKELY(size == 0)) 173 return 0; 174 uptr n; 175 // Force a 32-bit division if the template parameters allow for it. 176 if (kMaxBytesCachedLog > 31 || kMaxSizeLog > 31) 177 n = (1UL << kMaxBytesCachedLog) / size; 178 else 179 n = (1U << kMaxBytesCachedLog) / static_cast<u32>(size); 180 return Max<uptr>(1U, Min(kMaxNumCachedHint, n)); 181 } 182 Print()183 static void Print() { 184 uptr prev_s = 0; 185 uptr total_cached = 0; 186 for (uptr i = 0; i < kNumClasses; i++) { 187 uptr s = Size(i); 188 if (s >= kMidSize / 2 && (s & (s - 1)) == 0) 189 Printf("\n"); 190 uptr d = s - prev_s; 191 uptr p = prev_s ? (d * 100 / prev_s) : 0; 192 uptr l = s ? MostSignificantSetBitIndex(s) : 0; 193 uptr cached = MaxCachedHint(s) * s; 194 if (i == kBatchClassID) 195 d = p = l = 0; 196 Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd " 197 "cached: %zd %zd; id %zd\n", 198 i, Size(i), d, p, l, MaxCachedHint(s), cached, ClassID(s)); 199 total_cached += cached; 200 prev_s = s; 201 } 202 Printf("Total cached: %zd\n", total_cached); 203 } 204 Validate()205 static void Validate() { 206 for (uptr c = 1; c < kNumClasses; c++) { 207 // Printf("Validate: c%zd\n", c); 208 uptr s = Size(c); 209 CHECK_NE(s, 0U); 210 if (c == kBatchClassID) 211 continue; 212 CHECK_EQ(ClassID(s), c); 213 if (c < kLargestClassID) 214 CHECK_EQ(ClassID(s + 1), c + 1); 215 CHECK_EQ(ClassID(s - 1), c); 216 CHECK_GT(Size(c), Size(c - 1)); 217 } 218 CHECK_EQ(ClassID(kMaxSize + 1), 0); 219 220 for (uptr s = 1; s <= kMaxSize; s++) { 221 uptr c = ClassID(s); 222 // Printf("s%zd => c%zd\n", s, c); 223 CHECK_LT(c, kNumClasses); 224 CHECK_GE(Size(c), s); 225 if (c > 0) 226 CHECK_LT(Size(c - 1), s); 227 } 228 } 229 }; 230 231 typedef SizeClassMap<3, 4, 8, 17, 128, 16> DefaultSizeClassMap; 232 typedef SizeClassMap<3, 4, 8, 17, 64, 14> CompactSizeClassMap; 233 typedef SizeClassMap<2, 5, 9, 16, 64, 14> VeryCompactSizeClassMap; 234 235 // The following SizeClassMap only holds a way small number of cached entries, 236 // allowing for denser per-class arrays, smaller memory footprint and usually 237 // better performances in threaded environments. 238 typedef SizeClassMap<3, 4, 8, 17, 8, 10> DenseSizeClassMap; 239 // Similar to VeryCompact map above, this one has a small number of different 240 // size classes, and also reduced thread-local caches. 241 typedef SizeClassMap<2, 5, 9, 16, 8, 10> VeryDenseSizeClassMap; 242