/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_

#include <stdint.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <memory>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>

#include <android-base/logging.h>

#include "base/allocator.h"
#include "base/bit_utils.h"
#include "base/mutex.h"
#include "globals.h"
#include "thread.h"

namespace art {

class MemMap;

namespace gc {
namespace allocator {

// A runs-of-slots memory allocator.
class RosAlloc {
 private:
  // Represents a run of free pages.
  class FreePageRun {
   public:
    uint8_t magic_num_;  // The magic number used for debugging only.

    bool IsFree() const {
      return !kIsDebugBuild || magic_num_ == kMagicNumFree;
    }
    size_t ByteSize(RosAlloc* rosalloc) const REQUIRES(rosalloc->lock_) {
      const uint8_t* fpr_base = reinterpret_cast<const uint8_t*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
      DCHECK_GE(byte_size, static_cast<size_t>(0));
      DCHECK_ALIGNED(byte_size, kPageSize);
      return byte_size;
    }
    void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
        REQUIRES(rosalloc->lock_) {
      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
      uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
    }
    void* Begin() {
      return reinterpret_cast<void*>(this);
    }
    void* End(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
      uint8_t* end = fpr_base + ByteSize(rosalloc);
      return end;
    }
    bool IsLargerThanPageReleaseThreshold(RosAlloc* rosalloc)
        REQUIRES(rosalloc->lock_) {
      return ByteSize(rosalloc) >= rosalloc->page_release_size_threshold_;
    }
    bool IsAtEndOfSpace(RosAlloc* rosalloc)
        REQUIRES(rosalloc->lock_) {
      return reinterpret_cast<uint8_t*>(this) + ByteSize(rosalloc) ==
          rosalloc->base_ + rosalloc->footprint_;
    }
    bool ShouldReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      switch (rosalloc->page_release_mode_) {
        case kPageReleaseModeNone:
          return false;
        case kPageReleaseModeEnd:
          return IsAtEndOfSpace(rosalloc);
        case kPageReleaseModeSize:
          return IsLargerThanPageReleaseThreshold(rosalloc);
        case kPageReleaseModeSizeAndEnd:
          return IsLargerThanPageReleaseThreshold(rosalloc) && IsAtEndOfSpace(rosalloc);
        case kPageReleaseModeAll:
          return true;
        default:
          LOG(FATAL) << "Unexpected page release mode ";
          return false;
      }
    }
    void ReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      uint8_t* start = reinterpret_cast<uint8_t*>(this);
      size_t byte_size = ByteSize(rosalloc);
      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
      if (ShouldReleasePages(rosalloc)) {
        rosalloc->ReleasePageRange(start, start + byte_size);
      }
    }

   private:
    DISALLOW_COPY_AND_ASSIGN(FreePageRun);
  };
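
  // Illustrative sketch (not part of the allocator): why a free page run's
  // size lives in free_page_run_size_map_ rather than in the run's own pages.
  // Suppose a three-page free run starts at page map index 5. Conceptually:
  //
  //   free_page_run_size_map_[5] == 3 * kPageSize
  //   fpr->ByteSize(rosalloc)    == 3 * kPageSize   // reads the side table
  //   fpr->End(rosalloc)         == fpr_base + 3 * kPageSize
  //
  // Because the size is kept in the side table, ReleasePages() can return the
  // run's backing pages to the OS (e.g., via madvise) without destroying the
  // bookkeeping needed to coalesce or reuse the run later.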
"Unexpected page release mode "; 100 return false; 101 } 102 } ReleasePages(RosAlloc * rosalloc)103 void ReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) { 104 uint8_t* start = reinterpret_cast<uint8_t*>(this); 105 size_t byte_size = ByteSize(rosalloc); 106 DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0)); 107 if (ShouldReleasePages(rosalloc)) { 108 rosalloc->ReleasePageRange(start, start + byte_size); 109 } 110 } 111 112 private: 113 DISALLOW_COPY_AND_ASSIGN(FreePageRun); 114 }; 115 116 // The slot header. 117 class Slot { 118 public: Next()119 Slot* Next() const { 120 return next_; 121 } SetNext(Slot * next)122 void SetNext(Slot* next) { 123 next_ = next; 124 } 125 // The slot right before this slot in terms of the address. Left(size_t bracket_size)126 Slot* Left(size_t bracket_size) { 127 return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) - bracket_size); 128 } Clear()129 void Clear() { 130 next_ = nullptr; 131 } 132 133 private: 134 Slot* next_; // Next slot in the list. 135 friend class RosAlloc; 136 }; 137 138 // We use the tail (kUseTail == true) for the bulk or thread-local free lists to avoid the need to 139 // traverse the list from the head to the tail when merging free lists. 140 // We don't use the tail (kUseTail == false) for the free list to avoid the need to manage the 141 // tail in the allocation fast path for a performance reason. 142 template<bool kUseTail = true> 143 class SlotFreeList { 144 public: SlotFreeList()145 SlotFreeList() : head_(0U), tail_(0), size_(0), padding_(0) {} Head()146 Slot* Head() const { 147 return reinterpret_cast<Slot*>(head_); 148 } Tail()149 Slot* Tail() const { 150 CHECK(kUseTail); 151 return reinterpret_cast<Slot*>(tail_); 152 } Size()153 size_t Size() const { 154 return size_; 155 } 156 // Removes from the head of the free list. Remove()157 Slot* Remove() { 158 Slot* slot; 159 if (kIsDebugBuild) { 160 Verify(); 161 } 162 Slot** headp = reinterpret_cast<Slot**>(&head_); 163 Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr; 164 Slot* old_head = *headp; 165 if (old_head == nullptr) { 166 // List was empty. 167 if (kUseTail) { 168 DCHECK(*tailp == nullptr); 169 } 170 return nullptr; 171 } else { 172 // List wasn't empty. 173 if (kUseTail) { 174 DCHECK(*tailp != nullptr); 175 } 176 Slot* old_head_next = old_head->Next(); 177 slot = old_head; 178 *headp = old_head_next; 179 if (kUseTail && old_head_next == nullptr) { 180 // List becomes empty. 181 *tailp = nullptr; 182 } 183 } 184 slot->Clear(); 185 --size_; 186 if (kIsDebugBuild) { 187 Verify(); 188 } 189 return slot; 190 } Add(Slot * slot)191 void Add(Slot* slot) { 192 if (kIsDebugBuild) { 193 Verify(); 194 } 195 DCHECK(slot != nullptr); 196 DCHECK(slot->Next() == nullptr); 197 Slot** headp = reinterpret_cast<Slot**>(&head_); 198 Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr; 199 Slot* old_head = *headp; 200 if (old_head == nullptr) { 201 // List was empty. 202 if (kUseTail) { 203 DCHECK(*tailp == nullptr); 204 } 205 *headp = slot; 206 if (kUseTail) { 207 *tailp = slot; 208 } 209 } else { 210 // List wasn't empty. 211 if (kUseTail) { 212 DCHECK(*tailp != nullptr); 213 } 214 *headp = slot; 215 slot->SetNext(old_head); 216 } 217 ++size_; 218 if (kIsDebugBuild) { 219 Verify(); 220 } 221 } 222 // Merge the given list into this list. Empty the given list. 
    // Merge the given list into this list. Empty the given list.
    // Deliberately support only a kUseTail == true SlotFreeList parameter because 1) we don't
    // currently have a situation where we need a kUseTail == false SlotFreeList parameter, and 2)
    // supporting the kUseTail == false parameter would require an O(n) linked list traversal to do
    // the merge if 'this' SlotFreeList has kUseTail == false, which we'd like to avoid.
    void Merge(SlotFreeList<true>* list) {
      if (kIsDebugBuild) {
        Verify();
        CHECK(list != nullptr);
        list->Verify();
      }
      if (list->Size() == 0) {
        return;
      }
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        *headp = list->Head();
        if (kUseTail) {
          *tailp = list->Tail();
        }
        size_ = list->Size();
      } else {
        // List wasn't empty.
        DCHECK(list->Head() != nullptr);
        *headp = list->Head();
        DCHECK(list->Tail() != nullptr);
        list->Tail()->SetNext(old_head);
        // if kUseTail, no change to tailp.
        size_ += list->Size();
      }
      list->Reset();
      if (kIsDebugBuild) {
        Verify();
      }
    }

    void Reset() {
      head_ = 0;
      if (kUseTail) {
        tail_ = 0;
      }
      size_ = 0;
    }

    void Verify() {
      Slot* head = reinterpret_cast<Slot*>(head_);
      Slot* tail = kUseTail ? reinterpret_cast<Slot*>(tail_) : nullptr;
      if (size_ == 0) {
        CHECK(head == nullptr);
        if (kUseTail) {
          CHECK(tail == nullptr);
        }
      } else {
        CHECK(head != nullptr);
        if (kUseTail) {
          CHECK(tail != nullptr);
        }
        size_t count = 0;
        for (Slot* slot = head; slot != nullptr; slot = slot->Next()) {
          ++count;
          if (kUseTail && slot->Next() == nullptr) {
            CHECK_EQ(slot, tail);
          }
        }
        CHECK_EQ(size_, count);
      }
    }

   private:
    // A pointer (Slot*) to the head of the list. Always 8 bytes so that we will have the same
    // layout between 32 bit and 64 bit, which is not strictly necessary, but we do so for 1)
    // uniformity, 2) we won't need to change this code if we move to a non-low 4G heap in the
    // future, and 3) the space savings by using 32 bit fields in 32 bit would be lost in noise
    // (won't open up enough space to cause an extra slot to be available).
    uint64_t head_;
    // A pointer (Slot*) to the tail of the list. Always 8 bytes so that we will have the same
    // layout between 32 bit and 64 bit. The tail is stored to speed up merging of lists.
    // Unused if kUseTail is false.
    uint64_t tail_;
    // The number of slots in the list. This is used to make it fast to check if a free list is all
    // free without traversing the whole free list.
    uint32_t size_;
    uint32_t padding_ ATTRIBUTE_UNUSED;
    friend class RosAlloc;
  };
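
  // Illustrative sketch (not part of the allocator): Merge() splices the donor
  // list in front of the destination in O(1) by using the donor's tail pointer.
  // If 'this' holds [x -> y] and 'list' holds [a -> b -> c] with tail c, then
  // after this->Merge(list):
  //
  //   this: a -> b -> c -> x -> y   (head_ = a, c->next_ = x, size_ = 5)
  //   list: empty                   (Reset() zeroes head_, tail_ and size_)
  //
  // No traversal is needed because c is reachable directly via list->Tail().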

  // Represents a run of memory slots of the same size.
  //
  // A run's memory layout:
  //
  // +-------------------+
  // | magic_num         |
  // +-------------------+
  // | size_bracket_idx  |
  // +-------------------+
  // | is_thread_local   |
  // +-------------------+
  // | to_be_bulk_freed  |
  // +-------------------+
  // |                   |
  // | free list         |
  // |                   |
  // +-------------------+
  // |                   |
  // | bulk free list    |
  // |                   |
  // +-------------------+
  // |                   |
  // | thread-local free |
  // | list              |
  // |                   |
  // +-------------------+
  // | padding due to    |
  // | alignment         |
  // +-------------------+
  // | slot 0            |
  // +-------------------+
  // | slot 1            |
  // +-------------------+
  // | slot 2            |
  // +-------------------+
  // ...
  // +-------------------+
  // | last slot         |
  // +-------------------+
  //
  class Run {
   public:
    uint8_t magic_num_;         // The magic number used for debugging.
    uint8_t size_bracket_idx_;  // The index of the size bracket of this run.
    uint8_t is_thread_local_;   // True if this run is used as a thread-local run.
    uint8_t to_be_bulk_freed_;  // Used within BulkFree() to flag a run that's involved with a bulk free.
    uint32_t padding_ ATTRIBUTE_UNUSED;
    // Use a tailless free list for free_list_ so that the alloc fast path does not manage the tail.
    SlotFreeList<false> free_list_;
    SlotFreeList<true> bulk_free_list_;
    SlotFreeList<true> thread_local_free_list_;
    // Padding due to alignment
    // Slot 0
    // Slot 1
    // ...

    // Returns the byte size of the header.
    static size_t fixed_header_size() {
      return sizeof(Run);
    }
    Slot* FirstSlot() const {
      const uint8_t idx = size_bracket_idx_;
      return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) + headerSizes[idx]);
    }
    Slot* LastSlot() {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      uintptr_t end = reinterpret_cast<uintptr_t>(End());
      Slot* last_slot = reinterpret_cast<Slot*>(end - bracket_size);
      DCHECK_LE(FirstSlot(), last_slot);
      return last_slot;
    }
    SlotFreeList<false>* FreeList() {
      return &free_list_;
    }
    SlotFreeList<true>* BulkFreeList() {
      return &bulk_free_list_;
    }
    SlotFreeList<true>* ThreadLocalFreeList() {
      return &thread_local_free_list_;
    }
    void* End() {
      return reinterpret_cast<uint8_t*>(this) + kPageSize * numOfPages[size_bracket_idx_];
    }
    void SetIsThreadLocal(bool is_thread_local) {
      is_thread_local_ = is_thread_local ? 1 : 0;
    }
    bool IsThreadLocal() const {
      return is_thread_local_ != 0;
    }
    // Set up the free list for a new/empty run.
    void InitFreeList() {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      Slot* first_slot = FirstSlot();
      // Add backwards so the first slot is at the head of the list.
      for (Slot* slot = LastSlot(); slot >= first_slot; slot = slot->Left(bracket_size)) {
        free_list_.Add(slot);
      }
    }
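
    // Illustrative sketch (not part of the allocator): slot addresses are pure
    // arithmetic over the run header. For a run at address R with bracket
    // index idx, slot i lives at
    //
    //   R + headerSizes[idx] + i * bracketSizes[idx]
    //
    // For example, with a hypothetical 16-byte bracket, a one-page (4 KB) run
    // and a 32-byte header: FirstSlot() == R + 32 and LastSlot() == R + 4096
    // - 16. InitFreeList() pushes slots from LastSlot() down to FirstSlot(),
    // so slot 0 ends up at the head and allocation proceeds in address order.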
    // Merge the thread local free list to the free list. Used when a thread-local run becomes
    // full.
    bool MergeThreadLocalFreeListToFreeList(bool* is_all_free_after_out);
    // Merge the bulk free list to the free list. Used in a bulk free.
    void MergeBulkFreeListToFreeList();
    // Merge the bulk free list to the thread local free list. In a bulk free, as a two-step
    // process, GC will first record all the slots to free in a run in the bulk free list where it
    // can write without a lock, and later acquire a lock once per run to merge the bulk free list
    // to the thread-local free list.
    void MergeBulkFreeListToThreadLocalFreeList();
    // Allocates a slot in a run.
    ALWAYS_INLINE void* AllocSlot();
    // Frees a slot in a run. This is used in a non-bulk free.
    void FreeSlot(void* ptr);
    // Add the given slot to the bulk free list. Returns the bracket size.
    size_t AddToBulkFreeList(void* ptr);
    // Add the given slot to the thread-local free list.
    void AddToThreadLocalFreeList(void* ptr);
    // Returns true if all the slots in the run are not in use.
    bool IsAllFree() const {
      return free_list_.Size() == numOfSlots[size_bracket_idx_];
    }
    // Returns the number of free slots.
    size_t NumberOfFreeSlots() {
      return free_list_.Size();
    }
    // Returns true if all the slots in the run are in use.
    ALWAYS_INLINE bool IsFull();
    // Returns true if the bulk free list is empty.
    bool IsBulkFreeListEmpty() const {
      return bulk_free_list_.Size() == 0;
    }
    // Returns true if the thread local free list is empty.
    bool IsThreadLocalFreeListEmpty() const {
      return thread_local_free_list_.Size() == 0;
    }
    // Zero the run's data.
    void ZeroData();
    // Zero the run's header and the slot headers.
    void ZeroHeaderAndSlotHeaders();
    // Iterate over all the slots and apply the given function.
    void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
    // Dump the run metadata for debugging.
    std::string Dump();
    // Verify for debugging.
    void Verify(Thread* self, RosAlloc* rosalloc, bool running_on_memory_tool)
        REQUIRES(Locks::mutator_lock_)
        REQUIRES(Locks::thread_list_lock_);

   private:
    // The common part of AddToBulkFreeList() and AddToThreadLocalFreeList(). Returns the bracket
    // size.
    size_t AddToFreeListShared(void* ptr, SlotFreeList<true>* free_list, const char* caller_name);
    // Turns a FreeList into a string for debugging.
    template<bool kUseTail>
    std::string FreeListToStr(SlotFreeList<kUseTail>* free_list);
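
    // Illustrative sketch (not part of the allocator): the two-step bulk free
    // protocol described above, for a thread-local run r, in pseudo-steps.
    //
    //   Step 1 (no per-run lock): for each dead pointer p belonging to r,
    //     r->AddToBulkFreeList(p);                      // builds bulk_free_list_
    //   Step 2 (once per run, under the appropriate lock):
    //     r->MergeBulkFreeListToThreadLocalFreeList();  // O(1) list splice
    //
    // Deferring the merge means a lock is taken once per run rather than once
    // per freed object, and the splice itself is constant time because
    // bulk_free_list_ keeps a tail pointer.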
    // Check a given pointer is a valid slot address and return it as Slot*.
    Slot* ToSlot(void* ptr) {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
          - reinterpret_cast<uint8_t*>(FirstSlot());
      DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
      size_t slot_idx = offset_from_slot_base / bracket_size;
      DCHECK_LT(slot_idx, numOfSlots[idx]);
      return reinterpret_cast<Slot*>(ptr);
    }
    size_t SlotIndex(Slot* slot) const {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
          - reinterpret_cast<uint8_t*>(FirstSlot());
      DCHECK_EQ(offset_from_slot_base % bracket_size, 0U);
      size_t slot_idx = offset_from_slot_base / bracket_size;
      DCHECK_LT(slot_idx, numOfSlots[idx]);
      return slot_idx;
    }

    // TODO: DISALLOW_COPY_AND_ASSIGN(Run);
  };

  // The magic number for a run.
  static constexpr uint8_t kMagicNum = 42;
  // The magic number for free pages.
  static constexpr uint8_t kMagicNumFree = 43;
  // The number of size brackets.
  static constexpr size_t kNumOfSizeBrackets = 42;
  // The sizes (the slot sizes, in bytes) of the size brackets.
  static size_t bracketSizes[kNumOfSizeBrackets];
  // The numbers of pages that are used for runs for each size bracket.
  static size_t numOfPages[kNumOfSizeBrackets];
  // The numbers of slots of the runs for each size bracket.
  static size_t numOfSlots[kNumOfSizeBrackets];
  // The header sizes in bytes of the runs for each size bracket.
  static size_t headerSizes[kNumOfSizeBrackets];

  // Initialize the run specs (the above arrays).
  static void Initialize();
  static bool initialized_;

  // Returns the byte size of the bracket size from the index.
  static size_t IndexToBracketSize(size_t idx) {
    DCHECK_LT(idx, kNumOfSizeBrackets);
    return bracketSizes[idx];
  }
  // Returns the index of the size bracket from the bracket size.
  static size_t BracketSizeToIndex(size_t size) {
    DCHECK(8 <= size &&
           ((size <= kMaxThreadLocalBracketSize && size % kThreadLocalBracketQuantumSize == 0) ||
            (size <= kMaxRegularBracketSize && size % kBracketQuantumSize == 0) ||
            size == 1 * KB || size == 2 * KB));
    size_t idx;
    if (UNLIKELY(size == 1 * KB)) {
      idx = kNumOfSizeBrackets - 2;
    } else if (UNLIKELY(size == 2 * KB)) {
      idx = kNumOfSizeBrackets - 1;
    } else if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      DCHECK_EQ(size % kThreadLocalBracketQuantumSize, 0U);
      idx = size / kThreadLocalBracketQuantumSize - 1;
    } else {
      DCHECK(size <= kMaxRegularBracketSize);
      DCHECK_EQ((size - kMaxThreadLocalBracketSize) % kBracketQuantumSize, 0U);
      idx = ((size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
          + kNumThreadLocalSizeBrackets;
    }
    DCHECK(bracketSizes[idx] == size);
    return idx;
  }
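
  // Illustrative sketch (not part of the allocator): with the constants below
  // (8-byte quantum up to 128 bytes, then 16-byte quantum up to 512 bytes,
  // then 1 KB and 2 KB), BracketSizeToIndex() maps, for example:
  //
  //   8    -> 0     (8 / 8 - 1)
  //   128  -> 15    (128 / 8 - 1)
  //   144  -> 16    ((144 - 128) / 16 - 1 + 16)
  //   512  -> 39    ((512 - 128) / 16 - 1 + 16)
  //   1 KB -> 40,  2 KB -> 41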
  // Returns true if the given allocation size is for a thread local allocation.
  static bool IsSizeForThreadLocal(size_t size) {
    bool is_size_for_thread_local = size <= kMaxThreadLocalBracketSize;
    DCHECK(size > kLargeSizeThreshold ||
           (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets)));
    return is_size_for_thread_local;
  }
  // Rounds up the size to the nearest bracket size.
  static size_t RoundToBracketSize(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      return RoundUp(size, kThreadLocalBracketQuantumSize);
    } else if (size <= kMaxRegularBracketSize) {
      return RoundUp(size, kBracketQuantumSize);
    } else if (UNLIKELY(size <= 1 * KB)) {
      return 1 * KB;
    } else {
      DCHECK_LE(size, 2 * KB);
      return 2 * KB;
    }
  }
  // Returns the size bracket index from the byte size with rounding.
  static size_t SizeToIndex(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      return RoundUp(size, kThreadLocalBracketQuantumSize) / kThreadLocalBracketQuantumSize - 1;
    } else if (size <= kMaxRegularBracketSize) {
      return (RoundUp(size, kBracketQuantumSize) - kMaxThreadLocalBracketSize) / kBracketQuantumSize
          - 1 + kNumThreadLocalSizeBrackets;
    } else if (size <= 1 * KB) {
      return kNumOfSizeBrackets - 2;
    } else {
      DCHECK_LE(size, 2 * KB);
      return kNumOfSizeBrackets - 1;
    }
  }
  // A combination of SizeToIndex() and RoundToBracketSize().
  static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
    DCHECK(size <= kLargeSizeThreshold);
    size_t idx;
    size_t bracket_size;
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      bracket_size = RoundUp(size, kThreadLocalBracketQuantumSize);
      idx = bracket_size / kThreadLocalBracketQuantumSize - 1;
    } else if (size <= kMaxRegularBracketSize) {
      bracket_size = RoundUp(size, kBracketQuantumSize);
      idx = ((bracket_size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
          + kNumThreadLocalSizeBrackets;
    } else if (size <= 1 * KB) {
      bracket_size = 1 * KB;
      idx = kNumOfSizeBrackets - 2;
    } else {
      DCHECK(size <= 2 * KB);
      bracket_size = 2 * KB;
      idx = kNumOfSizeBrackets - 1;
    }
    DCHECK_EQ(idx, SizeToIndex(size)) << idx;
    DCHECK_EQ(bracket_size, IndexToBracketSize(idx)) << idx;
    DCHECK_EQ(bracket_size, bracketSizes[idx]) << idx;
    DCHECK_LE(size, bracket_size) << idx;
    DCHECK(size > kMaxRegularBracketSize ||
           (size <= kMaxThreadLocalBracketSize &&
            bracket_size - size < kThreadLocalBracketQuantumSize) ||
           (size <= kMaxRegularBracketSize && bracket_size - size < kBracketQuantumSize)) << idx;
    *bracket_size_out = bracket_size;
    return idx;
  }

  // Returns the page map index from an address. Requires that the
  // address is page size aligned.
  size_t ToPageMapIndex(const void* addr) const {
    DCHECK_LE(base_, addr);
    DCHECK_LT(addr, base_ + capacity_);
    size_t byte_offset = reinterpret_cast<const uint8_t*>(addr) - base_;
    DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0));
    return byte_offset / kPageSize;
  }
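
  // Illustrative sketch (not part of the allocator): a worked rounding example.
  // For a 37-byte request, RoundToBracketSize(37) == RoundUp(37, 8) == 40 and
  // SizeToIndex(37) == 40 / 8 - 1 == 4, so at most 7 bytes per object are lost
  // to internal fragmentation in that bracket. For a 200-byte request, the
  // bracket is RoundUp(200, 16) == 208 with index (208 - 128) / 16 - 1 + 16 == 20.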
  // Returns the page map index from an address with rounding.
  size_t RoundDownToPageMapIndex(const void* addr) const {
    DCHECK(base_ <= addr && addr < reinterpret_cast<uint8_t*>(base_) + capacity_);
    return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
  }

  // A memory allocation request larger than this size is treated as a large object and allocated
  // at a page-granularity.
  static const size_t kLargeSizeThreshold = 2048;

  // If true, check that the returned memory is actually zero.
  static constexpr bool kCheckZeroMemory = kIsDebugBuild;
  // Valgrind protects memory, so do not check memory when running under valgrind. In a normal
  // build with kCheckZeroMemory the whole test should be optimized away.
  // TODO: Unprotect before checks.
  ALWAYS_INLINE bool ShouldCheckZeroMemory();

  // If true, log verbose details of operations.
  static constexpr bool kTraceRosAlloc = false;

  struct hash_run {
    size_t operator()(const RosAlloc::Run* r) const {
      return reinterpret_cast<size_t>(r);
    }
  };

  struct eq_run {
    bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
      return r1 == r2;
    }
  };

 public:
  // Different page release modes.
  enum PageReleaseMode {
    kPageReleaseModeNone,        // Release no empty pages.
    kPageReleaseModeEnd,         // Release empty pages at the end of the space.
    kPageReleaseModeSize,        // Release empty pages that are larger than the threshold.
    kPageReleaseModeSizeAndEnd,  // Release empty pages that are larger than the threshold or
                                 // at the end of the space.
    kPageReleaseModeAll,         // Release all empty pages.
  };

  // The default value for page_release_size_threshold_.
  static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB;

  // We use thread-local runs for the size brackets whose indexes
  // are less than this index. We use shared (current) runs for the rest.
  // Sync this with the length of Thread::rosalloc_runs_.
  static const size_t kNumThreadLocalSizeBrackets = 16;
  static_assert(kNumThreadLocalSizeBrackets == kNumRosAllocThreadLocalSizeBracketsInThread,
                "Mismatch between kNumThreadLocalSizeBrackets and "
                "kNumRosAllocThreadLocalSizeBracketsInThread");

  // The size of the largest bracket we use thread-local runs for.
  // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1].
  static const size_t kMaxThreadLocalBracketSize = 128;

  // We use regular (8 or 16-bytes increment) runs for the size brackets whose indexes are less than
  // this index.
  static const size_t kNumRegularSizeBrackets = 40;

  // The size of the largest regular (8 or 16-byte increment) bracket. Non-regular brackets are the
  // 1 KB and the 2 KB brackets. This should be equal to bracketSizes[kNumRegularSizeBrackets - 1].
  static const size_t kMaxRegularBracketSize = 512;

  // The bracket size increment for the thread-local brackets (<= kMaxThreadLocalBracketSize bytes).
  static constexpr size_t kThreadLocalBracketQuantumSize = 8;

  // Equal to Log2(kThreadLocalBracketQuantumSize).
  static constexpr size_t kThreadLocalBracketQuantumSizeShift = 3;
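
  // Illustrative sketch (not part of the allocator): how these constants add
  // up to kNumOfSizeBrackets == 42. There are 16 thread-local brackets (8, 16,
  // ..., 128 in 8-byte steps), 24 further regular brackets (144, 160, ..., 512
  // in 16-byte steps), and the two special 1 KB and 2 KB brackets:
  //
  //   16 + 24 + 2 == 42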

  // The bracket size increment for the non-thread-local, regular brackets (of size <=
  // kMaxRegularBracketSize bytes and > kMaxThreadLocalBracketSize bytes).
  static constexpr size_t kBracketQuantumSize = 16;

  // Equal to Log2(kBracketQuantumSize).
  static constexpr size_t kBracketQuantumSizeShift = 4;

 private:
  // The base address of the memory region that's managed by this allocator.
  uint8_t* base_;

  // The footprint in bytes of the currently allocated portion of the
  // memory region.
  size_t footprint_;

  // The maximum footprint. The address, base_ + capacity_, indicates
  // the end of the memory region that's currently managed by this allocator.
  size_t capacity_;

  // The maximum capacity. The address, base_ + max_capacity_, indicates
  // the end of the memory region that's ever managed by this allocator.
  size_t max_capacity_;

  template<class Key, AllocatorTag kTag, class Compare = std::less<Key>>
  using AllocationTrackingSet = std::set<Key, Compare, TrackingAllocator<Key, kTag>>;

  // The run sets that hold the runs whose slots are not all
  // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
  AllocationTrackingSet<Run*, kAllocatorTagRosAlloc> non_full_runs_[kNumOfSizeBrackets];
  // The run sets that hold the runs whose slots are all full. This is
  // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
  std::unordered_set<Run*, hash_run, eq_run, TrackingAllocator<Run*, kAllocatorTagRosAlloc>>
      full_runs_[kNumOfSizeBrackets];
  // The set of free pages.
  AllocationTrackingSet<FreePageRun*, kAllocatorTagRosAlloc> free_page_runs_ GUARDED_BY(lock_);
  // The dedicated full run; it is always full and shared by all threads when revoking happens.
  // This is an optimization since it enables us to avoid a null check for revoked runs.
  static Run* dedicated_full_run_;
  // Using size_t to ensure that it is at least word aligned.
  static size_t dedicated_full_run_storage_[];
  // The current runs where the allocations are first attempted for
  // the size brackets that do not use thread-local
  // runs. current_runs_[i] is guarded by size_bracket_locks_[i].
  Run* current_runs_[kNumOfSizeBrackets];
  // The mutexes, one per size bracket.
  Mutex* size_bracket_locks_[kNumOfSizeBrackets];
  // Bracket lock names (since locks only have char* names).
  std::string size_bracket_lock_names_[kNumOfSizeBrackets];
  // The types of page map entries.
  enum PageMapKind {
    kPageMapReleased = 0,     // Zero and released back to the OS.
    kPageMapEmpty,            // Zero but probably dirty.
    kPageMapRun,              // The beginning of a run.
    kPageMapRunPart,          // The non-beginning part of a run.
    kPageMapLargeObject,      // The beginning of a large object.
    kPageMapLargeObjectPart,  // The non-beginning part of a large object.
  };
  // The table that indicates what pages are currently used for.
  volatile uint8_t* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
  size_t page_map_size_;
  size_t max_page_map_size_;
  std::unique_ptr<MemMap> page_map_mem_map_;

  // The table that indicates the size of free page runs. These sizes
  // are stored here to avoid storing in the free page header and
  // release backing pages.
  std::vector<size_t, TrackingAllocator<size_t, kAllocatorTagRosAlloc>> free_page_run_size_map_
      GUARDED_BY(lock_);
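
  // Illustrative sketch (not part of the allocator): what page_map_ might look
  // like for a hypothetical region whose first pages hold a two-page run, a
  // three-page large object, and a released page:
  //
  //   page_map_[0] == kPageMapRun              // run header page
  //   page_map_[1] == kPageMapRunPart          // rest of the run
  //   page_map_[2] == kPageMapLargeObject      // large object start
  //   page_map_[3] == kPageMapLargeObjectPart
  //   page_map_[4] == kPageMapLargeObjectPart
  //   page_map_[5] == kPageMapReleased
  //
  // Given an interior pointer, RoundDownToPageMapIndex() plus a backwards scan
  // over kPageMapRunPart entries is enough to locate the owning run's header.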
  // The global lock. Used to guard the page map, the free page set,
  // and the footprint.
  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
  // The reader-writer lock to allow one bulk free at a time while
  // allowing multiple individual frees at the same time. Also, this
  // is used to avoid race conditions between BulkFree() and
  // RevokeThreadLocalRuns() on the bulk free list.
  ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;

  // The page release mode.
  const PageReleaseMode page_release_mode_;
  // Under kPageReleaseModeSize(AndEnd), if the free page run size is
  // greater than or equal to this value, release pages.
  const size_t page_release_size_threshold_;

  // Whether this allocator is running under Valgrind.
  bool is_running_on_memory_tool_;

  // The base address of the memory region that's managed by this allocator.
  uint8_t* Begin() { return base_; }
  // The end address of the memory region that's managed by this allocator.
  uint8_t* End() { return base_ + capacity_; }

  // Page-granularity alloc/free
  void* AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type)
      REQUIRES(lock_);
  // Returns how many bytes were freed.
  size_t FreePages(Thread* self, void* ptr, bool already_zero) REQUIRES(lock_);

  // Allocate/free a run slot.
  void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
                     size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  // Allocate/free a run slot without acquiring locks.
  // TODO: REQUIRES(Locks::mutator_lock_)
  void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated,
                                 size_t* usable_size, size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx) REQUIRES(!lock_);

  // Returns the bracket size.
  size_t FreeFromRun(Thread* self, void* ptr, Run* run)
      REQUIRES(!lock_);

  // Used to allocate a new thread local run for a size bracket.
  Run* AllocRun(Thread* self, size_t idx) REQUIRES(!lock_);

  // Used to acquire a new/reused run for a size bracket. Used when a
  // thread-local or current run gets full.
  Run* RefillRun(Thread* self, size_t idx) REQUIRES(!lock_);

  // The internals of non-bulk Free().
  size_t FreeInternal(Thread* self, void* ptr) REQUIRES(!lock_);

  // Allocates large objects.
  void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated,
                         size_t* usable_size, size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);

  // Revoke a run by adding it to non_full_runs_ or freeing the pages.
  void RevokeRun(Thread* self, size_t idx, Run* run) REQUIRES(!lock_);

  // Revoke the current runs which share an index with the thread local runs.
  void RevokeThreadUnsafeCurrentRuns() REQUIRES(!lock_);

  // Release a range of pages.
  size_t ReleasePageRange(uint8_t* start, uint8_t* end) REQUIRES(lock_);
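
  // Illustrative sketch (not part of this header; see rosalloc.cc): the rough
  // order in which the private helpers above are tried for a small allocation.
  //
  //   1. If IsSizeForThreadLocal(size), take a slot from the calling thread's
  //      thread-local run for that bracket, with no locking.
  //   2. If that run is full, merge its thread-local free list back; if it is
  //      still full, obtain a fresh run via RefillRun() and make it
  //      thread-local.
  //   3. For larger brackets, allocate from current_runs_[idx] under
  //      size_bracket_locks_[idx].
  //   4. RefillRun() itself prefers a run from non_full_runs_[idx] and only
  //      falls back to AllocPages() for brand-new pages.
  //
  // Requests above kLargeSizeThreshold bypass runs entirely and go to
  // AllocLargeObject().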
  // Dumps the page map for debugging.
  std::string DumpPageMap() REQUIRES(lock_);

 public:
  RosAlloc(void* base, size_t capacity, size_t max_capacity,
           PageReleaseMode page_release_mode,
           bool running_on_memory_tool,
           size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
  ~RosAlloc();

  static size_t RunFreeListOffset() {
    return OFFSETOF_MEMBER(Run, free_list_);
  }
  static size_t RunFreeListHeadOffset() {
    return OFFSETOF_MEMBER(SlotFreeList<false>, head_);
  }
  static size_t RunFreeListSizeOffset() {
    return OFFSETOF_MEMBER(SlotFreeList<false>, size_);
  }
  static size_t RunSlotNextOffset() {
    return OFFSETOF_MEMBER(Slot, next_);
  }

  // If kThreadSafe is false then the allocator may avoid acquiring some locks as an optimization.
  // If used, this may cause race conditions if multiple threads are allocating at the same time.
  template<bool kThreadSafe = true>
  void* Alloc(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
              size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  size_t Free(Thread* self, void* ptr)
      REQUIRES(!bulk_free_lock_, !lock_);
  size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
      REQUIRES(!bulk_free_lock_, !lock_);

  // Returns true if the given allocation request can be allocated in
  // an existing thread local run without allocating a new run.
  ALWAYS_INLINE bool CanAllocFromThreadLocalRun(Thread* self, size_t size);
  // Allocate the given allocation request in an existing thread local
  // run without allocating a new run.
  ALWAYS_INLINE void* AllocFromThreadLocalRun(Thread* self, size_t size, size_t* bytes_allocated);

  // Returns the maximum bytes that could be allocated for the given
  // size in bulk, that is the maximum value for the
  // bytes_allocated_bulk out param returned by RosAlloc::Alloc().
  ALWAYS_INLINE size_t MaxBytesBulkAllocatedFor(size_t size);

  // Returns the size of the allocated slot for a given allocated memory chunk.
  size_t UsableSize(const void* ptr) REQUIRES(!lock_);
  // Returns the size of the allocated slot for a given size.
  size_t UsableSize(size_t bytes) {
    if (UNLIKELY(bytes > kLargeSizeThreshold)) {
      return RoundUp(bytes, kPageSize);
    } else {
      return RoundToBracketSize(bytes);
    }
  }
  // Try to reduce the current footprint by releasing the free page
  // run at the end of the memory region, if any.
  bool Trim() REQUIRES(!lock_);
  // Iterates over all the memory slots and applies the given function.
  void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
                  void* arg)
      REQUIRES(!lock_);

  // Release empty pages.
  size_t ReleasePages() REQUIRES(!lock_);
  // Returns the current footprint.
  size_t Footprint() REQUIRES(!lock_);
  // Returns the current capacity, maximum footprint.
  size_t FootprintLimit() REQUIRES(!lock_);
  // Update the current capacity.
  void SetFootprintLimit(size_t bytes) REQUIRES(!lock_);
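
  // Illustrative sketch (not part of this header): how a caller such as
  // rosalloc_space.cc might drive the public API. The out-parameters report
  // the rounded-up bracket size and the thread-local bulk accounting.
  //
  //   size_t bytes_allocated, usable_size, bytes_tl_bulk_allocated;
  //   void* obj = rosalloc->Alloc<true>(self, 24, &bytes_allocated,
  //                                     &usable_size,
  //                                     &bytes_tl_bulk_allocated);
  //   // usable_size == rosalloc->UsableSize(24) == 24 (the 24-byte bracket).
  //   rosalloc->Free(self, obj);
  //
  // A 3000-byte request would instead exceed kLargeSizeThreshold and come back
  // page-granular: UsableSize(3000) == RoundUp(3000, kPageSize).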
  // Releases the thread-local runs assigned to the given thread back to the common set of runs.
  // Returns the total bytes of free slots in the revoked thread local runs. This is to be
  // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
  size_t RevokeThreadLocalRuns(Thread* thread) REQUIRES(!lock_, !bulk_free_lock_);
  // Releases the thread-local runs assigned to all the threads back to the common set of runs.
  // Returns the total bytes of free slots in the revoked thread local runs. This is to be
  // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
  size_t RevokeAllThreadLocalRuns() REQUIRES(!Locks::thread_list_lock_, !lock_, !bulk_free_lock_);
  // Assert the thread local runs of a thread are revoked.
  void AssertThreadLocalRunsAreRevoked(Thread* thread) REQUIRES(!bulk_free_lock_);
  // Assert all the thread local runs are revoked.
  void AssertAllThreadLocalRunsAreRevoked() REQUIRES(!Locks::thread_list_lock_, !bulk_free_lock_);

  static Run* GetDedicatedFullRun() {
    return dedicated_full_run_;
  }
  bool IsFreePage(size_t idx) const {
    DCHECK_LT(idx, capacity_ / kPageSize);
    uint8_t pm_type = page_map_[idx];
    return pm_type == kPageMapReleased || pm_type == kPageMapEmpty;
  }

  // Callbacks for InspectAll that will count the number of bytes
  // allocated and objects allocated, respectively.
  static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
  static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);

  bool DoesReleaseAllPages() const {
    return page_release_mode_ == kPageReleaseModeAll;
  }

  // Verify for debugging.
  void Verify() REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !bulk_free_lock_,
                         !lock_);

  void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes)
      REQUIRES(!bulk_free_lock_, !lock_);

  void DumpStats(std::ostream& os)
      REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_) REQUIRES(!bulk_free_lock_);

 private:
  friend std::ostream& operator<<(std::ostream& os, const RosAlloc::PageMapKind& rhs);

  DISALLOW_COPY_AND_ASSIGN(RosAlloc);
};
std::ostream& operator<<(std::ostream& os, const RosAlloc::PageMapKind& rhs);

// Callback from rosalloc when it needs to increase the footprint. Must be implemented somewhere
// else (currently rosalloc_space.cc).
void* ArtRosAllocMoreCore(allocator::RosAlloc* rosalloc, intptr_t increment);

}  // namespace allocator
}  // namespace gc
}  // namespace art

#endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_