1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
18 #define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
19 
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <sys/mman.h>
23 #include <memory>
24 #include <set>
25 #include <string>
26 #include <unordered_set>
27 #include <vector>
28 
29 #include <android-base/logging.h>
30 
31 #include "base/allocator.h"
32 #include "base/bit_utils.h"
33 #include "base/mem_map.h"
34 #include "base/mutex.h"
35 #include "runtime_globals.h"
36 #include "thread.h"
37 
38 namespace art {
39 
40 namespace gc {
41 namespace allocator {
42 
43 // A runs-of-slots memory allocator.
44 class RosAlloc {
45  private:
46   // Represents a run of free pages.
47   class FreePageRun {
48    public:
49     uint8_t magic_num_;  // The magic number used for debugging only.
50 
IsFree()51     bool IsFree() const {
52       return !kIsDebugBuild || magic_num_ == kMagicNumFree;
53     }
ByteSize(RosAlloc * rosalloc)54     size_t ByteSize(RosAlloc* rosalloc) const REQUIRES(rosalloc->lock_) {
55       const uint8_t* fpr_base = reinterpret_cast<const uint8_t*>(this);
56       size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
57       size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
58       DCHECK_GE(byte_size, static_cast<size_t>(0));
59       DCHECK_ALIGNED(byte_size, kPageSize);
60       return byte_size;
61     }
SetByteSize(RosAlloc * rosalloc,size_t byte_size)62     void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
63         REQUIRES(rosalloc->lock_) {
64       DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
65       uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
66       size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
67       rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
68     }
Begin()69     void* Begin() {
70       return reinterpret_cast<void*>(this);
71     }
End(RosAlloc * rosalloc)72     void* End(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
73       uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
74       uint8_t* end = fpr_base + ByteSize(rosalloc);
75       return end;
76     }
IsLargerThanPageReleaseThreshold(RosAlloc * rosalloc)77     bool IsLargerThanPageReleaseThreshold(RosAlloc* rosalloc)
78         REQUIRES(rosalloc->lock_) {
79       return ByteSize(rosalloc) >= rosalloc->page_release_size_threshold_;
80     }
IsAtEndOfSpace(RosAlloc * rosalloc)81     bool IsAtEndOfSpace(RosAlloc* rosalloc)
82         REQUIRES(rosalloc->lock_) {
83       return reinterpret_cast<uint8_t*>(this) + ByteSize(rosalloc) == rosalloc->base_ + rosalloc->footprint_;
84     }
ShouldReleasePages(RosAlloc * rosalloc)85     bool ShouldReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
86       switch (rosalloc->page_release_mode_) {
87         case kPageReleaseModeNone:
88           return false;
89         case kPageReleaseModeEnd:
90           return IsAtEndOfSpace(rosalloc);
91         case kPageReleaseModeSize:
92           return IsLargerThanPageReleaseThreshold(rosalloc);
93         case kPageReleaseModeSizeAndEnd:
94           return IsLargerThanPageReleaseThreshold(rosalloc) && IsAtEndOfSpace(rosalloc);
95         case kPageReleaseModeAll:
96           return true;
97         default:
98           LOG(FATAL) << "Unexpected page release mode ";
99           return false;
100       }
101     }
ReleasePages(RosAlloc * rosalloc)102     void ReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
103       uint8_t* start = reinterpret_cast<uint8_t*>(this);
104       size_t byte_size = ByteSize(rosalloc);
105       DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
106       if (ShouldReleasePages(rosalloc)) {
107         rosalloc->ReleasePageRange(start, start + byte_size);
108       }
109     }
110 
111    private:
112     DISALLOW_COPY_AND_ASSIGN(FreePageRun);
113   };
114 
115   // The slot header.
116   class Slot {
117    public:
Next()118     Slot* Next() const {
119       return next_;
120     }
SetNext(Slot * next)121     void SetNext(Slot* next) {
122       next_ = next;
123     }
124     // The slot right before this slot in terms of the address.
Left(size_t bracket_size)125     Slot* Left(size_t bracket_size) {
126       return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) - bracket_size);
127     }
Clear()128     void Clear() {
129       next_ = nullptr;
130     }
131 
132    private:
133     Slot* next_;  // Next slot in the list.
134     friend class RosAlloc;
135   };
136 
137   // We use the tail (kUseTail == true) for the bulk or thread-local free lists to avoid the need to
138   // traverse the list from the head to the tail when merging free lists.
139   // We don't use the tail (kUseTail == false) for the free list to avoid the need to manage the
140   // tail in the allocation fast path for a performance reason.
141   template<bool kUseTail = true>
142   class SlotFreeList {
143    public:
SlotFreeList()144     SlotFreeList() : head_(0U), tail_(0), size_(0), padding_(0) {}
Head()145     Slot* Head() const {
146       return reinterpret_cast<Slot*>(head_);
147     }
Tail()148     Slot* Tail() const {
149       CHECK(kUseTail);
150       return reinterpret_cast<Slot*>(tail_);
151     }
Size()152     size_t Size() const {
153       return size_;
154     }
155     // Removes from the head of the free list.
Remove()156     Slot* Remove() {
157       Slot* slot;
158       if (kIsDebugBuild) {
159         Verify();
160       }
161       Slot** headp = reinterpret_cast<Slot**>(&head_);
162       Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
163       Slot* old_head = *headp;
164       if (old_head == nullptr) {
165         // List was empty.
166         if (kUseTail) {
167           DCHECK(*tailp == nullptr);
168         }
169         return nullptr;
170       } else {
171         // List wasn't empty.
172         if (kUseTail) {
173           DCHECK(*tailp != nullptr);
174         }
175         Slot* old_head_next = old_head->Next();
176         slot = old_head;
177         *headp = old_head_next;
178         if (kUseTail && old_head_next == nullptr) {
179           // List becomes empty.
180           *tailp = nullptr;
181         }
182       }
183       slot->Clear();
184       --size_;
185       if (kIsDebugBuild) {
186         Verify();
187       }
188       return slot;
189     }
Add(Slot * slot)190     void Add(Slot* slot) {
191       if (kIsDebugBuild) {
192         Verify();
193       }
194       DCHECK(slot != nullptr);
195       DCHECK(slot->Next() == nullptr);
196       Slot** headp = reinterpret_cast<Slot**>(&head_);
197       Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
198       Slot* old_head = *headp;
199       if (old_head == nullptr) {
200         // List was empty.
201         if (kUseTail) {
202           DCHECK(*tailp == nullptr);
203         }
204         *headp = slot;
205         if (kUseTail) {
206           *tailp = slot;
207         }
208       } else {
209         // List wasn't empty.
210         if (kUseTail) {
211           DCHECK(*tailp != nullptr);
212         }
213         *headp = slot;
214         slot->SetNext(old_head);
215       }
216       ++size_;
217       if (kIsDebugBuild) {
218         Verify();
219       }
220     }
221     // Merge the given list into this list. Empty the given list.
222     // Deliberately support only a kUseTail == true SlotFreeList parameter because 1) we don't
223     // currently have a situation where we need a kUseTail == false SlotFreeList parameter, and 2)
224     // supporting the kUseTail == false parameter would require a O(n) linked list traversal to do
225     // the merge if 'this' SlotFreeList has kUseTail == false, which we'd like to avoid.
Merge(SlotFreeList<true> * list)226     void Merge(SlotFreeList<true>* list) {
227       if (kIsDebugBuild) {
228         Verify();
229         CHECK(list != nullptr);
230         list->Verify();
231       }
232       if (list->Size() == 0) {
233         return;
234       }
235       Slot** headp = reinterpret_cast<Slot**>(&head_);
236       Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
237       Slot* old_head = *headp;
238       if (old_head == nullptr) {
239         // List was empty.
240         *headp = list->Head();
241         if (kUseTail) {
242           *tailp = list->Tail();
243         }
244         size_ = list->Size();
245       } else {
246         // List wasn't empty.
247         DCHECK(list->Head() != nullptr);
248         *headp = list->Head();
249         DCHECK(list->Tail() != nullptr);
250         list->Tail()->SetNext(old_head);
251         // if kUseTail, no change to tailp.
252         size_ += list->Size();
253       }
254       list->Reset();
255       if (kIsDebugBuild) {
256         Verify();
257       }
258     }
259 
Reset()260     void Reset() {
261       head_ = 0;
262       if (kUseTail) {
263         tail_ = 0;
264       }
265       size_ = 0;
266     }
267 
Verify()268     void Verify() {
269       Slot* head = reinterpret_cast<Slot*>(head_);
270       Slot* tail = kUseTail ? reinterpret_cast<Slot*>(tail_) : nullptr;
271       if (size_ == 0) {
272         CHECK(head == nullptr);
273         if (kUseTail) {
274           CHECK(tail == nullptr);
275         }
276       } else {
277         CHECK(head != nullptr);
278         if (kUseTail) {
279           CHECK(tail != nullptr);
280         }
281         size_t count = 0;
282         for (Slot* slot = head; slot != nullptr; slot = slot->Next()) {
283           ++count;
284           if (kUseTail && slot->Next() == nullptr) {
285             CHECK_EQ(slot, tail);
286           }
287         }
288         CHECK_EQ(size_, count);
289       }
290     }
291 
292    private:
293     // A pointer (Slot*) to the head of the list. Always 8 bytes so that we will have the same
294     // layout between 32 bit and 64 bit, which is not strictly necessary, but we do so for 1)
295     // uniformity, 2) we won't need to change this code if we move to a non-low 4G heap in the
296     // future, and 3) the space savings by using 32 bit fields in 32 bit would be lost in noise
297     // (won't open up enough space to cause an extra slot to be available).
298     uint64_t head_;
299     // A pointer (Slot*) to the tail of the list. Always 8 bytes so that we will have the same
300     // layout between 32 bit and 64 bit. The tail is stored to speed up merging of lists.
301     // Unused if kUseTail is false.
302     uint64_t tail_;
303     // The number of slots in the list. This is used to make it fast to check if a free list is all
304     // free without traversing the whole free list.
305     uint32_t size_;
306     uint32_t padding_ ATTRIBUTE_UNUSED;
307     friend class RosAlloc;
308   };
309 
310   // Represents a run of memory slots of the same size.
311   //
312   // A run's memory layout:
313   //
314   // +-------------------+
315   // | magic_num         |
316   // +-------------------+
317   // | size_bracket_idx  |
318   // +-------------------+
319   // | is_thread_local   |
320   // +-------------------+
321   // | to_be_bulk_freed  |
322   // +-------------------+
323   // |                   |
324   // | free list         |
325   // |                   |
326   // +-------------------+
327   // |                   |
328   // | bulk free list    |
329   // |                   |
330   // +-------------------+
331   // |                   |
332   // | thread-local free |
333   // | list              |
334   // |                   |
335   // +-------------------+
336   // | padding due to    |
337   // | alignment         |
338   // +-------------------+
339   // | slot 0            |
340   // +-------------------+
341   // | slot 1            |
342   // +-------------------+
343   // | slot 2            |
344   // +-------------------+
345   // ...
346   // +-------------------+
347   // | last slot         |
348   // +-------------------+
349   //
350   class Run {
351    public:
352     uint8_t magic_num_;                 // The magic number used for debugging.
353     uint8_t size_bracket_idx_;          // The index of the size bracket of this run.
354     uint8_t is_thread_local_;           // True if this run is used as a thread-local run.
355     uint8_t to_be_bulk_freed_;          // Used within BulkFree() to flag a run that's involved with a bulk free.
356     uint32_t padding_ ATTRIBUTE_UNUSED;
357     // Use a tailless free list for free_list_ so that the alloc fast path does not manage the tail.
358     SlotFreeList<false> free_list_;
359     SlotFreeList<true> bulk_free_list_;
360     SlotFreeList<true> thread_local_free_list_;
361     // Padding due to alignment
362     // Slot 0
363     // Slot 1
364     // ...
365 
366     // Returns the byte size of the header.
fixed_header_size()367     static size_t fixed_header_size() {
368       return sizeof(Run);
369     }
FirstSlot()370     Slot* FirstSlot() const {
371       const uint8_t idx = size_bracket_idx_;
372       return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) + headerSizes[idx]);
373     }
LastSlot()374     Slot* LastSlot() {
375       const uint8_t idx = size_bracket_idx_;
376       const size_t bracket_size = bracketSizes[idx];
377       uintptr_t end = reinterpret_cast<uintptr_t>(End());
378       Slot* last_slot = reinterpret_cast<Slot*>(end - bracket_size);
379       DCHECK_LE(FirstSlot(), last_slot);
380       return last_slot;
381     }
FreeList()382     SlotFreeList<false>* FreeList() {
383       return &free_list_;
384     }
BulkFreeList()385     SlotFreeList<true>* BulkFreeList() {
386       return &bulk_free_list_;
387     }
ThreadLocalFreeList()388     SlotFreeList<true>* ThreadLocalFreeList() {
389       return &thread_local_free_list_;
390     }
End()391     void* End() {
392       return reinterpret_cast<uint8_t*>(this) + kPageSize * numOfPages[size_bracket_idx_];
393     }
SetIsThreadLocal(bool is_thread_local)394     void SetIsThreadLocal(bool is_thread_local) {
395       is_thread_local_  = is_thread_local ? 1 : 0;
396     }
IsThreadLocal()397     bool IsThreadLocal() const {
398       return is_thread_local_ != 0;
399     }
400     // Set up the free list for a new/empty run.
InitFreeList()401     void InitFreeList() {
402       const uint8_t idx = size_bracket_idx_;
403       const size_t bracket_size = bracketSizes[idx];
404       Slot* first_slot = FirstSlot();
405       // Add backwards so the first slot is at the head of the list.
406       for (Slot* slot = LastSlot(); slot >= first_slot; slot = slot->Left(bracket_size)) {
407         free_list_.Add(slot);
408       }
409     }
410     // Merge the thread local free list to the free list.  Used when a thread-local run becomes
411     // full.
412     bool MergeThreadLocalFreeListToFreeList(bool* is_all_free_after_out);
413     // Merge the bulk free list to the free list. Used in a bulk free.
414     void MergeBulkFreeListToFreeList();
415     // Merge the bulk free list to the thread local free list. In a bulk free, as a two-step
416     // process, GC will first record all the slots to free in a run in the bulk free list where it
417     // can write without a lock, and later acquire a lock once per run to merge the bulk free list
418     // to the thread-local free list.
419     void MergeBulkFreeListToThreadLocalFreeList();
420     // Allocates a slot in a run.
421     ALWAYS_INLINE void* AllocSlot();
422     // Frees a slot in a run. This is used in a non-bulk free.
423     void FreeSlot(void* ptr);
424     // Add the given slot to the bulk free list. Returns the bracket size.
425     size_t AddToBulkFreeList(void* ptr);
426     // Add the given slot to the thread-local free list.
427     void AddToThreadLocalFreeList(void* ptr);
428     // Returns true if all the slots in the run are not in use.
IsAllFree()429     bool IsAllFree() const {
430       return free_list_.Size() == numOfSlots[size_bracket_idx_];
431     }
432     // Returns the number of free slots.
NumberOfFreeSlots()433     size_t NumberOfFreeSlots() {
434       return free_list_.Size();
435     }
436     // Returns true if all the slots in the run are in use.
437     ALWAYS_INLINE bool IsFull();
438     // Returns true if the bulk free list is empty.
IsBulkFreeListEmpty()439     bool IsBulkFreeListEmpty() const {
440       return bulk_free_list_.Size() == 0;
441     }
442     // Returns true if the thread local free list is empty.
IsThreadLocalFreeListEmpty()443     bool IsThreadLocalFreeListEmpty() const {
444       return thread_local_free_list_.Size() == 0;
445     }
446     // Zero the run's data.
447     void ZeroData();
448     // Zero the run's header and the slot headers.
449     void ZeroHeaderAndSlotHeaders();
450     // Iterate over all the slots and apply the given function.
451     void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
452     // Dump the run metadata for debugging.
453     std::string Dump();
454     // Verify for debugging.
455     void Verify(Thread* self, RosAlloc* rosalloc, bool running_on_memory_tool)
456         REQUIRES(Locks::mutator_lock_)
457         REQUIRES(Locks::thread_list_lock_);
458 
459    private:
460     // The common part of AddToBulkFreeList() and AddToThreadLocalFreeList(). Returns the bracket
461     // size.
462     size_t AddToFreeListShared(void* ptr, SlotFreeList<true>* free_list, const char* caller_name);
463     // Turns a FreeList into a string for debugging.
464     template<bool kUseTail>
465     std::string FreeListToStr(SlotFreeList<kUseTail>* free_list);
466     // Check a given pointer is a valid slot address and return it as Slot*.
ToSlot(void * ptr)467     Slot* ToSlot(void* ptr) {
468       const uint8_t idx = size_bracket_idx_;
469       const size_t bracket_size = bracketSizes[idx];
470       const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
471           - reinterpret_cast<uint8_t*>(FirstSlot());
472       DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
473       size_t slot_idx = offset_from_slot_base / bracket_size;
474       DCHECK_LT(slot_idx, numOfSlots[idx]);
475       return reinterpret_cast<Slot*>(ptr);
476     }
SlotIndex(Slot * slot)477     size_t SlotIndex(Slot* slot) const {
478       const uint8_t idx = size_bracket_idx_;
479       const size_t bracket_size = bracketSizes[idx];
480       const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
481           - reinterpret_cast<uint8_t*>(FirstSlot());
482       DCHECK_EQ(offset_from_slot_base % bracket_size, 0U);
483       size_t slot_idx = offset_from_slot_base / bracket_size;
484       DCHECK_LT(slot_idx, numOfSlots[idx]);
485       return slot_idx;
486     }
487 
488     // TODO: DISALLOW_COPY_AND_ASSIGN(Run);
489   };
490 
491   // The magic number for a run.
492   static constexpr uint8_t kMagicNum = 42;
493   // The magic number for free pages.
494   static constexpr uint8_t kMagicNumFree = 43;
495   // The number of size brackets.
496   static constexpr size_t kNumOfSizeBrackets = 42;
497   // The sizes (the slot sizes, in bytes) of the size brackets.
498   static size_t bracketSizes[kNumOfSizeBrackets];
499   // The numbers of pages that are used for runs for each size bracket.
500   static size_t numOfPages[kNumOfSizeBrackets];
501   // The numbers of slots of the runs for each size bracket.
502   static size_t numOfSlots[kNumOfSizeBrackets];
503   // The header sizes in bytes of the runs for each size bracket.
504   static size_t headerSizes[kNumOfSizeBrackets];
505 
506   // Initialize the run specs (the above arrays).
507   static void Initialize();
508   static bool initialized_;
509 
510   // Returns the byte size of the bracket size from the index.
IndexToBracketSize(size_t idx)511   static size_t IndexToBracketSize(size_t idx) {
512     DCHECK_LT(idx, kNumOfSizeBrackets);
513     return bracketSizes[idx];
514   }
515   // Returns the index of the size bracket from the bracket size.
BracketSizeToIndex(size_t size)516   static size_t BracketSizeToIndex(size_t size) {
517     DCHECK(8 <= size &&
518            ((size <= kMaxThreadLocalBracketSize && size % kThreadLocalBracketQuantumSize == 0) ||
519             (size <= kMaxRegularBracketSize && size % kBracketQuantumSize == 0) ||
520             size == 1 * KB || size == 2 * KB));
521     size_t idx;
522     if (UNLIKELY(size == 1 * KB)) {
523       idx = kNumOfSizeBrackets - 2;
524     } else if (UNLIKELY(size == 2 * KB)) {
525       idx = kNumOfSizeBrackets - 1;
526     } else if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
527       DCHECK_EQ(size % kThreadLocalBracketQuantumSize, 0U);
528       idx = size / kThreadLocalBracketQuantumSize - 1;
529     } else {
530       DCHECK(size <= kMaxRegularBracketSize);
531       DCHECK_EQ((size - kMaxThreadLocalBracketSize) % kBracketQuantumSize, 0U);
532       idx = ((size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
533           + kNumThreadLocalSizeBrackets;
534     }
535     DCHECK(bracketSizes[idx] == size);
536     return idx;
537   }
538   // Returns true if the given allocation size is for a thread local allocation.
IsSizeForThreadLocal(size_t size)539   static bool IsSizeForThreadLocal(size_t size) {
540     bool is_size_for_thread_local = size <= kMaxThreadLocalBracketSize;
541     DCHECK(size > kLargeSizeThreshold ||
542            (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets)));
543     return is_size_for_thread_local;
544   }
545   // Rounds up the size up the nearest bracket size.
RoundToBracketSize(size_t size)546   static size_t RoundToBracketSize(size_t size) {
547     DCHECK(size <= kLargeSizeThreshold);
548     if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
549       return RoundUp(size, kThreadLocalBracketQuantumSize);
550     } else if (size <= kMaxRegularBracketSize) {
551       return RoundUp(size, kBracketQuantumSize);
552     } else if (UNLIKELY(size <= 1 * KB)) {
553       return 1 * KB;
554     } else {
555       DCHECK_LE(size, 2 * KB);
556       return 2 * KB;
557     }
558   }
559   // Returns the size bracket index from the byte size with rounding.
SizeToIndex(size_t size)560   static size_t SizeToIndex(size_t size) {
561     DCHECK(size <= kLargeSizeThreshold);
562     if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
563       return RoundUp(size, kThreadLocalBracketQuantumSize) / kThreadLocalBracketQuantumSize - 1;
564     } else if (size <= kMaxRegularBracketSize) {
565       return (RoundUp(size, kBracketQuantumSize) - kMaxThreadLocalBracketSize) / kBracketQuantumSize
566           - 1 + kNumThreadLocalSizeBrackets;
567     } else if (size <= 1 * KB) {
568       return kNumOfSizeBrackets - 2;
569     } else {
570       DCHECK_LE(size, 2 * KB);
571       return kNumOfSizeBrackets - 1;
572     }
573   }
574   // A combination of SizeToIndex() and RoundToBracketSize().
SizeToIndexAndBracketSize(size_t size,size_t * bracket_size_out)575   static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
576     DCHECK(size <= kLargeSizeThreshold);
577     size_t idx;
578     size_t bracket_size;
579     if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
580       bracket_size = RoundUp(size, kThreadLocalBracketQuantumSize);
581       idx = bracket_size / kThreadLocalBracketQuantumSize - 1;
582     } else if (size <= kMaxRegularBracketSize) {
583       bracket_size = RoundUp(size, kBracketQuantumSize);
584       idx = ((bracket_size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
585           + kNumThreadLocalSizeBrackets;
586     } else if (size <= 1 * KB) {
587       bracket_size = 1 * KB;
588       idx = kNumOfSizeBrackets - 2;
589     } else {
590       DCHECK(size <= 2 * KB);
591       bracket_size = 2 * KB;
592       idx = kNumOfSizeBrackets - 1;
593     }
594     DCHECK_EQ(idx, SizeToIndex(size)) << idx;
595     DCHECK_EQ(bracket_size, IndexToBracketSize(idx)) << idx;
596     DCHECK_EQ(bracket_size, bracketSizes[idx]) << idx;
597     DCHECK_LE(size, bracket_size) << idx;
598     DCHECK(size > kMaxRegularBracketSize ||
599            (size <= kMaxThreadLocalBracketSize &&
600             bracket_size - size < kThreadLocalBracketQuantumSize) ||
601            (size <= kMaxRegularBracketSize && bracket_size - size < kBracketQuantumSize)) << idx;
602     *bracket_size_out = bracket_size;
603     return idx;
604   }
605 
606   // Returns the page map index from an address. Requires that the
607   // address is page size aligned.
ToPageMapIndex(const void * addr)608   size_t ToPageMapIndex(const void* addr) const {
609     DCHECK_LE(base_, addr);
610     DCHECK_LT(addr, base_ + capacity_);
611     size_t byte_offset = reinterpret_cast<const uint8_t*>(addr) - base_;
612     DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0));
613     return byte_offset / kPageSize;
614   }
615   // Returns the page map index from an address with rounding.
RoundDownToPageMapIndex(const void * addr)616   size_t RoundDownToPageMapIndex(const void* addr) const {
617     DCHECK(base_ <= addr && addr < reinterpret_cast<uint8_t*>(base_) + capacity_);
618     return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
619   }
620 
621   // A memory allocation request larger than this size is treated as a large object and allocated
622   // at a page-granularity.
623   static const size_t kLargeSizeThreshold = 2048;
624 
625   // If true, check that the returned memory is actually zero.
626   static constexpr bool kCheckZeroMemory = kIsDebugBuild;
627   // Do not check memory when running under a memory tool. In a normal
628   // build with kCheckZeroMemory the whole test should be optimized away.
629   // TODO: Unprotect before checks.
630   ALWAYS_INLINE bool ShouldCheckZeroMemory();
631 
632   // If true, log verbose details of operations.
633   static constexpr bool kTraceRosAlloc = false;
634 
635   struct hash_run {
operatorhash_run636     size_t operator()(const RosAlloc::Run* r) const {
637       return reinterpret_cast<size_t>(r);
638     }
639   };
640 
641   struct eq_run {
operatoreq_run642     bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
643       return r1 == r2;
644     }
645   };
646 
647  public:
648   // Different page release modes.
649   enum PageReleaseMode {
650     kPageReleaseModeNone,         // Release no empty pages.
651     kPageReleaseModeEnd,          // Release empty pages at the end of the space.
652     kPageReleaseModeSize,         // Release empty pages that are larger than the threshold.
653     kPageReleaseModeSizeAndEnd,   // Release empty pages that are larger than the threshold or
654                                   // at the end of the space.
655     kPageReleaseModeAll,          // Release all empty pages.
656   };
657 
658   // The default value for page_release_size_threshold_.
659   static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB;
660 
661   // We use thread-local runs for the size brackets whose indexes
662   // are less than this index. We use shared (current) runs for the rest.
663   // Sync this with the length of Thread::rosalloc_runs_.
664   static const size_t kNumThreadLocalSizeBrackets = 16;
665   static_assert(kNumThreadLocalSizeBrackets == kNumRosAllocThreadLocalSizeBracketsInThread,
666                 "Mismatch between kNumThreadLocalSizeBrackets and "
667                 "kNumRosAllocThreadLocalSizeBracketsInThread");
668 
669   // The size of the largest bracket we use thread-local runs for.
670   // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1].
671   static const size_t kMaxThreadLocalBracketSize = 128;
672 
673   // We use regular (8 or 16-bytes increment) runs for the size brackets whose indexes are less than
674   // this index.
675   static const size_t kNumRegularSizeBrackets = 40;
676 
677   // The size of the largest regular (8 or 16-byte increment) bracket. Non-regular brackets are the
678   // 1 KB and the 2 KB brackets. This should be equal to bracketSizes[kNumRegularSizeBrackets - 1].
679   static const size_t kMaxRegularBracketSize = 512;
680 
681   // The bracket size increment for the thread-local brackets (<= kMaxThreadLocalBracketSize bytes).
682   static constexpr size_t kThreadLocalBracketQuantumSize = 8;
683 
684   // Equal to Log2(kThreadLocalBracketQuantumSize).
685   static constexpr size_t kThreadLocalBracketQuantumSizeShift = 3;
686 
687   // The bracket size increment for the non-thread-local, regular brackets (of size <=
688   // kMaxRegularBracketSize bytes and > kMaxThreadLocalBracketSize bytes).
689   static constexpr size_t kBracketQuantumSize = 16;
690 
691   // Equal to Log2(kBracketQuantumSize).
692   static constexpr size_t kBracketQuantumSizeShift = 4;
693 
694  private:
695   // The base address of the memory region that's managed by this allocator.
696   uint8_t* base_;
697 
698   // The footprint in bytes of the currently allocated portion of the
699   // memory region.
700   size_t footprint_;
701 
702   // The maximum footprint. The address, base_ + capacity_, indicates
703   // the end of the memory region that's currently managed by this allocator.
704   size_t capacity_;
705 
706   // The maximum capacity. The address, base_ + max_capacity_, indicates
707   // the end of the memory region that's ever managed by this allocator.
708   size_t max_capacity_;
709 
710   template<class Key, AllocatorTag kTag, class Compare = std::less<Key>>
711   using AllocationTrackingSet = std::set<Key, Compare, TrackingAllocator<Key, kTag>>;
712 
713   // The run sets that hold the runs whose slots are not all
714   // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
715   AllocationTrackingSet<Run*, kAllocatorTagRosAlloc> non_full_runs_[kNumOfSizeBrackets];
716   // The run sets that hold the runs whose slots are all full. This is
717   // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
718   std::unordered_set<Run*, hash_run, eq_run, TrackingAllocator<Run*, kAllocatorTagRosAlloc>>
719       full_runs_[kNumOfSizeBrackets];
720   // The set of free pages.
721   AllocationTrackingSet<FreePageRun*, kAllocatorTagRosAlloc> free_page_runs_ GUARDED_BY(lock_);
722   // The dedicated full run, it is always full and shared by all threads when revoking happens.
723   // This is an optimization since enables us to avoid a null check for revoked runs.
724   static Run* dedicated_full_run_;
725   // Using size_t to ensure that it is at least word aligned.
726   static size_t dedicated_full_run_storage_[];
727   // The current runs where the allocations are first attempted for
728   // the size brackes that do not use thread-local
729   // runs. current_runs_[i] is guarded by size_bracket_locks_[i].
730   Run* current_runs_[kNumOfSizeBrackets];
731   // The mutexes, one per size bracket.
732   Mutex* size_bracket_locks_[kNumOfSizeBrackets];
733   // Bracket lock names (since locks only have char* names).
734   std::string size_bracket_lock_names_[kNumOfSizeBrackets];
735   // The types of page map entries.
736   enum PageMapKind {
737     kPageMapReleased = 0,     // Zero and released back to the OS.
738     kPageMapEmpty,            // Zero but probably dirty.
739     kPageMapRun,              // The beginning of a run.
740     kPageMapRunPart,          // The non-beginning part of a run.
741     kPageMapLargeObject,      // The beginning of a large object.
742     kPageMapLargeObjectPart,  // The non-beginning part of a large object.
743   };
744   // The table that indicates what pages are currently used for.
745   volatile uint8_t* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
746   size_t page_map_size_;
747   size_t max_page_map_size_;
748   MemMap page_map_mem_map_;
749 
750   // The table that indicates the size of free page runs. These sizes
751   // are stored here to avoid storing in the free page header and
752   // release backing pages.
753   std::vector<size_t, TrackingAllocator<size_t, kAllocatorTagRosAlloc>> free_page_run_size_map_
754       GUARDED_BY(lock_);
755   // The global lock. Used to guard the page map, the free page set,
756   // and the footprint.
757   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
758   // The reader-writer lock to allow one bulk free at a time while
759   // allowing multiple individual frees at the same time. Also, this
760   // is used to avoid race conditions between BulkFree() and
761   // RevokeThreadLocalRuns() on the bulk free list.
762   ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
763 
764   // The page release mode.
765   const PageReleaseMode page_release_mode_;
766   // Under kPageReleaseModeSize(AndEnd), if the free page run size is
767   // greater than or equal to this value, release pages.
768   const size_t page_release_size_threshold_;
769 
770   // Whether this allocator is running on a memory tool.
771   bool is_running_on_memory_tool_;
772 
773   // The base address of the memory region that's managed by this allocator.
Begin()774   uint8_t* Begin() { return base_; }
775   // The end address of the memory region that's managed by this allocator.
End()776   uint8_t* End() { return base_ + capacity_; }
777 
778   // Page-granularity alloc/free
779   void* AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type)
780       REQUIRES(lock_);
781   // Returns how many bytes were freed.
782   size_t FreePages(Thread* self, void* ptr, bool already_zero) REQUIRES(lock_);
783 
784   // Allocate/free a run slot.
785   void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
786                      size_t* bytes_tl_bulk_allocated)
787       REQUIRES(!lock_);
788   // Allocate/free a run slot without acquiring locks.
789   // TODO: REQUIRES(Locks::mutator_lock_)
790   void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated,
791                                  size_t* usable_size, size_t* bytes_tl_bulk_allocated)
792       REQUIRES(!lock_);
793   void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx) REQUIRES(!lock_);
794 
795   // Returns the bracket size.
796   size_t FreeFromRun(Thread* self, void* ptr, Run* run)
797       REQUIRES(!lock_);
798 
799   // Used to allocate a new thread local run for a size bracket.
800   Run* AllocRun(Thread* self, size_t idx) REQUIRES(!lock_);
801 
802   // Used to acquire a new/reused run for a size bracket. Used when a
803   // thread-local or current run gets full.
804   Run* RefillRun(Thread* self, size_t idx) REQUIRES(!lock_);
805 
806   // The internal of non-bulk Free().
807   size_t FreeInternal(Thread* self, void* ptr) REQUIRES(!lock_);
808 
809   // Allocates large objects.
810   void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated,
811                          size_t* usable_size, size_t* bytes_tl_bulk_allocated)
812       REQUIRES(!lock_);
813 
814   // Revoke a run by adding it to non_full_runs_ or freeing the pages.
815   void RevokeRun(Thread* self, size_t idx, Run* run) REQUIRES(!lock_);
816 
817   // Revoke the current runs which share an index with the thread local runs.
818   void RevokeThreadUnsafeCurrentRuns() REQUIRES(!lock_);
819 
820   // Release a range of pages.
821   size_t ReleasePageRange(uint8_t* start, uint8_t* end) REQUIRES(lock_);
822 
823   // Dumps the page map for debugging.
824   std::string DumpPageMap() REQUIRES(lock_);
825 
826  public:
827   RosAlloc(void* base, size_t capacity, size_t max_capacity,
828            PageReleaseMode page_release_mode,
829            bool running_on_memory_tool,
830            size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
831   ~RosAlloc();
832 
RunFreeListOffset()833   static constexpr size_t RunFreeListOffset() {
834     return OFFSETOF_MEMBER(Run, free_list_);
835   }
RunFreeListHeadOffset()836   static constexpr size_t RunFreeListHeadOffset() {
837     return OFFSETOF_MEMBER(SlotFreeList<false>, head_);
838   }
RunFreeListSizeOffset()839   static constexpr size_t RunFreeListSizeOffset() {
840     return OFFSETOF_MEMBER(SlotFreeList<false>, size_);
841   }
RunSlotNextOffset()842   static constexpr size_t RunSlotNextOffset() {
843     return OFFSETOF_MEMBER(Slot, next_);
844   }
845 
846   // If kThreadUnsafe is true then the allocator may avoid acquiring some locks as an optimization.
847   // If used, this may cause race conditions if multiple threads are allocating at the same time.
848   template<bool kThreadSafe = true>
849   void* Alloc(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
850               size_t* bytes_tl_bulk_allocated)
851       REQUIRES(!lock_);
852   size_t Free(Thread* self, void* ptr)
853       REQUIRES(!bulk_free_lock_, !lock_);
854   size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
855       REQUIRES(!bulk_free_lock_, !lock_);
856 
857   // Returns true if the given allocation request can be allocated in
858   // an existing thread local run without allocating a new run.
859   ALWAYS_INLINE bool CanAllocFromThreadLocalRun(Thread* self, size_t size);
860   // Allocate the given allocation request in an existing thread local
861   // run without allocating a new run.
862   ALWAYS_INLINE void* AllocFromThreadLocalRun(Thread* self, size_t size, size_t* bytes_allocated);
863 
864   // Returns the maximum bytes that could be allocated for the given
865   // size in bulk, that is the maximum value for the
866   // bytes_allocated_bulk out param returned by RosAlloc::Alloc().
867   ALWAYS_INLINE size_t MaxBytesBulkAllocatedFor(size_t size);
868 
869   // Returns the size of the allocated slot for a given allocated memory chunk.
870   size_t UsableSize(const void* ptr) REQUIRES(!lock_);
871   // Returns the size of the allocated slot for a given size.
UsableSize(size_t bytes)872   size_t UsableSize(size_t bytes) {
873     if (UNLIKELY(bytes > kLargeSizeThreshold)) {
874       return RoundUp(bytes, kPageSize);
875     } else {
876       return RoundToBracketSize(bytes);
877     }
878   }
879   // Try to reduce the current footprint by releasing the free page
880   // run at the end of the memory region, if any.
881   bool Trim() REQUIRES(!lock_);
882   // Iterates over all the memory slots and apply the given function.
883   void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
884                   void* arg)
885       REQUIRES(!lock_);
886 
887   // Release empty pages.
888   size_t ReleasePages() REQUIRES(!lock_);
889   // Returns the current footprint.
890   size_t Footprint() REQUIRES(!lock_);
891   // Returns the current capacity, maximum footprint.
892   size_t FootprintLimit() REQUIRES(!lock_);
893   // Update the current capacity.
894   void SetFootprintLimit(size_t bytes) REQUIRES(!lock_);
895 
896   // Releases the thread-local runs assigned to the given thread back to the common set of runs.
897   // Returns the total bytes of free slots in the revoked thread local runs. This is to be
898   // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
899   size_t RevokeThreadLocalRuns(Thread* thread) REQUIRES(!lock_, !bulk_free_lock_);
900   // Releases the thread-local runs assigned to all the threads back to the common set of runs.
901   // Returns the total bytes of free slots in the revoked thread local runs. This is to be
902   // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
903   size_t RevokeAllThreadLocalRuns() REQUIRES(!Locks::thread_list_lock_, !lock_, !bulk_free_lock_);
904   // Assert the thread local runs of a thread are revoked.
905   void AssertThreadLocalRunsAreRevoked(Thread* thread) REQUIRES(!bulk_free_lock_);
906   // Assert all the thread local runs are revoked.
907   void AssertAllThreadLocalRunsAreRevoked() REQUIRES(!Locks::thread_list_lock_, !bulk_free_lock_);
908 
GetDedicatedFullRun()909   static Run* GetDedicatedFullRun() {
910     return dedicated_full_run_;
911   }
IsFreePage(size_t idx)912   bool IsFreePage(size_t idx) const {
913     DCHECK_LT(idx, capacity_ / kPageSize);
914     uint8_t pm_type = page_map_[idx];
915     return pm_type == kPageMapReleased || pm_type == kPageMapEmpty;
916   }
917 
918   // Callbacks for InspectAll that will count the number of bytes
919   // allocated and objects allocated, respectively.
920   static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
921   static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
922 
DoesReleaseAllPages()923   bool DoesReleaseAllPages() const {
924     return page_release_mode_ == kPageReleaseModeAll;
925   }
926 
927   // Verify for debugging.
928   void Verify() REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !bulk_free_lock_,
929                          !lock_);
930 
931   void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes)
932       REQUIRES(!bulk_free_lock_, !lock_);
933 
934   void DumpStats(std::ostream& os)
935       REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_) REQUIRES(!bulk_free_lock_);
936 
937  private:
938   friend std::ostream& operator<<(std::ostream& os, const RosAlloc::PageMapKind& rhs);
939 
940   DISALLOW_COPY_AND_ASSIGN(RosAlloc);
941 };
942 std::ostream& operator<<(std::ostream& os, const RosAlloc::PageMapKind& rhs);
943 
944 // Callback from rosalloc when it needs to increase the footprint. Must be implemented somewhere
945 // else (currently rosalloc_space.cc).
946 void* ArtRosAllocMoreCore(allocator::RosAlloc* rosalloc, intptr_t increment);
947 
948 }  // namespace allocator
949 }  // namespace gc
950 }  // namespace art
951 
952 #endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
953