1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "base/mutex-inl.h"
18 #include "mirror/class-inl.h"
19 #include "mirror/object.h"
20 #include "mirror/object-inl.h"
21 #include "thread-inl.h"
22 #include "thread_list.h"
23 #include "rosalloc.h"
24 
25 #include <map>
26 #include <list>
27 #include <vector>
28 
29 namespace art {
30 namespace gc {
31 namespace allocator {
32 
33 extern "C" void* art_heap_rosalloc_morecore(RosAlloc* rosalloc, intptr_t increment);
34 
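// Tunables for warming the slot area of a newly allocated run; see AllocRun().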
35 static constexpr bool kUsePrefetchDuringAllocRun = true;
36 static constexpr bool kPrefetchNewRunDataByZeroing = false;
37 static constexpr size_t kPrefetchStride = 64;
38 
39 size_t RosAlloc::bracketSizes[kNumOfSizeBrackets];
40 size_t RosAlloc::numOfPages[kNumOfSizeBrackets];
41 size_t RosAlloc::numOfSlots[kNumOfSizeBrackets];
42 size_t RosAlloc::headerSizes[kNumOfSizeBrackets];
43 size_t RosAlloc::bulkFreeBitMapOffsets[kNumOfSizeBrackets];
44 size_t RosAlloc::threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
45 bool RosAlloc::initialized_ = false;
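// Statically allocated storage for the dedicated full run. The dedicated full run is a sentinel
// run that is permanently "full", so current_runs_ and per-thread runs can point at it whenever
// no usable run is available: the fast-path AllocSlot() call simply fails and falls through to
// the refill path without needing a null check.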
46 size_t RosAlloc::dedicated_full_run_storage_[kPageSize / sizeof(size_t)] = { 0 };
47 RosAlloc::Run* RosAlloc::dedicated_full_run_ =
48     reinterpret_cast<RosAlloc::Run*>(dedicated_full_run_storage_);
49 
50 RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity,
51                    PageReleaseMode page_release_mode, size_t page_release_size_threshold)
52     : base_(reinterpret_cast<byte*>(base)), footprint_(capacity),
53       capacity_(capacity), max_capacity_(max_capacity),
54       lock_("rosalloc global lock", kRosAllocGlobalLock),
55       bulk_free_lock_("rosalloc bulk free lock", kRosAllocBulkFreeLock),
56       page_release_mode_(page_release_mode),
57       page_release_size_threshold_(page_release_size_threshold) {
58   DCHECK_EQ(RoundUp(capacity, kPageSize), capacity);
59   DCHECK_EQ(RoundUp(max_capacity, kPageSize), max_capacity);
60   CHECK_LE(capacity, max_capacity);
61   CHECK(IsAligned<kPageSize>(page_release_size_threshold_));
62   if (!initialized_) {
63     Initialize();
64   }
65   VLOG(heap) << "RosAlloc base="
66              << std::hex << (intptr_t)base_ << ", end="
67              << std::hex << (intptr_t)(base_ + capacity_)
68              << ", capacity=" << std::dec << capacity_
69              << ", max_capacity=" << std::dec << max_capacity_;
70   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
71     size_bracket_lock_names_[i] =
72         StringPrintf("a rosalloc size bracket %d lock", static_cast<int>(i));
73     size_bracket_locks_[i] = new Mutex(size_bracket_lock_names_[i].c_str(), kRosAllocBracketLock);
74     current_runs_[i] = dedicated_full_run_;
75   }
76   DCHECK_EQ(footprint_, capacity_);
77   size_t num_of_pages = footprint_ / kPageSize;
78   size_t max_num_of_pages = max_capacity_ / kPageSize;
79   std::string error_msg;
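  // The page map records one byte of state (a kPageMap* value) per page of the heap and is backed
  // by its own anonymous mapping sized for the maximum capacity.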
80   page_map_mem_map_.reset(MemMap::MapAnonymous("rosalloc page map", NULL, RoundUp(max_num_of_pages, kPageSize),
81                                                PROT_READ | PROT_WRITE, false, &error_msg));
82   CHECK(page_map_mem_map_.get() != nullptr) << "Couldn't allocate the page map : " << error_msg;
83   page_map_ = page_map_mem_map_->Begin();
84   page_map_size_ = num_of_pages;
85   max_page_map_size_ = max_num_of_pages;
86   free_page_run_size_map_.resize(num_of_pages);
87   FreePageRun* free_pages = reinterpret_cast<FreePageRun*>(base_);
88   if (kIsDebugBuild) {
89     free_pages->magic_num_ = kMagicNumFree;
90   }
91   free_pages->SetByteSize(this, capacity_);
92   DCHECK_EQ(capacity_ % kPageSize, static_cast<size_t>(0));
93   DCHECK(free_pages->IsFree());
94   free_pages->ReleasePages(this);
95   DCHECK(free_pages->IsFree());
96   free_page_runs_.insert(free_pages);
97   if (kTraceRosAlloc) {
98     LOG(INFO) << "RosAlloc::RosAlloc() : Inserted run 0x" << std::hex
99               << reinterpret_cast<intptr_t>(free_pages)
100               << " into free_page_runs_";
101   }
102 }
103 
104 RosAlloc::~RosAlloc() {
105   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
106     delete size_bracket_locks_[i];
107   }
108 }
109 
110 void* RosAlloc::AllocPages(Thread* self, size_t num_pages, byte page_map_type) {
111   lock_.AssertHeld(self);
112   DCHECK(page_map_type == kPageMapRun || page_map_type == kPageMapLargeObject);
113   FreePageRun* res = NULL;
114   const size_t req_byte_size = num_pages * kPageSize;
115   // Find the lowest address free page run that's large enough.
116   for (auto it = free_page_runs_.begin(); it != free_page_runs_.end(); ) {
117     FreePageRun* fpr = *it;
118     DCHECK(fpr->IsFree());
119     size_t fpr_byte_size = fpr->ByteSize(this);
120     DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
121     if (req_byte_size <= fpr_byte_size) {
122       // Found one.
123       free_page_runs_.erase(it++);
124       if (kTraceRosAlloc) {
125         LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x"
126                   << std::hex << reinterpret_cast<intptr_t>(fpr)
127                   << " from free_page_runs_";
128       }
129       if (req_byte_size < fpr_byte_size) {
130         // Split.
131         FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
132         if (kIsDebugBuild) {
133           remainder->magic_num_ = kMagicNumFree;
134         }
135         remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
136         DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
137         // Don't need to call madvise on remainder here.
138         free_page_runs_.insert(remainder);
139         if (kTraceRosAlloc) {
140           LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
141                     << reinterpret_cast<intptr_t>(remainder)
142                     << " into free_page_runs_";
143         }
144         fpr->SetByteSize(this, req_byte_size);
145         DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
146       }
147       res = fpr;
148       break;
149     } else {
150       ++it;
151     }
152   }
153 
154   // Failed to allocate pages. Grow the footprint, if possible.
155   if (UNLIKELY(res == NULL && capacity_ > footprint_)) {
156     FreePageRun* last_free_page_run = NULL;
157     size_t last_free_page_run_size;
158     auto it = free_page_runs_.rbegin();
159     if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
160       // There is a free page run at the end.
161       DCHECK(last_free_page_run->IsFree());
162       DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
163       last_free_page_run_size = last_free_page_run->ByteSize(this);
164     } else {
165       // There is no free page run at the end.
166       last_free_page_run_size = 0;
167     }
168     DCHECK_LT(last_free_page_run_size, req_byte_size);
169     if (capacity_ - footprint_ + last_free_page_run_size >= req_byte_size) {
170       // If we grow the heap, we can allocate it.
171       size_t increment = std::min(std::max(2 * MB, req_byte_size - last_free_page_run_size),
172                                   capacity_ - footprint_);
173       DCHECK_EQ(increment % kPageSize, static_cast<size_t>(0));
174       size_t new_footprint = footprint_ + increment;
175       size_t new_num_of_pages = new_footprint / kPageSize;
176       DCHECK_LT(page_map_size_, new_num_of_pages);
177       DCHECK_LT(free_page_run_size_map_.size(), new_num_of_pages);
178       page_map_size_ = new_num_of_pages;
179       DCHECK_LE(page_map_size_, max_page_map_size_);
180       free_page_run_size_map_.resize(new_num_of_pages);
181       art_heap_rosalloc_morecore(this, increment);
182       if (last_free_page_run_size > 0) {
183         // There was a free page run at the end. Expand its size.
184         DCHECK_EQ(last_free_page_run_size, last_free_page_run->ByteSize(this));
185         last_free_page_run->SetByteSize(this, last_free_page_run_size + increment);
186         DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
187         DCHECK_EQ(last_free_page_run->End(this), base_ + new_footprint);
188       } else {
189         // Otherwise, insert a new free page run at the end.
190         FreePageRun* new_free_page_run = reinterpret_cast<FreePageRun*>(base_ + footprint_);
191         if (kIsDebugBuild) {
192           new_free_page_run->magic_num_ = kMagicNumFree;
193         }
194         new_free_page_run->SetByteSize(this, increment);
195         DCHECK_EQ(new_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
196         free_page_runs_.insert(new_free_page_run);
197         DCHECK_EQ(*free_page_runs_.rbegin(), new_free_page_run);
198         if (kTraceRosAlloc) {
199           LOG(INFO) << "RosAlloc::AllocPages() : Grew the heap by inserting run 0x"
200                     << std::hex << reinterpret_cast<intptr_t>(new_free_page_run)
201                     << " into free_page_runs_";
202         }
203       }
204       DCHECK_LE(footprint_ + increment, capacity_);
205       if (kTraceRosAlloc) {
206         LOG(INFO) << "RosAlloc::AllocPages() : increased the footprint from "
207                   << footprint_ << " to " << new_footprint;
208       }
209       footprint_ = new_footprint;
210 
211       // And retry the last free page run.
212       it = free_page_runs_.rbegin();
213       DCHECK(it != free_page_runs_.rend());
214       FreePageRun* fpr = *it;
215       if (kIsDebugBuild && last_free_page_run_size > 0) {
216         DCHECK(last_free_page_run != NULL);
217         DCHECK_EQ(last_free_page_run, fpr);
218       }
219       size_t fpr_byte_size = fpr->ByteSize(this);
220       DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
221       DCHECK_LE(req_byte_size, fpr_byte_size);
222       free_page_runs_.erase(fpr);
223       if (kTraceRosAlloc) {
224         LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
225                   << " from free_page_runs_";
226       }
227       if (req_byte_size < fpr_byte_size) {
228         // Split if there's a remainder.
229         FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
230         if (kIsDebugBuild) {
231           remainder->magic_num_ = kMagicNumFree;
232         }
233         remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
234         DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
235         free_page_runs_.insert(remainder);
236         if (kTraceRosAlloc) {
237           LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
238                     << reinterpret_cast<intptr_t>(remainder)
239                     << " into free_page_runs_";
240         }
241         fpr->SetByteSize(this, req_byte_size);
242         DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
243       }
244       res = fpr;
245     }
246   }
247   if (LIKELY(res != NULL)) {
248     // Update the page map.
249     size_t page_map_idx = ToPageMapIndex(res);
250     for (size_t i = 0; i < num_pages; i++) {
251       DCHECK(IsFreePage(page_map_idx + i));
252     }
253     switch (page_map_type) {
254     case kPageMapRun:
255       page_map_[page_map_idx] = kPageMapRun;
256       for (size_t i = 1; i < num_pages; i++) {
257         page_map_[page_map_idx + i] = kPageMapRunPart;
258       }
259       break;
260     case kPageMapLargeObject:
261       page_map_[page_map_idx] = kPageMapLargeObject;
262       for (size_t i = 1; i < num_pages; i++) {
263         page_map_[page_map_idx + i] = kPageMapLargeObjectPart;
264       }
265       break;
266     default:
267       LOG(FATAL) << "Unreachable - page map type: " << page_map_type;
268       break;
269     }
270     if (kIsDebugBuild) {
271       // Clear the first page since it is not madvised due to the magic number.
272       memset(res, 0, kPageSize);
273     }
274     if (kTraceRosAlloc) {
275       LOG(INFO) << "RosAlloc::AllocPages() : 0x" << std::hex << reinterpret_cast<intptr_t>(res)
276                 << "-0x" << (reinterpret_cast<intptr_t>(res) + num_pages * kPageSize)
277                 << "(" << std::dec << (num_pages * kPageSize) << ")";
278     }
279     return res;
280   }
281 
282   // Fail.
283   if (kTraceRosAlloc) {
284     LOG(INFO) << "RosAlloc::AllocPages() : NULL";
285   }
286   return nullptr;
287 }
288 
289 size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) {
290   lock_.AssertHeld(self);
291   size_t pm_idx = ToPageMapIndex(ptr);
292   DCHECK_LT(pm_idx, page_map_size_);
293   byte pm_type = page_map_[pm_idx];
294   DCHECK(pm_type == kPageMapRun || pm_type == kPageMapLargeObject);
295   byte pm_part_type;
296   switch (pm_type) {
297   case kPageMapRun:
298     pm_part_type = kPageMapRunPart;
299     break;
300   case kPageMapLargeObject:
301     pm_part_type = kPageMapLargeObjectPart;
302     break;
303   default:
304     LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << " : " << "pm_idx=" << pm_idx << ", pm_type="
305                << static_cast<int>(pm_type) << ", ptr=" << std::hex
306                << reinterpret_cast<intptr_t>(ptr);
307     return 0;
308   }
309   // Update the page map and count the number of pages.
310   size_t num_pages = 1;
311   page_map_[pm_idx] = kPageMapEmpty;
312   size_t idx = pm_idx + 1;
313   size_t end = page_map_size_;
314   while (idx < end && page_map_[idx] == pm_part_type) {
315     page_map_[idx] = kPageMapEmpty;
316     num_pages++;
317     idx++;
318   }
319   const size_t byte_size = num_pages * kPageSize;
320   if (already_zero) {
321     if (kCheckZeroMemory) {
322       const uword* word_ptr = reinterpret_cast<uword*>(ptr);
323       for (size_t i = 0; i < byte_size / sizeof(uword); ++i) {
324         CHECK_EQ(word_ptr[i], 0U) << "words don't match at index " << i;
325       }
326     }
327   } else if (!DoesReleaseAllPages()) {
328     memset(ptr, 0, byte_size);
329   }
330 
331   if (kTraceRosAlloc) {
332     LOG(INFO) << __PRETTY_FUNCTION__ << " : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
333               << "-0x" << (reinterpret_cast<intptr_t>(ptr) + byte_size)
334               << "(" << std::dec << (num_pages * kPageSize) << ")";
335   }
336 
337   // Turn it into a free run.
338   FreePageRun* fpr = reinterpret_cast<FreePageRun*>(ptr);
339   if (kIsDebugBuild) {
340     fpr->magic_num_ = kMagicNumFree;
341   }
342   fpr->SetByteSize(this, byte_size);
343   DCHECK(IsAligned<kPageSize>(fpr->ByteSize(this)));
344 
345   DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
346   if (!free_page_runs_.empty()) {
347     // Try to coalesce in the higher address direction.
348     if (kTraceRosAlloc) {
349       LOG(INFO) << __PRETTY_FUNCTION__ << " : trying to coalesce a free page run 0x"
350                 << std::hex << reinterpret_cast<uintptr_t>(fpr) << " [" << std::dec << pm_idx << "] -0x"
351                 << std::hex << reinterpret_cast<uintptr_t>(fpr->End(this)) << " [" << std::dec
352                 << (fpr->End(this) == End() ? page_map_size_ : ToPageMapIndex(fpr->End(this))) << "]";
353     }
354     auto higher_it = free_page_runs_.upper_bound(fpr);
355     if (higher_it != free_page_runs_.end()) {
356       for (auto it = higher_it; it != free_page_runs_.end(); ) {
357         FreePageRun* h = *it;
358         DCHECK_EQ(h->ByteSize(this) % kPageSize, static_cast<size_t>(0));
359         if (kTraceRosAlloc) {
360           LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a higher free page run 0x"
361                     << std::hex << reinterpret_cast<uintptr_t>(h) << " [" << std::dec << ToPageMapIndex(h) << "] -0x"
362                     << std::hex << reinterpret_cast<uintptr_t>(h->End(this)) << " [" << std::dec
363                     << (h->End(this) == End() ? page_map_size_ : ToPageMapIndex(h->End(this))) << "]";
364         }
365         if (fpr->End(this) == h->Begin()) {
366           if (kTraceRosAlloc) {
367             LOG(INFO) << "Success";
368           }
369           // Clear magic num since this is no longer the start of a free page run.
370           if (kIsDebugBuild) {
371             h->magic_num_ = 0;
372           }
373           free_page_runs_.erase(it++);
374           if (kTraceRosAlloc) {
375             LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
376                       << reinterpret_cast<intptr_t>(h)
377                       << " from free_page_runs_";
378           }
379           fpr->SetByteSize(this, fpr->ByteSize(this) + h->ByteSize(this));
380           DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
381         } else {
382           // Not adjacent. Stop.
383           if (kTraceRosAlloc) {
384             LOG(INFO) << "Fail";
385           }
386           break;
387         }
388       }
389     }
390     // Try to coalesce in the lower address direction.
391     auto lower_it = free_page_runs_.upper_bound(fpr);
392     if (lower_it != free_page_runs_.begin()) {
393       --lower_it;
394       for (auto it = lower_it; ; ) {
395         // We want to try to coalesce with the first element but
396         // there's no "<=" operator for the iterator.
397         bool to_exit_loop = it == free_page_runs_.begin();
398 
399         FreePageRun* l = *it;
400         DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
401         if (kTraceRosAlloc) {
402           LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a lower free page run 0x"
403                     << std::hex << reinterpret_cast<uintptr_t>(l) << " [" << std::dec << ToPageMapIndex(l) << "] -0x"
404                     << std::hex << reinterpret_cast<uintptr_t>(l->End(this)) << " [" << std::dec
405                     << (l->End(this) == End() ? page_map_size_ : ToPageMapIndex(l->End(this))) << "]";
406         }
407         if (l->End(this) == fpr->Begin()) {
408           if (kTraceRosAlloc) {
409             LOG(INFO) << "Success";
410           }
411           free_page_runs_.erase(it--);
412           if (kTraceRosAlloc) {
413             LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
414                       << reinterpret_cast<intptr_t>(l)
415                       << " from free_page_runs_";
416           }
417           l->SetByteSize(this, l->ByteSize(this) + fpr->ByteSize(this));
418           DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
419           // Clear magic num since this is no longer the start of a free page run.
420           if (kIsDebugBuild) {
421             fpr->magic_num_ = 0;
422           }
423           fpr = l;
424         } else {
425           // Not adjacent. Stop.
426           if (kTraceRosAlloc) {
427             LOG(INFO) << "Fail";
428           }
429           break;
430         }
431         if (to_exit_loop) {
432           break;
433         }
434       }
435     }
436   }
437 
438   // Insert it.
439   DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
440   DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
441   DCHECK(fpr->IsFree());
442   fpr->ReleasePages(this);
443   DCHECK(fpr->IsFree());
444   free_page_runs_.insert(fpr);
445   DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
446   if (kTraceRosAlloc) {
447     LOG(INFO) << "RosAlloc::FreePages() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
448               << " into free_page_runs_";
449   }
450   return byte_size;
451 }
452 
453 void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) {
454   DCHECK_GT(size, kLargeSizeThreshold);
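  // Large allocations bypass runs entirely: they are carved out of whole pages and tracked only
  // through kPageMapLargeObject/kPageMapLargeObjectPart entries in the page map.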
455   size_t num_pages = RoundUp(size, kPageSize) / kPageSize;
456   void* r;
457   {
458     MutexLock mu(self, lock_);
459     r = AllocPages(self, num_pages, kPageMapLargeObject);
460   }
461   if (UNLIKELY(r == nullptr)) {
462     if (kTraceRosAlloc) {
463       LOG(INFO) << "RosAlloc::AllocLargeObject() : NULL";
464     }
465     return nullptr;
466   }
467   const size_t total_bytes = num_pages * kPageSize;
468   *bytes_allocated = total_bytes;
469   if (kTraceRosAlloc) {
470     LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(r)
471               << "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize)
472               << "(" << std::dec << (num_pages * kPageSize) << ")";
473   }
474   // Check if the returned memory is really all zero.
475   if (kCheckZeroMemory) {
476     CHECK_EQ(total_bytes % sizeof(uword), 0U);
477     const uword* words = reinterpret_cast<uword*>(r);
478     for (size_t i = 0; i < total_bytes / sizeof(uword); ++i) {
479       CHECK_EQ(words[i], 0U);
480     }
481   }
482   return r;
483 }
484 
485 size_t RosAlloc::FreeInternal(Thread* self, void* ptr) {
486   DCHECK_LE(base_, ptr);
487   DCHECK_LT(ptr, base_ + footprint_);
488   size_t pm_idx = RoundDownToPageMapIndex(ptr);
489   Run* run = nullptr;
490   {
491     MutexLock mu(self, lock_);
492     DCHECK_LT(pm_idx, page_map_size_);
493     byte page_map_entry = page_map_[pm_idx];
494     if (kTraceRosAlloc) {
495       LOG(INFO) << "RosAlloc::FreeInternal() : " << std::hex << ptr << ", pm_idx=" << std::dec << pm_idx
496                 << ", page_map_entry=" << static_cast<int>(page_map_entry);
497     }
498     switch (page_map_[pm_idx]) {
499       case kPageMapLargeObject:
500         return FreePages(self, ptr, false);
501       case kPageMapLargeObjectPart:
502         LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
503         return 0;
504       case kPageMapRunPart: {
505         // Find the beginning of the run.
506         do {
507           --pm_idx;
508           DCHECK_LT(pm_idx, capacity_ / kPageSize);
509         } while (page_map_[pm_idx] != kPageMapRun);
510         // Fall-through.
511       case kPageMapRun:
512         run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
513         DCHECK_EQ(run->magic_num_, kMagicNum);
514         break;
515       case kPageMapReleased:
516         // Fall-through.
517       case kPageMapEmpty:
518         LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
519         return 0;
520       }
521       default:
522         LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
523         return 0;
524     }
525   }
526   DCHECK(run != nullptr);
527   return FreeFromRun(self, ptr, run);
528 }
529 
530 size_t RosAlloc::Free(Thread* self, void* ptr) {
531   ReaderMutexLock rmu(self, bulk_free_lock_);
532   return FreeInternal(self, ptr);
533 }
534 
535 RosAlloc::Run* RosAlloc::AllocRun(Thread* self, size_t idx) {
536   RosAlloc::Run* new_run = nullptr;
537   {
538     MutexLock mu(self, lock_);
539     new_run = reinterpret_cast<Run*>(AllocPages(self, numOfPages[idx], kPageMapRun));
540   }
541   if (LIKELY(new_run != nullptr)) {
542     if (kIsDebugBuild) {
543       new_run->magic_num_ = kMagicNum;
544     }
545     new_run->size_bracket_idx_ = idx;
546     new_run->SetAllocBitMapBitsForInvalidSlots();
547     DCHECK(!new_run->IsThreadLocal());
548     DCHECK_EQ(new_run->first_search_vec_idx_, 0U);
549     DCHECK(!new_run->to_be_bulk_freed_);
550     if (kUsePrefetchDuringAllocRun && idx < kNumThreadLocalSizeBrackets) {
551       // Take ownership of the cache lines if we are likely to be thread local run.
552       if (kPrefetchNewRunDataByZeroing) {
553         // Zeroing the data is sometimes faster than prefetching but it increases memory usage
554         // since we end up dirtying zero pages which may have been madvised.
555         new_run->ZeroData();
556       } else {
557         const size_t num_of_slots = numOfSlots[idx];
558         const size_t bracket_size = bracketSizes[idx];
559         const size_t num_of_bytes = num_of_slots * bracket_size;
560         byte* begin = reinterpret_cast<byte*>(new_run) + headerSizes[idx];
561         for (size_t i = 0; i < num_of_bytes; i += kPrefetchStride) {
562           __builtin_prefetch(begin + i);
563         }
564       }
565     }
566   }
567   return new_run;
568 }
569 
570 RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) {
571   // Get the lowest address non-full run from the binary tree.
572   std::set<Run*>* const bt = &non_full_runs_[idx];
573   if (!bt->empty()) {
574     // If there's one, use it as the current run.
575     auto it = bt->begin();
576     Run* non_full_run = *it;
577     DCHECK(non_full_run != nullptr);
578     DCHECK(!non_full_run->IsThreadLocal());
579     bt->erase(it);
580     return non_full_run;
581   }
582   // If there's none, allocate a new run and use it as the current run.
583   return AllocRun(self, idx);
584 }
585 
586 inline void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) {
587   Run* current_run = current_runs_[idx];
588   DCHECK(current_run != nullptr);
589   void* slot_addr = current_run->AllocSlot();
590   if (UNLIKELY(slot_addr == nullptr)) {
591     // The current run got full. Try to refill it.
592     DCHECK(current_run->IsFull());
593     if (kIsDebugBuild && current_run != dedicated_full_run_) {
594       full_runs_[idx].insert(current_run);
595       if (kTraceRosAlloc) {
596         LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
597                   << reinterpret_cast<intptr_t>(current_run)
598                   << " into full_runs_[" << std::dec << idx << "]";
599       }
600       DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
601       DCHECK(full_runs_[idx].find(current_run) != full_runs_[idx].end());
602     }
603     current_run = RefillRun(self, idx);
604     if (UNLIKELY(current_run == nullptr)) {
605       // Failed to allocate a new run; make sure the current run is set to the dedicated full run.
606       current_runs_[idx] = dedicated_full_run_;
607       return nullptr;
608     }
609     DCHECK(current_run != nullptr);
610     DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
611     DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
612     current_run->SetIsThreadLocal(false);
613     current_runs_[idx] = current_run;
614     DCHECK(!current_run->IsFull());
615     slot_addr = current_run->AllocSlot();
616     // Must succeed now with a new run.
617     DCHECK(slot_addr != nullptr);
618   }
619   return slot_addr;
620 }
621 
622 void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated) {
623   DCHECK_LE(size, kLargeSizeThreshold);
624   size_t bracket_size;
625   size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
626   DCHECK_EQ(idx, SizeToIndex(size));
627   DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
628   DCHECK_EQ(bracket_size, bracketSizes[idx]);
629   DCHECK_LE(size, bracket_size);
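  // Brackets are spaced 16 bytes apart up to 512 bytes, so a small request is never more than
  // 16 bytes below its bracket size.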
630   DCHECK(size > 512 || bracket_size - size < 16);
631   Locks::mutator_lock_->AssertExclusiveHeld(self);
632   void* slot_addr = AllocFromCurrentRunUnlocked(self, idx);
633   if (LIKELY(slot_addr != nullptr)) {
634     DCHECK(bytes_allocated != nullptr);
635     *bytes_allocated = bracket_size;
636     // Caller verifies that it is all 0.
637   }
638   return slot_addr;
639 }
640 
641 void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) {
642   DCHECK_LE(size, kLargeSizeThreshold);
643   size_t bracket_size;
644   size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
645   DCHECK_EQ(idx, SizeToIndex(size));
646   DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
647   DCHECK_EQ(bracket_size, bracketSizes[idx]);
648   DCHECK_LE(size, bracket_size);
649   DCHECK(size > 512 || bracket_size - size < 16);
650 
651   void* slot_addr;
652 
653   if (LIKELY(idx < kNumThreadLocalSizeBrackets)) {
654     // Use a thread-local run.
655     Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
656     // Allow invalid since this will always fail the allocation.
657     if (kIsDebugBuild) {
658       // Need the lock to prevent race conditions.
659       MutexLock mu(self, *size_bracket_locks_[idx]);
660       CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
661       CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
662     }
663     DCHECK(thread_local_run != nullptr);
664     DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_);
665     slot_addr = thread_local_run->AllocSlot();
666     // The allocation must fail if the run is invalid.
667     DCHECK(thread_local_run != dedicated_full_run_ || slot_addr == nullptr)
668         << "allocated from an invalid run";
669     if (UNLIKELY(slot_addr == nullptr)) {
670       // The run got full. Try to free slots.
671       DCHECK(thread_local_run->IsFull());
672       MutexLock mu(self, *size_bracket_locks_[idx]);
673       bool is_all_free_after_merge;
674       // This is safe to do for the dedicated_full_run_ since the bitmaps are empty.
675       if (thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&is_all_free_after_merge)) {
676         DCHECK_NE(thread_local_run, dedicated_full_run_);
677         // Some slot got freed. Keep it.
678         DCHECK(!thread_local_run->IsFull());
679         DCHECK_EQ(is_all_free_after_merge, thread_local_run->IsAllFree());
680         if (is_all_free_after_merge) {
681           // Check that the bitmap idx is back at 0 if it's all free.
682           DCHECK_EQ(thread_local_run->first_search_vec_idx_, 0U);
683         }
684       } else {
685         // No slots got freed. Try to refill the thread-local run.
686         DCHECK(thread_local_run->IsFull());
687         if (thread_local_run != dedicated_full_run_) {
688           thread_local_run->SetIsThreadLocal(false);
689           if (kIsDebugBuild) {
690             full_runs_[idx].insert(thread_local_run);
691             if (kTraceRosAlloc) {
692               LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex
693                         << reinterpret_cast<intptr_t>(thread_local_run)
694                         << " into full_runs_[" << std::dec << idx << "]";
695             }
696           }
697           DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
698           DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
699         }
700 
701         thread_local_run = RefillRun(self, idx);
702         if (UNLIKELY(thread_local_run == nullptr)) {
703           self->SetRosAllocRun(idx, dedicated_full_run_);
704           return nullptr;
705         }
706         DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
707         DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
708         thread_local_run->SetIsThreadLocal(true);
709         self->SetRosAllocRun(idx, thread_local_run);
710         DCHECK(!thread_local_run->IsFull());
711       }
712 
713       DCHECK(thread_local_run != nullptr);
714       DCHECK(!thread_local_run->IsFull());
715       DCHECK(thread_local_run->IsThreadLocal());
716       slot_addr = thread_local_run->AllocSlot();
717       // Must succeed now with a new run.
718       DCHECK(slot_addr != nullptr);
719     }
720     if (kTraceRosAlloc) {
721       LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
722                 << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
723                 << "(" << std::dec << (bracket_size) << ")";
724     }
725   } else {
726     // Use the (shared) current run.
727     MutexLock mu(self, *size_bracket_locks_[idx]);
728     slot_addr = AllocFromCurrentRunUnlocked(self, idx);
729     if (kTraceRosAlloc) {
730       LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
731                 << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
732                 << "(" << std::dec << (bracket_size) << ")";
733     }
734   }
735   DCHECK(bytes_allocated != nullptr);
736   *bytes_allocated = bracket_size;
737   // Caller verifies that it is all 0.
738   return slot_addr;
739 }
740 
741 size_t RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) {
742   DCHECK_EQ(run->magic_num_, kMagicNum);
743   DCHECK_LT(run, ptr);
744   DCHECK_LT(ptr, run->End());
745   const size_t idx = run->size_bracket_idx_;
746   const size_t bracket_size = bracketSizes[idx];
747   bool run_was_full = false;
748   MutexLock mu(self, *size_bracket_locks_[idx]);
749   if (kIsDebugBuild) {
750     run_was_full = run->IsFull();
751   }
752   if (kTraceRosAlloc) {
753     LOG(INFO) << "RosAlloc::FreeFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr);
754   }
755   if (LIKELY(run->IsThreadLocal())) {
756     // It's a thread-local run. Just mark the thread-local free bit map and return.
757     DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets);
758     DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
759     DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
760     run->MarkThreadLocalFreeBitMap(ptr);
761     if (kTraceRosAlloc) {
762       LOG(INFO) << "RosAlloc::FreeFromRun() : Freed a slot in a thread local run 0x" << std::hex
763                 << reinterpret_cast<intptr_t>(run);
764     }
765     // A thread local run will be kept as a thread local even if it's become all free.
766     return bracket_size;
767   }
768   // Free the slot in the run.
769   run->FreeSlot(ptr);
770   std::set<Run*>* non_full_runs = &non_full_runs_[idx];
771   if (run->IsAllFree()) {
772     // It has just become completely free. Free the pages of this run.
773     std::set<Run*>::iterator pos = non_full_runs->find(run);
774     if (pos != non_full_runs->end()) {
775       non_full_runs->erase(pos);
776       if (kTraceRosAlloc) {
777         LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
778                   << reinterpret_cast<intptr_t>(run) << " from non_full_runs_";
779       }
780     }
781     if (run == current_runs_[idx]) {
782       current_runs_[idx] = dedicated_full_run_;
783     }
784     DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
785     DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
786     run->ZeroHeader();
787     {
788       MutexLock mu(self, lock_);
789       FreePages(self, run, true);
790     }
791   } else {
792     // It is not completely free. If it wasn't the current run or
793     // already in the non-full run set (i.e., it was full) insert it
794     // into the non-full run set.
795     if (run != current_runs_[idx]) {
796       std::unordered_set<Run*, hash_run, eq_run>* full_runs =
797           kIsDebugBuild ? &full_runs_[idx] : NULL;
798       std::set<Run*>::iterator pos = non_full_runs->find(run);
799       if (pos == non_full_runs->end()) {
800         DCHECK(run_was_full);
801         DCHECK(full_runs->find(run) != full_runs->end());
802         if (kIsDebugBuild) {
803           full_runs->erase(run);
804           if (kTraceRosAlloc) {
805             LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
806                       << reinterpret_cast<intptr_t>(run) << " from full_runs_";
807           }
808         }
809         non_full_runs->insert(run);
810         DCHECK(!run->IsFull());
811         if (kTraceRosAlloc) {
812           LOG(INFO) << "RosAlloc::FreeFromRun() : Inserted run 0x" << std::hex
813                     << reinterpret_cast<intptr_t>(run)
814                     << " into non_full_runs_[" << std::dec << idx << "]";
815         }
816       }
817     }
818   }
819   return bracket_size;
820 }
821 
822 std::string RosAlloc::Run::BitMapToStr(uint32_t* bit_map_base, size_t num_vec) {
823   std::string bit_map_str;
824   for (size_t v = 0; v < num_vec; v++) {
825     uint32_t vec = bit_map_base[v];
826     if (v != num_vec - 1) {
827       bit_map_str.append(StringPrintf("%x-", vec));
828     } else {
829       bit_map_str.append(StringPrintf("%x", vec));
830     }
831   }
832   return bit_map_str;
833 }
834 
835 std::string RosAlloc::Run::Dump() {
836   size_t idx = size_bracket_idx_;
837   size_t num_slots = numOfSlots[idx];
838   size_t num_vec = RoundUp(num_slots, 32) / 32;
839   std::ostringstream stream;
840   stream << "RosAlloc Run = " << reinterpret_cast<void*>(this)
841          << "{ magic_num=" << static_cast<int>(magic_num_)
842          << " size_bracket_idx=" << idx
843          << " is_thread_local=" << static_cast<int>(is_thread_local_)
844          << " to_be_bulk_freed=" << static_cast<int>(to_be_bulk_freed_)
845          << " first_search_vec_idx=" << first_search_vec_idx_
846          << " alloc_bit_map=" << BitMapToStr(alloc_bit_map_, num_vec)
847          << " bulk_free_bit_map=" << BitMapToStr(BulkFreeBitMap(), num_vec)
848          << " thread_local_bit_map=" << BitMapToStr(ThreadLocalFreeBitMap(), num_vec)
849          << " }" << std::endl;
850   return stream.str();
851 }
852 
853 inline void* RosAlloc::Run::AllocSlot() {
854   const size_t idx = size_bracket_idx_;
855   while (true) {
856     if (kIsDebugBuild) {
857       // Make sure that no slots leaked; the bitmap should be full for all previous vectors.
858       for (size_t i = 0; i < first_search_vec_idx_; ++i) {
859         CHECK_EQ(~alloc_bit_map_[i], 0U);
860       }
861     }
862     uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_];
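    // __builtin_ffs() returns one plus the index of the least significant set bit (or zero if the
    // argument is zero), so applying it to the complement finds the first free slot in this word.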
863     uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr);
864     if (LIKELY(ffz1 != 0)) {
865       const uint32_t ffz = ffz1 - 1;
866       const uint32_t slot_idx = ffz + first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte;
867       const uint32_t mask = 1U << ffz;
868       DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range";
869       // Found an empty slot. Set the bit.
870       DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U);
871       *alloc_bitmap_ptr |= mask;
872       DCHECK_NE(*alloc_bitmap_ptr & mask, 0U);
873       byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx];
874       if (kTraceRosAlloc) {
875         LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
876                   << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
877       }
878       return slot_addr;
879     }
880     const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32;
881     if (first_search_vec_idx_ + 1 >= num_words) {
882       DCHECK(IsFull());
883       // Already at the last word, return null.
884       return nullptr;
885     }
886     // Increase the index to the next word and try again.
887     ++first_search_vec_idx_;
888   }
889 }
890 
891 void RosAlloc::Run::FreeSlot(void* ptr) {
892   DCHECK(!IsThreadLocal());
893   const byte idx = size_bracket_idx_;
894   const size_t bracket_size = bracketSizes[idx];
895   const size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
896       - (reinterpret_cast<byte*>(this) + headerSizes[idx]);
897   DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
898   size_t slot_idx = offset_from_slot_base / bracket_size;
899   DCHECK_LT(slot_idx, numOfSlots[idx]);
900   size_t vec_idx = slot_idx / 32;
901   if (kIsDebugBuild) {
902     size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
903     DCHECK_LT(vec_idx, num_vec);
904   }
905   size_t vec_off = slot_idx % 32;
906   uint32_t* vec = &alloc_bit_map_[vec_idx];
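  // Make sure the next allocation scan starts no later than the word that just regained a free bit.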
907   first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(vec_idx));
908   const uint32_t mask = 1U << vec_off;
909   DCHECK_NE(*vec & mask, 0U);
910   *vec &= ~mask;
911   DCHECK_EQ(*vec & mask, 0U);
912   // Zero out the memory.
913   // TODO: Investigate alternate memset since ptr is guaranteed to be aligned to 16.
914   memset(ptr, 0, bracket_size);
915   if (kTraceRosAlloc) {
916     LOG(INFO) << "RosAlloc::Run::FreeSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
917               << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
918   }
919 }
920 
921 inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) {
922   DCHECK(IsThreadLocal());
923   // Free slots in the alloc bit map based on the thread local free bit map.
924   const size_t idx = size_bracket_idx_;
925   const size_t num_of_slots = numOfSlots[idx];
926   const size_t num_vec = RoundUp(num_of_slots, 32) / 32;
927   bool changed = false;
928   uint32_t* vecp = &alloc_bit_map_[0];
929   uint32_t* tl_free_vecp = &ThreadLocalFreeBitMap()[0];
930   bool is_all_free_after = true;
931   for (size_t v = 0; v < num_vec; v++, vecp++, tl_free_vecp++) {
932     uint32_t tl_free_vec = *tl_free_vecp;
933     uint32_t vec_before = *vecp;
934     uint32_t vec_after;
935     if (tl_free_vec != 0) {
936       first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v));
937       vec_after = vec_before & ~tl_free_vec;
938       *vecp = vec_after;
939       changed = true;
940       *tl_free_vecp = 0;  // clear the thread local free bit map.
941     } else {
942       vec_after = vec_before;
943     }
944     if (vec_after != 0) {
945       if (v == num_vec - 1) {
946         // Only not all free if a bit other than the mask bits is set.
947         is_all_free_after =
948             is_all_free_after && GetBitmapLastVectorMask(num_of_slots, num_vec) == vec_after;
949       } else {
950         is_all_free_after = false;
951       }
952     }
953     DCHECK_EQ(*tl_free_vecp, static_cast<uint32_t>(0));
954   }
955   *is_all_free_after_out = is_all_free_after;
956   // Return true if there was at least a bit set in the thread-local
957   // free bit map and at least a bit in the alloc bit map changed.
958   return changed;
959 }
960 
961 inline void RosAlloc::Run::MergeBulkFreeBitMapIntoAllocBitMap() {
962   DCHECK(!IsThreadLocal());
963   // Free slots in the alloc bit map based on the bulk free bit map.
964   const size_t num_vec = NumberOfBitmapVectors();
965   uint32_t* vecp = &alloc_bit_map_[0];
966   uint32_t* free_vecp = &BulkFreeBitMap()[0];
967   for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) {
968     uint32_t free_vec = *free_vecp;
969     if (free_vec != 0) {
970       first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v));
971       *vecp &= ~free_vec;
972       *free_vecp = 0;  // clear the bulk free bit map.
973     }
974     DCHECK_EQ(*free_vecp, static_cast<uint32_t>(0));
975   }
976 }
977 
978 inline void RosAlloc::Run::UnionBulkFreeBitMapToThreadLocalFreeBitMap() {
979   DCHECK(IsThreadLocal());
980   // Union the thread local bit map with the bulk free bit map.
981   size_t num_vec = NumberOfBitmapVectors();
982   uint32_t* to_vecp = &ThreadLocalFreeBitMap()[0];
983   uint32_t* from_vecp = &BulkFreeBitMap()[0];
984   for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) {
985     uint32_t from_vec = *from_vecp;
986     if (from_vec != 0) {
987       *to_vecp |= from_vec;
988       *from_vecp = 0;  // clear the bulk free bit map.
989     }
990     DCHECK_EQ(*from_vecp, static_cast<uint32_t>(0));
991   }
992 }
993 
994 inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) {
995   DCHECK(IsThreadLocal());
996   MarkFreeBitMapShared(ptr, ThreadLocalFreeBitMap(), "MarkThreadLocalFreeBitMap");
997 }
998 
999 inline size_t RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) {
1000   return MarkFreeBitMapShared(ptr, BulkFreeBitMap(), "MarkBulkFreeBitMap");
1001 }
1002 
1003 inline size_t RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base,
1004                                                   const char* caller_name) {
1005   const byte idx = size_bracket_idx_;
1006   const size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
1007       - (reinterpret_cast<byte*>(this) + headerSizes[idx]);
1008   const size_t bracket_size = bracketSizes[idx];
1009   memset(ptr, 0, bracket_size);
1010   DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
1011   size_t slot_idx = offset_from_slot_base / bracket_size;
1012   DCHECK_LT(slot_idx, numOfSlots[idx]);
1013   size_t vec_idx = slot_idx / 32;
1014   if (kIsDebugBuild) {
1015     size_t num_vec = NumberOfBitmapVectors();
1016     DCHECK_LT(vec_idx, num_vec);
1017   }
1018   size_t vec_off = slot_idx % 32;
1019   uint32_t* vec = &free_bit_map_base[vec_idx];
1020   const uint32_t mask = 1U << vec_off;
1021   DCHECK_EQ(*vec & mask, 0U);
1022   *vec |= mask;
1023   DCHECK_NE(*vec & mask, 0U);
1024   if (kTraceRosAlloc) {
1025     LOG(INFO) << "RosAlloc::Run::" << caller_name << "() : 0x" << std::hex
1026               << reinterpret_cast<intptr_t>(ptr)
1027               << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
1028   }
1029   return bracket_size;
1030 }
1031 
1032 inline uint32_t RosAlloc::Run::GetBitmapLastVectorMask(size_t num_slots, size_t num_vec) {
1033   const size_t kBitsPerVec = 32;
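  // Compute the mask of trailing bits in the last 32-bit vector that do not correspond to real
  // slots; these bits are kept set in the alloc bit map so that the slots are never handed out.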
1034   DCHECK_GE(num_vec * kBitsPerVec, num_slots);
1035   size_t remain = num_vec * kBitsPerVec - num_slots;
1036   DCHECK_NE(remain, kBitsPerVec);
1037   return ((1U << remain) - 1) << (kBitsPerVec - remain);
1038 }
1039 
1040 inline bool RosAlloc::Run::IsAllFree() {
1041   const byte idx = size_bracket_idx_;
1042   const size_t num_slots = numOfSlots[idx];
1043   const size_t num_vec = NumberOfBitmapVectors();
1044   DCHECK_NE(num_vec, 0U);
1045   // Check the last vector after the loop since it uses a special case for the masked bits.
1046   for (size_t v = 0; v < num_vec - 1; v++) {
1047     uint32_t vec = alloc_bit_map_[v];
1048     if (vec != 0) {
1049       return false;
1050     }
1051   }
1052   // Make sure the last word is equal to the mask, all other bits must be 0.
1053   return alloc_bit_map_[num_vec - 1] == GetBitmapLastVectorMask(num_slots, num_vec);
1054 }
1055 
1056 inline bool RosAlloc::Run::IsFull() {
1057   const size_t num_vec = NumberOfBitmapVectors();
1058   for (size_t v = 0; v < num_vec; ++v) {
1059     if (~alloc_bit_map_[v] != 0) {
1060       return false;
1061     }
1062   }
1063   return true;
1064 }
1065 
1066 inline bool RosAlloc::Run::IsBulkFreeBitmapClean() {
1067   const size_t num_vec = NumberOfBitmapVectors();
1068   for (size_t v = 0; v < num_vec; v++) {
1069     uint32_t vec = BulkFreeBitMap()[v];
1070     if (vec != 0) {
1071       return false;
1072     }
1073   }
1074   return true;
1075 }
1076 
1077 inline bool RosAlloc::Run::IsThreadLocalFreeBitmapClean() {
1078   const size_t num_vec = NumberOfBitmapVectors();
1079   for (size_t v = 0; v < num_vec; v++) {
1080     uint32_t vec = ThreadLocalFreeBitMap()[v];
1081     if (vec != 0) {
1082       return false;
1083     }
1084   }
1085   return true;
1086 }
1087 
1088 inline void RosAlloc::Run::SetAllocBitMapBitsForInvalidSlots() {
1089   const size_t idx = size_bracket_idx_;
1090   const size_t num_slots = numOfSlots[idx];
1091   const size_t num_vec = RoundUp(num_slots, 32) / 32;
1092   DCHECK_NE(num_vec, 0U);
1093   // Make sure to set the bits at the end of the bitmap so that we don't allocate there since they
1094   // don't represent valid slots.
1095   alloc_bit_map_[num_vec - 1] |= GetBitmapLastVectorMask(num_slots, num_vec);
1096 }
1097 
1098 inline void RosAlloc::Run::ZeroHeader() {
1099   const byte idx = size_bracket_idx_;
1100   memset(this, 0, headerSizes[idx]);
1101 }
1102 
1103 inline void RosAlloc::Run::ZeroData() {
1104   const byte idx = size_bracket_idx_;
1105   byte* slot_begin = reinterpret_cast<byte*>(this) + headerSizes[idx];
1106   memset(slot_begin, 0, numOfSlots[idx] * bracketSizes[idx]);
1107 }
1108 
1109 inline void RosAlloc::Run::FillAllocBitMap() {
1110   size_t num_vec = NumberOfBitmapVectors();
1111   memset(alloc_bit_map_, 0xFF, sizeof(uint32_t) * num_vec);
1112   first_search_vec_idx_ = num_vec - 1;  // No free bits in any of the bitmap words.
1113 }
1114 
1115 void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
1116                                     void* arg) {
1117   size_t idx = size_bracket_idx_;
1118   byte* slot_base = reinterpret_cast<byte*>(this) + headerSizes[idx];
1119   size_t num_slots = numOfSlots[idx];
1120   size_t bracket_size = IndexToBracketSize(idx);
1121   DCHECK_EQ(slot_base + num_slots * bracket_size, reinterpret_cast<byte*>(this) + numOfPages[idx] * kPageSize);
1122   size_t num_vec = RoundUp(num_slots, 32) / 32;
1123   size_t slots = 0;
1124   for (size_t v = 0; v < num_vec; v++, slots += 32) {
1125     DCHECK_GE(num_slots, slots);
1126     uint32_t vec = alloc_bit_map_[v];
1127     size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
1128     for (size_t i = 0; i < end; ++i) {
1129       bool is_allocated = ((vec >> i) & 0x1) != 0;
1130       byte* slot_addr = slot_base + (slots + i) * bracket_size;
1131       if (is_allocated) {
1132         handler(slot_addr, slot_addr + bracket_size, bracket_size, arg);
1133       } else {
1134         handler(slot_addr, slot_addr + bracket_size, 0, arg);
1135       }
1136     }
1137   }
1138 }
1139 
1140 // If true, read the page map entries in BulkFree() without using the
1141 // lock for better performance, assuming that the existence of an
1142 // allocated chunk/pointer being freed in BulkFree() guarantees that
1143 // the page map entry won't change.
1144 static constexpr bool kReadPageMapEntryWithoutLockInBulkFree = true;
1145 
1146 size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
1147   size_t freed_bytes = 0;
1148   if (false) {
1149     // Used only to test Free() as GC uses only BulkFree().
1150     for (size_t i = 0; i < num_ptrs; ++i) {
1151       freed_bytes += FreeInternal(self, ptrs[i]);
1152     }
1153     return freed_bytes;
1154   }
1155 
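  // Taking bulk_free_lock_ exclusively keeps individual Free() calls, which hold it shared, from
  // running concurrently with the two-phase bulk free below.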
1156   WriterMutexLock wmu(self, bulk_free_lock_);
1157 
1158   // First mark slots to free in the bulk free bit map without locking the
1159   // size bracket locks. On host, unordered_set is faster than vector + flag.
1160 #ifdef HAVE_ANDROID_OS
1161   std::vector<Run*> runs;
1162 #else
1163   std::unordered_set<Run*, hash_run, eq_run> runs;
1164 #endif
1165   for (size_t i = 0; i < num_ptrs; i++) {
1166     void* ptr = ptrs[i];
1167     DCHECK_LE(base_, ptr);
1168     DCHECK_LT(ptr, base_ + footprint_);
1169     size_t pm_idx = RoundDownToPageMapIndex(ptr);
1170     Run* run = nullptr;
1171     if (kReadPageMapEntryWithoutLockInBulkFree) {
1172       // Read the page map entries without locking the lock.
1173       byte page_map_entry = page_map_[pm_idx];
1174       if (kTraceRosAlloc) {
1175         LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
1176                   << std::dec << pm_idx
1177                   << ", page_map_entry=" << static_cast<int>(page_map_entry);
1178       }
1179       if (LIKELY(page_map_entry == kPageMapRun)) {
1180         run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
1181       } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
1182         size_t pi = pm_idx;
1183         // Find the beginning of the run.
1184         do {
1185           --pi;
1186           DCHECK_LT(pi, capacity_ / kPageSize);
1187         } while (page_map_[pi] != kPageMapRun);
1188         run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
1189       } else if (page_map_entry == kPageMapLargeObject) {
1190         MutexLock mu(self, lock_);
1191         freed_bytes += FreePages(self, ptr, false);
1192         continue;
1193       } else {
1194         LOG(FATAL) << "Unreachable - page map type: " << page_map_entry;
1195       }
1196     } else {
1197       // Read the page map entries with a lock.
1198       MutexLock mu(self, lock_);
1199       DCHECK_LT(pm_idx, page_map_size_);
1200       byte page_map_entry = page_map_[pm_idx];
1201       if (kTraceRosAlloc) {
1202         LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
1203                   << std::dec << pm_idx
1204                   << ", page_map_entry=" << static_cast<int>(page_map_entry);
1205       }
1206       if (LIKELY(page_map_entry == kPageMapRun)) {
1207         run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
1208       } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
1209         size_t pi = pm_idx;
1210         // Find the beginning of the run.
1211         do {
1212           --pi;
1213           DCHECK_LT(pi, capacity_ / kPageSize);
1214         } while (page_map_[pi] != kPageMapRun);
1215         run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
1216       } else if (page_map_entry == kPageMapLargeObject) {
1217         freed_bytes += FreePages(self, ptr, false);
1218         continue;
1219       } else {
1220         LOG(FATAL) << "Unreachable - page map type: " << page_map_entry;
1221       }
1222     }
1223     DCHECK(run != nullptr);
1224     DCHECK_EQ(run->magic_num_, kMagicNum);
1225     // Set the bit in the bulk free bit map.
1226     freed_bytes += run->MarkBulkFreeBitMap(ptr);
1227 #ifdef HAVE_ANDROID_OS
1228     if (!run->to_be_bulk_freed_) {
1229       run->to_be_bulk_freed_ = true;
1230       runs.push_back(run);
1231     }
1232 #else
1233     runs.insert(run);
1234 #endif
1235   }
1236 
1237   // Now, iterate over the affected runs and update the alloc bit map
1238   // based on the bulk free bit map (for non-thread-local runs) and
1239   // union the bulk free bit map into the thread-local free bit map
1240   // (for thread-local runs.)
1241   for (Run* run : runs) {
1242 #ifdef HAVE_ANDROID_OS
1243     DCHECK(run->to_be_bulk_freed_);
1244     run->to_be_bulk_freed_ = false;
1245 #endif
1246     size_t idx = run->size_bracket_idx_;
1247     MutexLock mu(self, *size_bracket_locks_[idx]);
1248     if (run->IsThreadLocal()) {
1249       DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets);
1250       DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
1251       DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
1252       run->UnionBulkFreeBitMapToThreadLocalFreeBitMap();
1253       if (kTraceRosAlloc) {
1254         LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a thread local run 0x"
1255                   << std::hex << reinterpret_cast<intptr_t>(run);
1256       }
1257       DCHECK(run->IsThreadLocal());
1258       // A thread local run will be kept as a thread local even if
1259       // it's become all free.
1260     } else {
1261       bool run_was_full = run->IsFull();
1262       run->MergeBulkFreeBitMapIntoAllocBitMap();
1263       if (kTraceRosAlloc) {
1264         LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a run 0x" << std::hex
1265                   << reinterpret_cast<intptr_t>(run);
1266       }
1267       // Check if the run should be moved to non_full_runs_ or
1268       // free_page_runs_.
1269       std::set<Run*>* non_full_runs = &non_full_runs_[idx];
1270       std::unordered_set<Run*, hash_run, eq_run>* full_runs =
1271           kIsDebugBuild ? &full_runs_[idx] : NULL;
1272       if (run->IsAllFree()) {
1273         // It has just become completely free. Free the pages of the
1274         // run.
1275         bool run_was_current = run == current_runs_[idx];
1276         if (run_was_current) {
1277           DCHECK(full_runs->find(run) == full_runs->end());
1278           DCHECK(non_full_runs->find(run) == non_full_runs->end());
1279           // If it was a current run, reuse it.
1280         } else if (run_was_full) {
1281           // If it was full, remove it from the full run set (debug
1282           // only.)
1283           if (kIsDebugBuild) {
1284             std::unordered_set<Run*, hash_run, eq_run>::iterator pos = full_runs->find(run);
1285             DCHECK(pos != full_runs->end());
1286             full_runs->erase(pos);
1287             if (kTraceRosAlloc) {
1288               LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
1289                         << reinterpret_cast<intptr_t>(run)
1290                         << " from full_runs_";
1291             }
1292             DCHECK(full_runs->find(run) == full_runs->end());
1293           }
1294         } else {
1295           // If it was in a non full run set, remove it from the set.
1296           DCHECK(full_runs->find(run) == full_runs->end());
1297           DCHECK(non_full_runs->find(run) != non_full_runs->end());
1298           non_full_runs->erase(run);
1299           if (kTraceRosAlloc) {
1300             LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
1301                       << reinterpret_cast<intptr_t>(run)
1302                       << " from non_full_runs_";
1303           }
1304           DCHECK(non_full_runs->find(run) == non_full_runs->end());
1305         }
1306         if (!run_was_current) {
1307           run->ZeroHeader();
1308           MutexLock mu(self, lock_);
1309           FreePages(self, run, true);
1310         }
1311       } else {
1312         // It is not completely free. If it wasn't the current run and
1313         // wasn't already in the non-full run set (i.e., it was full),
1314         // insert it into the non-full run set.
1315         if (run == current_runs_[idx]) {
1316           DCHECK(non_full_runs->find(run) == non_full_runs->end());
1317           DCHECK(full_runs->find(run) == full_runs->end());
1318           // If it was a current run, keep it.
1319         } else if (run_was_full) {
1320           // If it was full, remove it from the full run set (debug
1321           // only) and insert into the non-full run set.
1322           DCHECK(full_runs->find(run) != full_runs->end());
1323           DCHECK(non_full_runs->find(run) == non_full_runs->end());
1324           if (kIsDebugBuild) {
1325             full_runs->erase(run);
1326             if (kTraceRosAlloc) {
1327               LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
1328                         << reinterpret_cast<intptr_t>(run)
1329                         << " from full_runs_";
1330             }
1331           }
1332           non_full_runs->insert(run);
1333           if (kTraceRosAlloc) {
1334             LOG(INFO) << "RosAlloc::BulkFree() : Inserted run 0x" << std::hex
1335                       << reinterpret_cast<intptr_t>(run)
1336                       << " into non_full_runs_[" << std::dec << idx << "]";
1337           }
1338         } else {
1339           // It was not full, so leave it in the non-full run set.
1340           DCHECK(full_runs->find(run) == full_runs->end());
1341           DCHECK(non_full_runs->find(run) != non_full_runs->end());
1342         }
1343       }
1344     }
1345   }
1346   return freed_bytes;
1347 }
1348 
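// Dumps the page map as a human-readable string, one entry per page.
// The caller must already hold lock_ (asserted below); this is mainly used
// to annotate the CHECK failure messages in Verify().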
1349 std::string RosAlloc::DumpPageMap() {
1350   std::ostringstream stream;
1351   stream << "RosAlloc PageMap: " << std::endl;
1352   lock_.AssertHeld(Thread::Current());
1353   size_t end = page_map_size_;
1354   FreePageRun* curr_fpr = NULL;
1355   size_t curr_fpr_size = 0;
1356   size_t remaining_curr_fpr_size = 0;
1357   size_t num_running_empty_pages = 0;
1358   for (size_t i = 0; i < end; ++i) {
1359     byte pm = page_map_[i];
1360     switch (pm) {
1361       case kPageMapReleased:
1362         // Fall-through.
1363       case kPageMapEmpty: {
1364         FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
1365         if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
1366           // Encountered a fresh free page run.
1367           DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
1368           DCHECK(fpr->IsFree());
1369           DCHECK(curr_fpr == NULL);
1370           DCHECK_EQ(curr_fpr_size, static_cast<size_t>(0));
1371           curr_fpr = fpr;
1372           curr_fpr_size = fpr->ByteSize(this);
1373           DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
1374           remaining_curr_fpr_size = curr_fpr_size - kPageSize;
1375           stream << "[" << i << "]=" << (pm == kPageMapReleased ? "Released" : "Empty")
1376                  << " (FPR start) fpr_size=" << curr_fpr_size
1377                  << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
1378           if (remaining_curr_fpr_size == 0) {
1379             // Reset at the end of the current free page run.
1380             curr_fpr = NULL;
1381             curr_fpr_size = 0;
1382           }
1383           stream << "curr_fpr=0x" << std::hex << reinterpret_cast<intptr_t>(curr_fpr) << std::endl;
1384           DCHECK_EQ(num_running_empty_pages, static_cast<size_t>(0));
1385         } else {
1386           // Still part of the current free page run.
1387           DCHECK_NE(num_running_empty_pages, static_cast<size_t>(0));
1388           DCHECK(curr_fpr != NULL && curr_fpr_size > 0 && remaining_curr_fpr_size > 0);
1389           DCHECK_EQ(remaining_curr_fpr_size % kPageSize, static_cast<size_t>(0));
1390           DCHECK_GE(remaining_curr_fpr_size, static_cast<size_t>(kPageSize));
1391           remaining_curr_fpr_size -= kPageSize;
1392           stream << "[" << i << "]=Empty (FPR part)"
1393                  << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
1394           if (remaining_curr_fpr_size == 0) {
1395             // Reset at the end of the current free page run.
1396             curr_fpr = NULL;
1397             curr_fpr_size = 0;
1398           }
1399         }
1400         num_running_empty_pages++;
1401         break;
1402       }
1403       case kPageMapLargeObject: {
1404         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
1405         num_running_empty_pages = 0;
1406         stream << "[" << i << "]=Large (start)" << std::endl;
1407         break;
1408       }
1409       case kPageMapLargeObjectPart:
1410         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
1411         num_running_empty_pages = 0;
1412         stream << "[" << i << "]=Large (part)" << std::endl;
1413         break;
1414       case kPageMapRun: {
1415         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
1416         num_running_empty_pages = 0;
1417         Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
1418         size_t idx = run->size_bracket_idx_;
1419         stream << "[" << i << "]=Run (start)"
1420                << " idx=" << idx
1421                << " numOfPages=" << numOfPages[idx]
1422                << " is_thread_local=" << run->is_thread_local_
1423                << " is_all_free=" << (run->IsAllFree() ? 1 : 0)
1424                << std::endl;
1425         break;
1426       }
1427       case kPageMapRunPart:
1428         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
1429         num_running_empty_pages = 0;
1430         stream << "[" << i << "]=Run (part)" << std::endl;
1431         break;
1432       default:
1433         stream << "[" << i << "]=Unrecognizable page map type: " << static_cast<int>(pm) << std::endl;
1434         break;
1435     }
1436   }
1437   return stream.str();
1438 }
1439 
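// Returns the number of usable bytes backing the allocation at ptr: the
// bracket size for a run slot, or the page-rounded size for a large object.
// As a sketch (assuming the 16-byte bracket stride set up in Initialize()),
// a 31-byte request is served from the 32-byte bracket, so UsableSize()
// reports 32 even though only 31 bytes were asked for.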
1440 size_t RosAlloc::UsableSize(void* ptr) {
1441   DCHECK_LE(base_, ptr);
1442   DCHECK_LT(ptr, base_ + footprint_);
1443   size_t pm_idx = RoundDownToPageMapIndex(ptr);
1444   MutexLock mu(Thread::Current(), lock_);
1445   switch (page_map_[pm_idx]) {
1446     case kPageMapReleased:
1447       // Fall-through.
1448     case kPageMapEmpty:
1449       LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
1450                  << std::hex << reinterpret_cast<intptr_t>(ptr);
1451       break;
1452     case kPageMapLargeObject: {
1453       size_t num_pages = 1;
1454       size_t idx = pm_idx + 1;
1455       size_t end = page_map_size_;
1456       while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
1457         num_pages++;
1458         idx++;
1459       }
1460       return num_pages * kPageSize;
1461     }
1462     case kPageMapLargeObjectPart:
1463       LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
1464                  << std::hex << reinterpret_cast<intptr_t>(ptr);
1465       break;
1466     case kPageMapRun:
1467     case kPageMapRunPart: {
1468       // Find the beginning of the run.
1469       while (page_map_[pm_idx] != kPageMapRun) {
1470         pm_idx--;
1471         DCHECK_LT(pm_idx, capacity_ / kPageSize);
1472       }
1473       DCHECK_EQ(page_map_[pm_idx], kPageMapRun);
1474       Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
1475       DCHECK_EQ(run->magic_num_, kMagicNum);
1476       size_t idx = run->size_bracket_idx_;
1477       size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
1478           - (reinterpret_cast<byte*>(run) + headerSizes[idx]);
1479       DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
1480       return IndexToBracketSize(idx);
1481     }
1482     default: {
1483       LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_[pm_idx]);
1484       break;
1485     }
1486   }
1487   return 0;
1488 }
1489 
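// Attempts to shrink the footprint by giving the trailing free page run, if
// any, back via a negative art_heap_rosalloc_morecore() increment, and trims
// the page map bookkeeping to match. Returns true if anything was released.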
1490 bool RosAlloc::Trim() {
1491   MutexLock mu(Thread::Current(), lock_);
1492   FreePageRun* last_free_page_run;
1493   DCHECK_EQ(footprint_ % kPageSize, static_cast<size_t>(0));
1494   auto it = free_page_runs_.rbegin();
1495   if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
1496     // Remove the last free page run, if any.
1497     DCHECK(last_free_page_run->IsFree());
1498     DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
1499     DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
1500     DCHECK_EQ(last_free_page_run->End(this), base_ + footprint_);
1501     free_page_runs_.erase(last_free_page_run);
1502     size_t decrement = last_free_page_run->ByteSize(this);
1503     size_t new_footprint = footprint_ - decrement;
1504     DCHECK_EQ(new_footprint % kPageSize, static_cast<size_t>(0));
1505     size_t new_num_of_pages = new_footprint / kPageSize;
1506     DCHECK_GE(page_map_size_, new_num_of_pages);
1507     // Zero out the tail of the page map.
1508     byte* zero_begin = const_cast<byte*>(page_map_) + new_num_of_pages;
1509     byte* madvise_begin = AlignUp(zero_begin, kPageSize);
1510     DCHECK_LE(madvise_begin, page_map_mem_map_->End());
1511     size_t madvise_size = page_map_mem_map_->End() - madvise_begin;
1512     if (madvise_size > 0) {
1513       DCHECK_ALIGNED(madvise_begin, kPageSize);
1514       DCHECK_EQ(RoundUp(madvise_size, kPageSize), madvise_size);
1515       if (!kMadviseZeroes) {
1516         memset(madvise_begin, 0, madvise_size);
1517       }
1518       CHECK_EQ(madvise(madvise_begin, madvise_size, MADV_DONTNEED), 0);
1519     }
1520     if (madvise_begin > zero_begin) {
1521       memset(zero_begin, 0, madvise_begin - zero_begin);
1522     }
1523     page_map_size_ = new_num_of_pages;
1524     free_page_run_size_map_.resize(new_num_of_pages);
1525     DCHECK_EQ(free_page_run_size_map_.size(), new_num_of_pages);
1526     art_heap_rosalloc_morecore(this, -(static_cast<intptr_t>(decrement)));
1527     if (kTraceRosAlloc) {
1528       LOG(INFO) << "RosAlloc::Trim() : decreased the footprint from "
1529                 << footprint_ << " to " << new_footprint;
1530     }
1531     DCHECK_LT(new_footprint, footprint_);
1532     DCHECK_LT(new_footprint, capacity_);
1533     footprint_ = new_footprint;
1534     return true;
1535   }
1536   return false;
1537 }
1538 
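// Walks the whole page map under lock_ and invokes the handler once per free
// page run (with used_bytes == 0), once per large object range, and delegates
// each run to Run::InspectAllSlots() for its individual slots.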
1539 void RosAlloc::InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
1540                           void* arg) {
1541   // Note: no need to use this to release pages as we already do so in FreePages().
1542   if (handler == NULL) {
1543     return;
1544   }
1545   MutexLock mu(Thread::Current(), lock_);
1546   size_t pm_end = page_map_size_;
1547   size_t i = 0;
1548   while (i < pm_end) {
1549     byte pm = page_map_[i];
1550     switch (pm) {
1551       case kPageMapReleased:
1552         // Fall-through.
1553       case kPageMapEmpty: {
1554         // The start of a free page run.
1555         FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
1556         DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
1557         size_t fpr_size = fpr->ByteSize(this);
1558         DCHECK(IsAligned<kPageSize>(fpr_size));
1559         void* start = fpr;
1560         if (kIsDebugBuild) {
1561           // In the debug build, the first page of a free page run
1562           // contains a magic number for debugging. Exclude it.
1563           start = reinterpret_cast<byte*>(fpr) + kPageSize;
1564         }
1565         void* end = reinterpret_cast<byte*>(fpr) + fpr_size;
1566         handler(start, end, 0, arg);
1567         size_t num_pages = fpr_size / kPageSize;
1568         if (kIsDebugBuild) {
1569           for (size_t j = i + 1; j < i + num_pages; ++j) {
1570             DCHECK(IsFreePage(j));
1571           }
1572         }
1573         i += fpr_size / kPageSize;
1574         DCHECK_LE(i, pm_end);
1575         break;
1576       }
1577       case kPageMapLargeObject: {
1578         // The start of a large object.
1579         size_t num_pages = 1;
1580         size_t idx = i + 1;
1581         while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
1582           num_pages++;
1583           idx++;
1584         }
1585         void* start = base_ + i * kPageSize;
1586         void* end = base_ + (i + num_pages) * kPageSize;
1587         size_t used_bytes = num_pages * kPageSize;
1588         handler(start, end, used_bytes, arg);
1589         if (kIsDebugBuild) {
1590           for (size_t j = i + 1; j < i + num_pages; ++j) {
1591             DCHECK_EQ(page_map_[j], kPageMapLargeObjectPart);
1592           }
1593         }
1594         i += num_pages;
1595         DCHECK_LE(i, pm_end);
1596         break;
1597       }
1598       case kPageMapLargeObjectPart:
1599         LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
1600         break;
1601       case kPageMapRun: {
1602         // The start of a run.
1603         Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
1604         DCHECK_EQ(run->magic_num_, kMagicNum);
1605         // The dedicated full run doesn't contain any real allocations, don't visit the slots in
1606         // there.
1607         run->InspectAllSlots(handler, arg);
1608         size_t num_pages = numOfPages[run->size_bracket_idx_];
1609         if (kIsDebugBuild) {
1610           for (size_t j = i + 1; j < i + num_pages; ++j) {
1611             DCHECK_EQ(page_map_[j], kPageMapRunPart);
1612           }
1613         }
1614         i += num_pages;
1615         DCHECK_LE(i, pm_end);
1616         break;
1617       }
1618       case kPageMapRunPart:
1619         LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
1620         break;
1621       default:
1622         LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
1623         break;
1624     }
1625   }
1626 }
1627 
1628 size_t RosAlloc::Footprint() {
1629   MutexLock mu(Thread::Current(), lock_);
1630   return footprint_;
1631 }
1632 
1633 size_t RosAlloc::FootprintLimit() {
1634   MutexLock mu(Thread::Current(), lock_);
1635   return capacity_;
1636 }
1637 
1638 void RosAlloc::SetFootprintLimit(size_t new_capacity) {
1639   MutexLock mu(Thread::Current(), lock_);
1640   DCHECK_EQ(RoundUp(new_capacity, kPageSize), new_capacity);
1641   // Only growing is supported here. But Trim() is supported.
1642   if (capacity_ < new_capacity) {
1643     CHECK_LE(new_capacity, max_capacity_);
1644     capacity_ = new_capacity;
1645     VLOG(heap) << "new capacity=" << capacity_;
1646   }
1647 }
1648 
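// Detaches the given thread's thread-local runs, merges their thread-local
// free bit maps back into the alloc bit maps, and returns the runs to the
// shared run sets (or frees their pages) via RevokeRun().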
1649 void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
1650   Thread* self = Thread::Current();
1651   // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
1652   ReaderMutexLock wmu(self, bulk_free_lock_);
1653   for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
1654     MutexLock mu(self, *size_bracket_locks_[idx]);
1655     Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
1656     CHECK(thread_local_run != nullptr);
1657     // Invalid means already revoked.
1658     DCHECK(thread_local_run->IsThreadLocal());
1659     if (thread_local_run != dedicated_full_run_) {
1660       thread->SetRosAllocRun(idx, dedicated_full_run_);
1661       DCHECK_EQ(thread_local_run->magic_num_, kMagicNum);
1662       // Note the thread local run may not be full here.
1663       bool dont_care;
1664       thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care);
1665       thread_local_run->SetIsThreadLocal(false);
1666       thread_local_run->MergeBulkFreeBitMapIntoAllocBitMap();
1667       DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
1668       DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
1669       RevokeRun(self, idx, thread_local_run);
1670     }
1671   }
1672 }
1673 
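// Re-files a revoked run for its size bracket: into the full run set (debug
// builds only), back to the free page runs if it has become all free, or
// into the non-full run set otherwise. The bracket lock must be held.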
1674 void RosAlloc::RevokeRun(Thread* self, size_t idx, Run* run) {
1675   size_bracket_locks_[idx]->AssertHeld(self);
1676   DCHECK(run != dedicated_full_run_);
1677   if (run->IsFull()) {
1678     if (kIsDebugBuild) {
1679       full_runs_[idx].insert(run);
1680       DCHECK(full_runs_[idx].find(run) != full_runs_[idx].end());
1681       if (kTraceRosAlloc) {
1682         LOG(INFO) << __PRETTY_FUNCTION__  << " : Inserted run 0x" << std::hex
1683                   << reinterpret_cast<intptr_t>(run)
1684                   << " into full_runs_[" << std::dec << idx << "]";
1685       }
1686     }
1687   } else if (run->IsAllFree()) {
1688     run->ZeroHeader();
1689     MutexLock mu(self, lock_);
1690     FreePages(self, run, true);
1691   } else {
1692     non_full_runs_[idx].insert(run);
1693     DCHECK(non_full_runs_[idx].find(run) != non_full_runs_[idx].end());
1694     if (kTraceRosAlloc) {
1695       LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
1696                 << reinterpret_cast<intptr_t>(run)
1697                 << " into non_full_runs_[" << std::dec << idx << "]";
1698     }
1699   }
1700 }
1701 
1702 void RosAlloc::RevokeThreadUnsafeCurrentRuns() {
1703   // Revoke the current runs which share the same idx as thread local runs.
1704   Thread* self = Thread::Current();
1705   for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; ++idx) {
1706     MutexLock mu(self, *size_bracket_locks_[idx]);
1707     if (current_runs_[idx] != dedicated_full_run_) {
1708       RevokeRun(self, idx, current_runs_[idx]);
1709       current_runs_[idx] = dedicated_full_run_;
1710     }
1711   }
1712 }
1713 
1714 void RosAlloc::RevokeAllThreadLocalRuns() {
1715   // This is called when mutator threads won't allocate, such as at
1716   // Zygote creation time or during a GC pause.
1717   MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
1718   MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_);
1719   std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
1720   for (Thread* thread : thread_list) {
1721     RevokeThreadLocalRuns(thread);
1722   }
1723   RevokeThreadUnsafeCurrentRuns();
1724 }
1725 
1726 void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) {
1727   if (kIsDebugBuild) {
1728     Thread* self = Thread::Current();
1729     // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
1730     ReaderMutexLock wmu(self, bulk_free_lock_);
1731     for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
1732       MutexLock mu(self, *size_bracket_locks_[idx]);
1733       Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
1734       DCHECK(thread_local_run == nullptr || thread_local_run == dedicated_full_run_);
1735     }
1736   }
1737 }
1738 
1739 void RosAlloc::AssertAllThreadLocalRunsAreRevoked() {
1740   if (kIsDebugBuild) {
1741     Thread* self = Thread::Current();
1742     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
1743     MutexLock mu2(self, *Locks::thread_list_lock_);
1744     std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
1745     for (Thread* t : thread_list) {
1746       AssertThreadLocalRunsAreRevoked(t);
1747     }
1748     for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; ++idx) {
1749       MutexLock mu(self, *size_bracket_locks_[idx]);
1750       CHECK_EQ(current_runs_[idx], dedicated_full_run_);
1751     }
1752   }
1753 }
1754 
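// One-time initialization of the static per-bracket tables. As a worked
// example (a sketch, assuming a 4 KiB kPageSize): bracket 0 uses 16-byte
// slots in a one-page run; the search below settles on 249 slots, with the
// 8-byte fixed header plus three 32-byte bit maps (alloc, bulk free,
// thread-local free) rounded up to a 112-byte header, so that
// 249 * 16 + 112 == 4096 exactly.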
1755 void RosAlloc::Initialize() {
1756   // bracketSizes.
1757   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
1758     if (i < kNumOfSizeBrackets - 2) {
1759       bracketSizes[i] = 16 * (i + 1);
1760     } else if (i == kNumOfSizeBrackets - 2) {
1761       bracketSizes[i] = 1 * KB;
1762     } else {
1763       DCHECK_EQ(i, kNumOfSizeBrackets - 1);
1764       bracketSizes[i] = 2 * KB;
1765     }
1766     if (kTraceRosAlloc) {
1767       LOG(INFO) << "bracketSizes[" << i << "]=" << bracketSizes[i];
1768     }
1769   }
1770   // numOfPages.
1771   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
1772     if (i < 4) {
1773       numOfPages[i] = 1;
1774     } else if (i < 8) {
1775       numOfPages[i] = 1;
1776     } else if (i < 16) {
1777       numOfPages[i] = 4;
1778     } else if (i < 32) {
1779       numOfPages[i] = 8;
1780     } else if (i == 32) {
1781       DCHECK_EQ(i, kNumOfSizeBrackets - 2);
1782       numOfPages[i] = 16;
1783     } else {
1784       DCHECK_EQ(i, kNumOfSizeBrackets - 1);
1785       numOfPages[i] = 32;
1786     }
1787     if (kTraceRosAlloc) {
1788       LOG(INFO) << "numOfPages[" << i << "]=" << numOfPages[i];
1789     }
1790   }
1791   // Compute numOfSlots, headerSizes, and the free bit map offsets.
1792   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
1793     size_t bracket_size = bracketSizes[i];
1794     size_t run_size = kPageSize * numOfPages[i];
1795     size_t max_num_of_slots = run_size / bracket_size;
1796     // Compute the actual number of slots by taking the header and
1797     // alignment into account.
1798     size_t fixed_header_size = RoundUp(Run::fixed_header_size(), sizeof(uint32_t));
1799     DCHECK_EQ(fixed_header_size, static_cast<size_t>(8));
1800     size_t header_size = 0;
1801     size_t bulk_free_bit_map_offset = 0;
1802     size_t thread_local_free_bit_map_offset = 0;
1803     size_t num_of_slots = 0;
1804     // Search for the maximum number of slots that allows enough space
1805     // for the header (including the bit maps.)
1806     for (int s = max_num_of_slots; s >= 0; s--) {
1807       size_t tmp_slots_size = bracket_size * s;
1808       size_t tmp_bit_map_size = RoundUp(s, sizeof(uint32_t) * kBitsPerByte) / kBitsPerByte;
1809       size_t tmp_bulk_free_bit_map_size = tmp_bit_map_size;
1810       size_t tmp_bulk_free_bit_map_off = fixed_header_size + tmp_bit_map_size;
1811       size_t tmp_thread_local_free_bit_map_size = tmp_bit_map_size;
1812       size_t tmp_thread_local_free_bit_map_off = tmp_bulk_free_bit_map_off + tmp_bulk_free_bit_map_size;
1813       size_t tmp_unaligned_header_size = tmp_thread_local_free_bit_map_off + tmp_thread_local_free_bit_map_size;
1814       // Align up the unaligned header size. bracket_size may not be a power of two.
1815       size_t tmp_header_size = (tmp_unaligned_header_size % bracket_size == 0) ?
1816           tmp_unaligned_header_size :
1817           tmp_unaligned_header_size + (bracket_size - tmp_unaligned_header_size % bracket_size);
1818       DCHECK_EQ(tmp_header_size % bracket_size, static_cast<size_t>(0));
1819       DCHECK_EQ(tmp_header_size % 8, static_cast<size_t>(0));
1820       if (tmp_slots_size + tmp_header_size <= run_size) {
1821         // Found the right number of slots, that is, there was enough
1822         // space for the header (including the bit maps.)
1823         num_of_slots = s;
1824         header_size = tmp_header_size;
1825         bulk_free_bit_map_offset = tmp_bulk_free_bit_map_off;
1826         thread_local_free_bit_map_offset = tmp_thread_local_free_bit_map_off;
1827         break;
1828       }
1829     }
1830     DCHECK(num_of_slots > 0 && header_size > 0 && bulk_free_bit_map_offset > 0);
1831     // Add the padding for the alignment remainder.
1832     header_size += run_size % bracket_size;
1833     DCHECK_EQ(header_size + num_of_slots * bracket_size, run_size);
1834     numOfSlots[i] = num_of_slots;
1835     headerSizes[i] = header_size;
1836     bulkFreeBitMapOffsets[i] = bulk_free_bit_map_offset;
1837     threadLocalFreeBitMapOffsets[i] = thread_local_free_bit_map_offset;
1838     if (kTraceRosAlloc) {
1839       LOG(INFO) << "numOfSlots[" << i << "]=" << numOfSlots[i]
1840                 << ", headerSizes[" << i << "]=" << headerSizes[i]
1841                 << ", bulkFreeBitMapOffsets[" << i << "]=" << bulkFreeBitMapOffsets[i]
1842                 << ", threadLocalFreeBitMapOffsets[" << i << "]=" << threadLocalFreeBitMapOffsets[i];
1843     }
1844   }
1845   // Fill the alloc bitmap so nobody can successfully allocate from it.
1846   if (kIsDebugBuild) {
1847     dedicated_full_run_->magic_num_ = kMagicNum;
1848   }
1849   // It doesn't matter which size bracket we use since the main goal is to have the allocation
1850   // fail 100% of the time you attempt to allocate into the dedicated full run.
1851   dedicated_full_run_->size_bracket_idx_ = 0;
1852   dedicated_full_run_->FillAllocBitMap();
1853   dedicated_full_run_->SetIsThreadLocal(true);
1854 }
1855 
1856 void RosAlloc::BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
1857   if (used_bytes == 0) {
1858     return;
1859   }
1860   size_t* bytes_allocated = reinterpret_cast<size_t*>(arg);
1861   *bytes_allocated += used_bytes;
1862 }
1863 
1864 void RosAlloc::ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
1865   if (used_bytes == 0) {
1866     return;
1867   }
1868   size_t* objects_allocated = reinterpret_cast<size_t*>(arg);
1869   ++(*objects_allocated);
1870 }
1871 
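// Heavy-weight consistency check: validates the page map, the run sets, and
// every thread's thread-local runs, then verifies each run's slots. Requires
// the mutator lock to be exclusively held (i.e. a suspend-all pause).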
1872 void RosAlloc::Verify() {
1873   Thread* self = Thread::Current();
1874   CHECK(Locks::mutator_lock_->IsExclusiveHeld(self))
1875       << "The mutator lock isn't exclusively held at " << __PRETTY_FUNCTION__;
1876   MutexLock mu(self, *Locks::thread_list_lock_);
1877   ReaderMutexLock wmu(self, bulk_free_lock_);
1878   std::vector<Run*> runs;
1879   {
1880     MutexLock mu(self, lock_);
1881     size_t pm_end = page_map_size_;
1882     size_t i = 0;
1883     while (i < pm_end) {
1884       byte pm = page_map_[i];
1885       switch (pm) {
1886         case kPageMapReleased:
1887           // Fall-through.
1888         case kPageMapEmpty: {
1889           // The start of a free page run.
1890           FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
1891           DCHECK_EQ(fpr->magic_num_, kMagicNumFree);
1892           CHECK(free_page_runs_.find(fpr) != free_page_runs_.end())
1893               << "An empty page must belong to the free page run set";
1894           size_t fpr_size = fpr->ByteSize(this);
1895           CHECK(IsAligned<kPageSize>(fpr_size))
1896               << "A free page run size isn't page-aligned : " << fpr_size;
1897           size_t num_pages = fpr_size / kPageSize;
1898           CHECK_GT(num_pages, static_cast<uintptr_t>(0))
1899               << "A free page run size must be > 0 : " << fpr_size;
1900           for (size_t j = i + 1; j < i + num_pages; ++j) {
1901             CHECK(IsFreePage(j))
1902                 << "A mismatch between the page map table for kPageMapEmpty "
1903                 << " at page index " << j
1904                 << " and the free page run size : page index range : "
1905                 << i << " to " << (i + num_pages) << std::endl << DumpPageMap();
1906           }
1907           i += num_pages;
1908           CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
1909                               << std::endl << DumpPageMap();
1910           break;
1911         }
1912         case kPageMapLargeObject: {
1913           // The start of a large object.
1914           size_t num_pages = 1;
1915           size_t idx = i + 1;
1916           while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
1917             num_pages++;
1918             idx++;
1919           }
1920           void* start = base_ + i * kPageSize;
1921           mirror::Object* obj = reinterpret_cast<mirror::Object*>(start);
1922           size_t obj_size = obj->SizeOf();
1923           CHECK_GT(obj_size, kLargeSizeThreshold)
1924               << "A rosalloc large object size must be > " << kLargeSizeThreshold;
1925           CHECK_EQ(num_pages, RoundUp(obj_size, kPageSize) / kPageSize)
1926               << "A rosalloc large object size " << obj_size
1927               << " does not match the page map table " << (num_pages * kPageSize)
1928               << std::endl << DumpPageMap();
1929           i += num_pages;
1930           CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
1931                               << std::endl << DumpPageMap();
1932           break;
1933         }
1934         case kPageMapLargeObjectPart:
1935           LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm) << std::endl << DumpPageMap();
1936           break;
1937         case kPageMapRun: {
1938           // The start of a run.
1939           Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
1940           DCHECK_EQ(run->magic_num_, kMagicNum);
1941           size_t idx = run->size_bracket_idx_;
1942           CHECK_LT(idx, kNumOfSizeBrackets) << "Out of range size bracket index : " << idx;
1943           size_t num_pages = numOfPages[idx];
1944           CHECK_GT(num_pages, static_cast<uintptr_t>(0))
1945               << "Run size must be > 0 : " << num_pages;
1946           for (size_t j = i + 1; j < i + num_pages; ++j) {
1947             CHECK_EQ(page_map_[j], kPageMapRunPart)
1948                 << "A mismatch between the page map table for kPageMapRunPart "
1949                 << " at page index " << j
1950                 << " and the run size : page index range " << i << " to " << (i + num_pages)
1951                 << std::endl << DumpPageMap();
1952           }
1953           // Don't verify the dedicated_full_run_ since it doesn't have any real allocations.
1954           runs.push_back(run);
1955           i += num_pages;
1956           CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
1957                               << std::endl << DumpPageMap();
1958           break;
1959         }
1960         case kPageMapRunPart:
1961           // Fall-through.
1962         default:
1963           LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm) << std::endl << DumpPageMap();
1964           break;
1965       }
1966     }
1967   }
1968   std::list<Thread*> threads = Runtime::Current()->GetThreadList()->GetList();
1969   for (Thread* thread : threads) {
1970     for (size_t i = 0; i < kNumThreadLocalSizeBrackets; ++i) {
1971       MutexLock mu(self, *size_bracket_locks_[i]);
1972       Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(i));
1973       CHECK(thread_local_run != nullptr);
1974       CHECK(thread_local_run->IsThreadLocal());
1975       CHECK(thread_local_run == dedicated_full_run_ ||
1976             thread_local_run->size_bracket_idx_ == i);
1977     }
1978   }
1979   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
1980     MutexLock mu(self, *size_bracket_locks_[i]);
1981     Run* current_run = current_runs_[i];
1982     CHECK(current_run != nullptr);
1983     if (current_run != dedicated_full_run_) {
1984       // The dedicated full run is currently marked as thread local.
1985       CHECK(!current_run->IsThreadLocal());
1986       CHECK_EQ(current_run->size_bracket_idx_, i);
1987     }
1988   }
1989   // Call Verify() here for the lock order.
1990   for (auto& run : runs) {
1991     run->Verify(self, this);
1992   }
1993 }
1994 
1995 void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) {
1996   DCHECK_EQ(magic_num_, kMagicNum) << "Bad magic number : " << Dump();
1997   const size_t idx = size_bracket_idx_;
1998   CHECK_LT(idx, kNumOfSizeBrackets) << "Out of range size bracket index : " << Dump();
1999   byte* slot_base = reinterpret_cast<byte*>(this) + headerSizes[idx];
2000   const size_t num_slots = numOfSlots[idx];
2001   const size_t num_vec = RoundUp(num_slots, 32) / 32;
2002   CHECK_GT(num_vec, 0U);
2003   size_t bracket_size = IndexToBracketSize(idx);
2004   CHECK_EQ(slot_base + num_slots * bracket_size,
2005            reinterpret_cast<byte*>(this) + numOfPages[idx] * kPageSize)
2006       << "Mismatch in the end address of the run " << Dump();
2007   // Check that the bulk free bitmap is clean. It's only used during BulkFree().
2008   CHECK(IsBulkFreeBitmapClean()) << "The bulk free bit map isn't clean " << Dump();
2009   uint32_t last_word_mask = GetBitmapLastVectorMask(num_slots, num_vec);
2010   // Make sure all the bits at the end of the run are set so that we don't allocate there.
2011   CHECK_EQ(alloc_bit_map_[num_vec - 1] & last_word_mask, last_word_mask);
2012   // Ensure that the first bitmap index is valid.
2013   CHECK_LT(first_search_vec_idx_, num_vec);
2014   // Check the thread local runs, the current runs, and the run sets.
2015   if (IsThreadLocal()) {
2016     // If it's a thread local run, then it must be pointed to by an owner thread.
2017     bool owner_found = false;
2018     std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
2019     for (auto it = thread_list.begin(); it != thread_list.end(); ++it) {
2020       Thread* thread = *it;
2021       for (size_t i = 0; i < kNumThreadLocalSizeBrackets; i++) {
2022         MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
2023         Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(i));
2024         if (thread_local_run == this) {
2025           CHECK(!owner_found)
2026               << "A thread local run has more than one owner thread " << Dump();
2027           CHECK_EQ(i, idx)
2028               << "A mismatching size bracket index in a thread local run " << Dump();
2029           owner_found = true;
2030         }
2031       }
2032     }
2033     CHECK(owner_found) << "A thread local run has no owner thread " << Dump();
2034   } else {
2035     // If it's not thread local, check that the thread local free bitmap is clean.
2036     CHECK(IsThreadLocalFreeBitmapClean())
2037         << "A non-thread-local run's thread local free bitmap isn't clean "
2038         << Dump();
2039     // Check if it's a current run for the size bucket.
2040     bool is_current_run = false;
2041     for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
2042       MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
2043       Run* current_run = rosalloc->current_runs_[i];
2044       if (idx == i) {
2045         if (this == current_run) {
2046           is_current_run = true;
2047         }
2048       } else {
2049         // If the size bucket index does not match, then it must not
2050         // be a current run.
2051         CHECK_NE(this, current_run)
2052             << "A current run points to a run with a wrong size bracket index " << Dump();
2053       }
2054     }
2055     // If it's neither a thread-local run nor a current run, then it
2056     // must be in a run set.
2057     if (!is_current_run) {
2058       MutexLock mu(self, rosalloc->lock_);
2059       std::set<Run*>& non_full_runs = rosalloc->non_full_runs_[idx];
2060       // If it's all free, it must be a free page run rather than a run.
2061       CHECK(!IsAllFree()) << "A free run must be in a free page run set " << Dump();
2062       if (!IsFull()) {
2063         // If it's not full, it must be in the non-full run set.
2064         CHECK(non_full_runs.find(this) != non_full_runs.end())
2065             << "A non-full run isn't in the non-full run set " << Dump();
2066       } else {
2067         // If it's full, it must be in the full run set (debug build only).
2068         if (kIsDebugBuild) {
2069           std::unordered_set<Run*, hash_run, eq_run>& full_runs = rosalloc->full_runs_[idx];
2070           CHECK(full_runs.find(this) != full_runs.end())
2071               << "A full run isn't in the full run set " << Dump();
2072         }
2073       }
2074     }
2075   }
2076   // Check each slot.
2077   size_t slots = 0;
2078   for (size_t v = 0; v < num_vec; v++, slots += 32) {
2079     DCHECK_GE(num_slots, slots) << "Out of bounds";
2080     uint32_t vec = alloc_bit_map_[v];
2081     uint32_t thread_local_free_vec = ThreadLocalFreeBitMap()[v];
2082     size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
2083     for (size_t i = 0; i < end; ++i) {
2084       bool is_allocated = ((vec >> i) & 0x1) != 0;
2085       // If a thread local run, slots may be marked freed in the
2086       // thread local free bitmap.
2087       bool is_thread_local_freed = IsThreadLocal() && ((thread_local_free_vec >> i) & 0x1) != 0;
2088       if (is_allocated && !is_thread_local_freed) {
2089         byte* slot_addr = slot_base + (slots + i) * bracket_size;
2090         mirror::Object* obj = reinterpret_cast<mirror::Object*>(slot_addr);
2091         size_t obj_size = obj->SizeOf();
2092         CHECK_LE(obj_size, kLargeSizeThreshold)
2093             << "A run slot contains a large object " << Dump();
2094         CHECK_EQ(SizeToIndex(obj_size), idx)
2095             << PrettyTypeOf(obj) << " "
2096             << "obj_size=" << obj_size << ", idx=" << idx << " "
2097             << "A run slot contains an object with wrong size " << Dump();
2098       }
2099     }
2100   }
2101 }
2102 
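// Scans the page map (racily, then re-checked under lock_) and releases the
// pages of every free page run back to the OS via ReleasePageRange().
// Returns the number of bytes newly released.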
2103 size_t RosAlloc::ReleasePages() {
2104   VLOG(heap) << "RosAlloc::ReleasePages()";
2105   DCHECK(!DoesReleaseAllPages());
2106   Thread* self = Thread::Current();
2107   size_t reclaimed_bytes = 0;
2108   size_t i = 0;
2109   // Check the page map size which might have changed due to grow/shrink.
2110   while (i < page_map_size_) {
2111     // Reading the page map without a lock is racy but the race is benign since it should only
2112     // result in occasionally not releasing pages which we could release.
2113     byte pm = page_map_[i];
2114     switch (pm) {
2115       case kPageMapReleased:
2116         // Fall through.
2117       case kPageMapEmpty: {
2118         // This is currently the start of a free page run.
2119         // Acquire the lock to prevent other threads racing in and modifying the page map.
2120         MutexLock mu(self, lock_);
2121         // Check that it's still empty after we acquired the lock since another thread could have
2122         // raced in and placed an allocation here.
2123         if (IsFreePage(i)) {
2124           // Free page runs can start with a released page if we coalesced a released page free
2125           // page run with an empty page run.
2126           FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
2127           // There is a race condition where FreePage can coalesce fpr with the previous
2128           // free page run before we acquire lock_. In that case free_page_runs_.find will not find
2129           // a run starting at fpr. To handle this race, we skip reclaiming the page range and go
2130           // to the next page.
2131           if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
2132             size_t fpr_size = fpr->ByteSize(this);
2133             DCHECK(IsAligned<kPageSize>(fpr_size));
2134             byte* start = reinterpret_cast<byte*>(fpr);
2135             reclaimed_bytes += ReleasePageRange(start, start + fpr_size);
2136             size_t pages = fpr_size / kPageSize;
2137             CHECK_GT(pages, 0U) << "Infinite loop probable";
2138             i += pages;
2139             DCHECK_LE(i, page_map_size_);
2140             break;
2141           }
2142         }
2143         // Fall through.
2144       }
2145       case kPageMapLargeObject:      // Fall through.
2146       case kPageMapLargeObjectPart:  // Fall through.
2147       case kPageMapRun:              // Fall through.
2148       case kPageMapRunPart:          // Fall through.
2149         ++i;
2150         break;  // Skip.
2151       default:
2152         LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
2153         break;
2154     }
2155   }
2156   return reclaimed_bytes;
2157 }
2158 
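// madvise()s the given page-aligned range and flips its kPageMapEmpty entries
// to kPageMapReleased, counting only newly released pages in the returned
// byte total. In debug builds the first page is skipped because it holds the
// free page run's magic number.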
2159 size_t RosAlloc::ReleasePageRange(byte* start, byte* end) {
2160   DCHECK_ALIGNED(start, kPageSize);
2161   DCHECK_ALIGNED(end, kPageSize);
2162   DCHECK_LT(start, end);
2163   if (kIsDebugBuild) {
2164     // In the debug build, the first page of a free page run
2165     // contains a magic number for debugging. Exclude it.
2166     start += kPageSize;
2167   }
2168   if (!kMadviseZeroes) {
2169     // TODO: Do this when we resurrect the page instead.
2170     memset(start, 0, end - start);
2171   }
2172   CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
2173   size_t pm_idx = ToPageMapIndex(start);
2174   size_t reclaimed_bytes = 0;
2175   // Calculate reclaimed bytes and update the page map.
2176   const size_t max_idx = pm_idx + (end - start) / kPageSize;
2177   for (; pm_idx < max_idx; ++pm_idx) {
2178     DCHECK(IsFreePage(pm_idx));
2179     if (page_map_[pm_idx] == kPageMapEmpty) {
2180       // Mark the page as released and update how many bytes we released.
2181       reclaimed_bytes += kPageSize;
2182       page_map_[pm_idx] = kPageMapReleased;
2183     }
2184   }
2185   return reclaimed_bytes;
2186 }
2187 
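// Appends a fragmentation diagnosis to the allocation failure message by
// comparing the contiguous free bytes the failed request would have needed
// against the largest free page run currently available.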
2188 void RosAlloc::LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) {
2189   Thread* self = Thread::Current();
2190   size_t largest_continuous_free_pages = 0;
2191   WriterMutexLock wmu(self, bulk_free_lock_);
2192   MutexLock mu(self, lock_);
2193   for (FreePageRun* fpr : free_page_runs_) {
2194     largest_continuous_free_pages = std::max(largest_continuous_free_pages,
2195                                              fpr->ByteSize(this));
2196   }
2197   if (failed_alloc_bytes > kLargeSizeThreshold) {
2198     // Large allocation.
2199     size_t required_bytes = RoundUp(failed_alloc_bytes, kPageSize);
2200     if (required_bytes > largest_continuous_free_pages) {
2201       os << "; failed due to fragmentation (required contiguous free "
2202          << required_bytes << " bytes where largest contiguous free "
2203          << largest_continuous_free_pages << " bytes)";
2204     }
2205   } else {
2206     // Non-large allocation.
2207     size_t required_bytes = numOfPages[SizeToIndex(failed_alloc_bytes)] * kPageSize;
2208     if (required_bytes > largest_continuous_free_pages) {
2209       os << "; failed due to fragmentation (required contiguous free "
2210          << required_bytes << " bytes for a new buffer where largest contiguous free "
2211          << largest_continuous_free_pages << " bytes)";
2212     }
2213   }
2214 }
2215 
2216 }  // namespace allocator
2217 }  // namespace gc
2218 }  // namespace art
2219