1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "rosalloc.h"
18 
19 #include "base/mutex-inl.h"
20 #include "gc/space/valgrind_settings.h"
21 #include "mem_map.h"
22 #include "mirror/class-inl.h"
23 #include "mirror/object.h"
24 #include "mirror/object-inl.h"
25 #include "thread-inl.h"
26 #include "thread_list.h"
27 
28 #include <map>
29 #include <list>
30 #include <sstream>
31 #include <vector>
32 
33 namespace art {
34 namespace gc {
35 namespace allocator {
36 
37 static constexpr bool kUsePrefetchDuringAllocRun = true;
38 static constexpr bool kPrefetchNewRunDataByZeroing = false;
39 static constexpr size_t kPrefetchStride = 64;
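// Note: kPrefetchStride is presumably chosen to match a typical 64-byte cache line, so the
// prefetch loop in AllocRun() touches each cache line of a new run's slot area roughly once.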
40 
41 size_t RosAlloc::bracketSizes[kNumOfSizeBrackets];
42 size_t RosAlloc::numOfPages[kNumOfSizeBrackets];
43 size_t RosAlloc::numOfSlots[kNumOfSizeBrackets];
44 size_t RosAlloc::headerSizes[kNumOfSizeBrackets];
45 size_t RosAlloc::bulkFreeBitMapOffsets[kNumOfSizeBrackets];
46 size_t RosAlloc::threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
47 bool RosAlloc::initialized_ = false;
48 size_t RosAlloc::dedicated_full_run_storage_[kPageSize / sizeof(size_t)] = { 0 };
49 RosAlloc::Run* RosAlloc::dedicated_full_run_ =
50     reinterpret_cast<RosAlloc::Run*>(dedicated_full_run_storage_);
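// dedicated_full_run_ points at statically allocated, zero-initialized storage that is treated as
// a run no allocation can ever succeed from; it serves as the initial current run for every size
// bracket and as the fallback when refilling a run fails, so callers never need a null check.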
51 
52 RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity,
53                    PageReleaseMode page_release_mode, bool running_on_valgrind,
54                    size_t page_release_size_threshold)
55     : base_(reinterpret_cast<uint8_t*>(base)), footprint_(capacity),
56       capacity_(capacity), max_capacity_(max_capacity),
57       lock_("rosalloc global lock", kRosAllocGlobalLock),
58       bulk_free_lock_("rosalloc bulk free lock", kRosAllocBulkFreeLock),
59       page_release_mode_(page_release_mode),
60       page_release_size_threshold_(page_release_size_threshold),
61       running_on_valgrind_(running_on_valgrind) {
62   DCHECK_EQ(RoundUp(capacity, kPageSize), capacity);
63   DCHECK_EQ(RoundUp(max_capacity, kPageSize), max_capacity);
64   CHECK_LE(capacity, max_capacity);
65   CHECK(IsAligned<kPageSize>(page_release_size_threshold_));
66   if (!initialized_) {
67     Initialize();
68   }
69   VLOG(heap) << "RosAlloc base="
70              << std::hex << (intptr_t)base_ << ", end="
71              << std::hex << (intptr_t)(base_ + capacity_)
72              << ", capacity=" << std::dec << capacity_
73              << ", max_capacity=" << std::dec << max_capacity_;
74   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
75     size_bracket_lock_names_[i] =
76         StringPrintf("an rosalloc size bracket %d lock", static_cast<int>(i));
77     size_bracket_locks_[i] = new Mutex(size_bracket_lock_names_[i].c_str(), kRosAllocBracketLock);
78     current_runs_[i] = dedicated_full_run_;
79   }
80   DCHECK_EQ(footprint_, capacity_);
81   size_t num_of_pages = footprint_ / kPageSize;
82   size_t max_num_of_pages = max_capacity_ / kPageSize;
83   std::string error_msg;
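  // The page map records one byte of state per page (run, run part, large object, large object
  // part, empty or released) and is mapped up front for the maximum capacity, so growing the
  // footprint later only bumps page_map_size_ and never remaps it.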
84   page_map_mem_map_.reset(MemMap::MapAnonymous("rosalloc page map", nullptr,
85                                                RoundUp(max_num_of_pages, kPageSize),
86                                                PROT_READ | PROT_WRITE, false, false, &error_msg));
87   CHECK(page_map_mem_map_.get() != nullptr) << "Couldn't allocate the page map : " << error_msg;
88   page_map_ = page_map_mem_map_->Begin();
89   page_map_size_ = num_of_pages;
90   max_page_map_size_ = max_num_of_pages;
91   free_page_run_size_map_.resize(num_of_pages);
92   FreePageRun* free_pages = reinterpret_cast<FreePageRun*>(base_);
93   if (kIsDebugBuild) {
94     free_pages->magic_num_ = kMagicNumFree;
95   }
96   free_pages->SetByteSize(this, capacity_);
97   DCHECK_EQ(capacity_ % kPageSize, static_cast<size_t>(0));
98   DCHECK(free_pages->IsFree());
99   free_pages->ReleasePages(this);
100   DCHECK(free_pages->IsFree());
101   free_page_runs_.insert(free_pages);
102   if (kTraceRosAlloc) {
103     LOG(INFO) << "RosAlloc::RosAlloc() : Inserted run 0x" << std::hex
104               << reinterpret_cast<intptr_t>(free_pages)
105               << " into free_page_runs_";
106   }
107 }
108 
109 RosAlloc::~RosAlloc() {
110   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
111     delete size_bracket_locks_[i];
112   }
113 }
114 
115 void* RosAlloc::AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type) {
116   lock_.AssertHeld(self);
117   DCHECK(page_map_type == kPageMapRun || page_map_type == kPageMapLargeObject);
118   FreePageRun* res = nullptr;
119   const size_t req_byte_size = num_pages * kPageSize;
120   // Find the lowest address free page run that's large enough.
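  // free_page_runs_ is ordered by address, so this is a first-fit search from the lowest address;
  // if the chosen run is larger than requested, the tail is split off and reinserted below.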
121   for (auto it = free_page_runs_.begin(); it != free_page_runs_.end(); ) {
122     FreePageRun* fpr = *it;
123     DCHECK(fpr->IsFree());
124     size_t fpr_byte_size = fpr->ByteSize(this);
125     DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
126     if (req_byte_size <= fpr_byte_size) {
127       // Found one.
128       free_page_runs_.erase(it++);
129       if (kTraceRosAlloc) {
130         LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x"
131                   << std::hex << reinterpret_cast<intptr_t>(fpr)
132                   << " from free_page_runs_";
133       }
134       if (req_byte_size < fpr_byte_size) {
135         // Split.
136         FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<uint8_t*>(fpr) + req_byte_size);
137         if (kIsDebugBuild) {
138           remainder->magic_num_ = kMagicNumFree;
139         }
140         remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
141         DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
142         // Don't need to call madvise on remainder here.
143         free_page_runs_.insert(remainder);
144         if (kTraceRosAlloc) {
145           LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
146                     << reinterpret_cast<intptr_t>(remainder)
147                     << " into free_page_runs_";
148         }
149         fpr->SetByteSize(this, req_byte_size);
150         DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
151       }
152       res = fpr;
153       break;
154     } else {
155       ++it;
156     }
157   }
158 
159   // Failed to allocate pages. Grow the footprint, if possible.
160   if (UNLIKELY(res == nullptr && capacity_ > footprint_)) {
161     FreePageRun* last_free_page_run = nullptr;
162     size_t last_free_page_run_size;
163     auto it = free_page_runs_.rbegin();
164     if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
165       // There is a free page run at the end.
166       DCHECK(last_free_page_run->IsFree());
167       DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
168       last_free_page_run_size = last_free_page_run->ByteSize(this);
169     } else {
170       // There is no free page run at the end.
171       last_free_page_run_size = 0;
172     }
173     DCHECK_LT(last_free_page_run_size, req_byte_size);
174     if (capacity_ - footprint_ + last_free_page_run_size >= req_byte_size) {
175       // If we grow the heap, we can allocate it.
176       size_t increment = std::min(std::max(2 * MB, req_byte_size - last_free_page_run_size),
177                                   capacity_ - footprint_);
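      // Grow by at least 2 MB at a time (capped at the remaining capacity) to amortize the cost
      // of repeated footprint increases.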
178       DCHECK_EQ(increment % kPageSize, static_cast<size_t>(0));
179       size_t new_footprint = footprint_ + increment;
180       size_t new_num_of_pages = new_footprint / kPageSize;
181       DCHECK_LT(page_map_size_, new_num_of_pages);
182       DCHECK_LT(free_page_run_size_map_.size(), new_num_of_pages);
183       page_map_size_ = new_num_of_pages;
184       DCHECK_LE(page_map_size_, max_page_map_size_);
185       free_page_run_size_map_.resize(new_num_of_pages);
186       ArtRosAllocMoreCore(this, increment);
187       if (last_free_page_run_size > 0) {
188         // There was a free page run at the end. Expand its size.
189         DCHECK_EQ(last_free_page_run_size, last_free_page_run->ByteSize(this));
190         last_free_page_run->SetByteSize(this, last_free_page_run_size + increment);
191         DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
192         DCHECK_EQ(last_free_page_run->End(this), base_ + new_footprint);
193       } else {
194         // Otherwise, insert a new free page run at the end.
195         FreePageRun* new_free_page_run = reinterpret_cast<FreePageRun*>(base_ + footprint_);
196         if (kIsDebugBuild) {
197           new_free_page_run->magic_num_ = kMagicNumFree;
198         }
199         new_free_page_run->SetByteSize(this, increment);
200         DCHECK_EQ(new_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
201         free_page_runs_.insert(new_free_page_run);
202         DCHECK_EQ(*free_page_runs_.rbegin(), new_free_page_run);
203         if (kTraceRosAlloc) {
204           LOG(INFO) << "RosAlloc::AllocPages() : Grew the heap by inserting run 0x"
205                     << std::hex << reinterpret_cast<intptr_t>(new_free_page_run)
206                     << " into free_page_runs_";
207         }
208       }
209       DCHECK_LE(footprint_ + increment, capacity_);
210       if (kTraceRosAlloc) {
211         LOG(INFO) << "RosAlloc::AllocPages() : increased the footprint from "
212                   << footprint_ << " to " << new_footprint;
213       }
214       footprint_ = new_footprint;
215 
216       // And retry the last free page run.
217       it = free_page_runs_.rbegin();
218       DCHECK(it != free_page_runs_.rend());
219       FreePageRun* fpr = *it;
220       if (kIsDebugBuild && last_free_page_run_size > 0) {
221         DCHECK(last_free_page_run != nullptr);
222         DCHECK_EQ(last_free_page_run, fpr);
223       }
224       size_t fpr_byte_size = fpr->ByteSize(this);
225       DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
226       DCHECK_LE(req_byte_size, fpr_byte_size);
227       free_page_runs_.erase(fpr);
228       if (kTraceRosAlloc) {
229         LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
230                   << " from free_page_runs_";
231       }
232       if (req_byte_size < fpr_byte_size) {
233         // Split if there's a remainder.
234         FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<uint8_t*>(fpr) + req_byte_size);
235         if (kIsDebugBuild) {
236           remainder->magic_num_ = kMagicNumFree;
237         }
238         remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
239         DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
240         free_page_runs_.insert(remainder);
241         if (kTraceRosAlloc) {
242           LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
243                     << reinterpret_cast<intptr_t>(remainder)
244                     << " into free_page_runs_";
245         }
246         fpr->SetByteSize(this, req_byte_size);
247         DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
248       }
249       res = fpr;
250     }
251   }
252   if (LIKELY(res != nullptr)) {
253     // Update the page map.
254     size_t page_map_idx = ToPageMapIndex(res);
255     for (size_t i = 0; i < num_pages; i++) {
256       DCHECK(IsFreePage(page_map_idx + i));
257     }
258     switch (page_map_type) {
259     case kPageMapRun:
260       page_map_[page_map_idx] = kPageMapRun;
261       for (size_t i = 1; i < num_pages; i++) {
262         page_map_[page_map_idx + i] = kPageMapRunPart;
263       }
264       break;
265     case kPageMapLargeObject:
266       page_map_[page_map_idx] = kPageMapLargeObject;
267       for (size_t i = 1; i < num_pages; i++) {
268         page_map_[page_map_idx + i] = kPageMapLargeObjectPart;
269       }
270       break;
271     default:
272       LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_type);
273       break;
274     }
275     if (kIsDebugBuild) {
276       // Clear the first page since it is not madvised due to the magic number.
277       memset(res, 0, kPageSize);
278     }
279     if (kTraceRosAlloc) {
280       LOG(INFO) << "RosAlloc::AllocPages() : 0x" << std::hex << reinterpret_cast<intptr_t>(res)
281                 << "-0x" << (reinterpret_cast<intptr_t>(res) + num_pages * kPageSize)
282                 << "(" << std::dec << (num_pages * kPageSize) << ")";
283     }
284     return res;
285   }
286 
287   // Fail.
288   if (kTraceRosAlloc) {
289     LOG(INFO) << "RosAlloc::AllocPages() : nullptr";
290   }
291   return nullptr;
292 }
293 
294 size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) {
295   lock_.AssertHeld(self);
296   size_t pm_idx = ToPageMapIndex(ptr);
297   DCHECK_LT(pm_idx, page_map_size_);
298   uint8_t pm_type = page_map_[pm_idx];
299   DCHECK(pm_type == kPageMapRun || pm_type == kPageMapLargeObject);
300   uint8_t pm_part_type;
301   switch (pm_type) {
302   case kPageMapRun:
303     pm_part_type = kPageMapRunPart;
304     break;
305   case kPageMapLargeObject:
306     pm_part_type = kPageMapLargeObjectPart;
307     break;
308   default:
309     LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << " : " << "pm_idx=" << pm_idx << ", pm_type="
310                << static_cast<int>(pm_type) << ", ptr=" << std::hex
311                << reinterpret_cast<intptr_t>(ptr);
312     return 0;
313   }
314   // Update the page map and count the number of pages.
315   size_t num_pages = 1;
316   page_map_[pm_idx] = kPageMapEmpty;
317   size_t idx = pm_idx + 1;
318   size_t end = page_map_size_;
319   while (idx < end && page_map_[idx] == pm_part_type) {
320     page_map_[idx] = kPageMapEmpty;
321     num_pages++;
322     idx++;
323   }
324   const size_t byte_size = num_pages * kPageSize;
325   if (already_zero) {
326     if (ShouldCheckZeroMemory()) {
327       const uintptr_t* word_ptr = reinterpret_cast<uintptr_t*>(ptr);
328       for (size_t i = 0; i < byte_size / sizeof(uintptr_t); ++i) {
329         CHECK_EQ(word_ptr[i], 0U) << "words don't match at index " << i;
330       }
331     }
332   } else if (!DoesReleaseAllPages()) {
333     memset(ptr, 0, byte_size);
334   }
335 
336   if (kTraceRosAlloc) {
337     LOG(INFO) << __PRETTY_FUNCTION__ << " : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
338               << "-0x" << (reinterpret_cast<intptr_t>(ptr) + byte_size)
339               << "(" << std::dec << (num_pages * kPageSize) << ")";
340   }
341 
342   // Turn it into a free run.
343   FreePageRun* fpr = reinterpret_cast<FreePageRun*>(ptr);
344   if (kIsDebugBuild) {
345     fpr->magic_num_ = kMagicNumFree;
346   }
347   fpr->SetByteSize(this, byte_size);
348   DCHECK(IsAligned<kPageSize>(fpr->ByteSize(this)));
349 
350   DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
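  // Coalesce with any adjacent free page runs on both sides so that free_page_runs_ never holds
  // two runs that touch; this keeps the free page space from fragmenting.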
351   if (!free_page_runs_.empty()) {
352     // Try to coalesce in the higher address direction.
353     if (kTraceRosAlloc) {
354       LOG(INFO) << __PRETTY_FUNCTION__ << " : trying to coalesce a free page run 0x"
355                 << std::hex << reinterpret_cast<uintptr_t>(fpr) << " [" << std::dec << pm_idx << "] -0x"
356                 << std::hex << reinterpret_cast<uintptr_t>(fpr->End(this)) << " [" << std::dec
357                 << (fpr->End(this) == End() ? page_map_size_ : ToPageMapIndex(fpr->End(this))) << "]";
358     }
359     auto higher_it = free_page_runs_.upper_bound(fpr);
360     if (higher_it != free_page_runs_.end()) {
361       for (auto it = higher_it; it != free_page_runs_.end(); ) {
362         FreePageRun* h = *it;
363         DCHECK_EQ(h->ByteSize(this) % kPageSize, static_cast<size_t>(0));
364         if (kTraceRosAlloc) {
365           LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a higher free page run 0x"
366                     << std::hex << reinterpret_cast<uintptr_t>(h) << " [" << std::dec << ToPageMapIndex(h) << "] -0x"
367                     << std::hex << reinterpret_cast<uintptr_t>(h->End(this)) << " [" << std::dec
368                     << (h->End(this) == End() ? page_map_size_ : ToPageMapIndex(h->End(this))) << "]";
369         }
370         if (fpr->End(this) == h->Begin()) {
371           if (kTraceRosAlloc) {
372             LOG(INFO) << "Success";
373           }
374           // Clear magic num since this is no longer the start of a free page run.
375           if (kIsDebugBuild) {
376             h->magic_num_ = 0;
377           }
378           free_page_runs_.erase(it++);
379           if (kTraceRosAlloc) {
380             LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
381                       << reinterpret_cast<intptr_t>(h)
382                       << " from free_page_runs_";
383           }
384           fpr->SetByteSize(this, fpr->ByteSize(this) + h->ByteSize(this));
385           DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
386         } else {
387           // Not adjacent. Stop.
388           if (kTraceRosAlloc) {
389             LOG(INFO) << "Fail";
390           }
391           break;
392         }
393       }
394     }
395     // Try to coalesce in the lower address direction.
396     auto lower_it = free_page_runs_.upper_bound(fpr);
397     if (lower_it != free_page_runs_.begin()) {
398       --lower_it;
399       for (auto it = lower_it; ; ) {
400         // We want to try to coalesce with the first element but
401         // there's no "<=" operator for the iterator.
402         bool to_exit_loop = it == free_page_runs_.begin();
403 
404         FreePageRun* l = *it;
405         DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
406         if (kTraceRosAlloc) {
407           LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a lower free page run 0x"
408                     << std::hex << reinterpret_cast<uintptr_t>(l) << " [" << std::dec << ToPageMapIndex(l) << "] -0x"
409                     << std::hex << reinterpret_cast<uintptr_t>(l->End(this)) << " [" << std::dec
410                     << (l->End(this) == End() ? page_map_size_ : ToPageMapIndex(l->End(this))) << "]";
411         }
412         if (l->End(this) == fpr->Begin()) {
413           if (kTraceRosAlloc) {
414             LOG(INFO) << "Success";
415           }
416           free_page_runs_.erase(it--);
417           if (kTraceRosAlloc) {
418             LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
419                       << reinterpret_cast<intptr_t>(l)
420                       << " from free_page_runs_";
421           }
422           l->SetByteSize(this, l->ByteSize(this) + fpr->ByteSize(this));
423           DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
424           // Clear magic num since this is no longer the start of a free page run.
425           if (kIsDebugBuild) {
426             fpr->magic_num_ = 0;
427           }
428           fpr = l;
429         } else {
430           // Not adjacent. Stop.
431           if (kTraceRosAlloc) {
432             LOG(INFO) << "Fail";
433           }
434           break;
435         }
436         if (to_exit_loop) {
437           break;
438         }
439       }
440     }
441   }
442 
443   // Insert it.
444   DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
445   DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
446   DCHECK(fpr->IsFree());
447   fpr->ReleasePages(this);
448   DCHECK(fpr->IsFree());
449   free_page_runs_.insert(fpr);
450   DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
451   if (kTraceRosAlloc) {
452     LOG(INFO) << "RosAlloc::FreePages() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
453               << " into free_page_runs_";
454   }
455   return byte_size;
456 }
457 
458 void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated,
459                                  size_t* usable_size, size_t* bytes_tl_bulk_allocated) {
460   DCHECK(bytes_allocated != nullptr);
461   DCHECK(usable_size != nullptr);
462   DCHECK_GT(size, kLargeSizeThreshold);
463   size_t num_pages = RoundUp(size, kPageSize) / kPageSize;
464   void* r;
465   {
466     MutexLock mu(self, lock_);
467     r = AllocPages(self, num_pages, kPageMapLargeObject);
468   }
469   if (UNLIKELY(r == nullptr)) {
470     if (kTraceRosAlloc) {
471       LOG(INFO) << "RosAlloc::AllocLargeObject() : nullptr";
472     }
473     return nullptr;
474   }
475   const size_t total_bytes = num_pages * kPageSize;
476   *bytes_allocated = total_bytes;
477   *usable_size = total_bytes;
478   *bytes_tl_bulk_allocated = total_bytes;
479   if (kTraceRosAlloc) {
480     LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(r)
481               << "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize)
482               << "(" << std::dec << (num_pages * kPageSize) << ")";
483   }
484   // Check if the returned memory is really all zero.
485   if (ShouldCheckZeroMemory()) {
486     CHECK_EQ(total_bytes % sizeof(uintptr_t), 0U);
487     const uintptr_t* words = reinterpret_cast<uintptr_t*>(r);
488     for (size_t i = 0; i < total_bytes / sizeof(uintptr_t); ++i) {
489       CHECK_EQ(words[i], 0U);
490     }
491   }
492   return r;
493 }
494 
495 size_t RosAlloc::FreeInternal(Thread* self, void* ptr) {
496   DCHECK_LE(base_, ptr);
497   DCHECK_LT(ptr, base_ + footprint_);
498   size_t pm_idx = RoundDownToPageMapIndex(ptr);
499   Run* run = nullptr;
500   {
501     MutexLock mu(self, lock_);
502     DCHECK_LT(pm_idx, page_map_size_);
503     uint8_t page_map_entry = page_map_[pm_idx];
504     if (kTraceRosAlloc) {
505       LOG(INFO) << "RosAlloc::FreeInternal() : " << std::hex << ptr << ", pm_idx=" << std::dec << pm_idx
506                 << ", page_map_entry=" << static_cast<int>(page_map_entry);
507     }
508     switch (page_map_[pm_idx]) {
509       case kPageMapLargeObject:
510         return FreePages(self, ptr, false);
511       case kPageMapLargeObjectPart:
512         LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_[pm_idx]);
513         return 0;
514       case kPageMapRunPart: {
515         // Find the beginning of the run.
516         do {
517           --pm_idx;
518           DCHECK_LT(pm_idx, capacity_ / kPageSize);
519         } while (page_map_[pm_idx] != kPageMapRun);
520         FALLTHROUGH_INTENDED;
521       case kPageMapRun:
522         run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
523         DCHECK_EQ(run->magic_num_, kMagicNum);
524         break;
525       case kPageMapReleased:
526       case kPageMapEmpty:
527         LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_[pm_idx]);
528         return 0;
529       }
530       default:
531         LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_[pm_idx]);
532         return 0;
533     }
534   }
535   DCHECK(run != nullptr);
536   return FreeFromRun(self, ptr, run);
537 }
538 
539 size_t RosAlloc::Free(Thread* self, void* ptr) {
540   ReaderMutexLock rmu(self, bulk_free_lock_);
541   return FreeInternal(self, ptr);
542 }
543 
544 RosAlloc::Run* RosAlloc::AllocRun(Thread* self, size_t idx) {
545   RosAlloc::Run* new_run = nullptr;
546   {
547     MutexLock mu(self, lock_);
548     new_run = reinterpret_cast<Run*>(AllocPages(self, numOfPages[idx], kPageMapRun));
549   }
550   if (LIKELY(new_run != nullptr)) {
551     if (kIsDebugBuild) {
552       new_run->magic_num_ = kMagicNum;
553     }
554     new_run->size_bracket_idx_ = idx;
555     new_run->SetAllocBitMapBitsForInvalidSlots();
556     DCHECK(!new_run->IsThreadLocal());
557     DCHECK_EQ(new_run->first_search_vec_idx_, 0U);
558     DCHECK(!new_run->to_be_bulk_freed_);
559     if (kUsePrefetchDuringAllocRun && idx < kNumThreadLocalSizeBrackets) {
560       // Take ownership of the cache lines if this is likely to be a thread-local run.
561       if (kPrefetchNewRunDataByZeroing) {
562         // Zeroing the data is sometimes faster than prefetching but it increases memory usage
563         // since we end up dirtying zero pages which may have been madvised.
564         new_run->ZeroData();
565       } else {
566         const size_t num_of_slots = numOfSlots[idx];
567         const size_t bracket_size = bracketSizes[idx];
568         const size_t num_of_bytes = num_of_slots * bracket_size;
569         uint8_t* begin = reinterpret_cast<uint8_t*>(new_run) + headerSizes[idx];
570         for (size_t i = 0; i < num_of_bytes; i += kPrefetchStride) {
571           __builtin_prefetch(begin + i);
572         }
573       }
574     }
575   }
576   return new_run;
577 }
578 
579 RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) {
580   // Get the lowest address non-full run from the binary tree.
581   auto* const bt = &non_full_runs_[idx];
582   if (!bt->empty()) {
583     // If there's one, use it as the current run.
584     auto it = bt->begin();
585     Run* non_full_run = *it;
586     DCHECK(non_full_run != nullptr);
587     DCHECK(!non_full_run->IsThreadLocal());
588     bt->erase(it);
589     return non_full_run;
590   }
591   // If there's none, allocate a new run and use it as the current run.
592   return AllocRun(self, idx);
593 }
594 
595 inline void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) {
596   Run* current_run = current_runs_[idx];
597   DCHECK(current_run != nullptr);
598   void* slot_addr = current_run->AllocSlot();
599   if (UNLIKELY(slot_addr == nullptr)) {
600     // The current run got full. Try to refill it.
601     DCHECK(current_run->IsFull());
602     if (kIsDebugBuild && current_run != dedicated_full_run_) {
603       full_runs_[idx].insert(current_run);
604       if (kTraceRosAlloc) {
605         LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
606                   << reinterpret_cast<intptr_t>(current_run)
607                   << " into full_runs_[" << std::dec << idx << "]";
608       }
609       DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
610       DCHECK(full_runs_[idx].find(current_run) != full_runs_[idx].end());
611     }
612     current_run = RefillRun(self, idx);
613     if (UNLIKELY(current_run == nullptr)) {
614       // Failed to allocate a new run, make sure that it is the dedicated full run.
615       current_runs_[idx] = dedicated_full_run_;
616       return nullptr;
617     }
618     DCHECK(current_run != nullptr);
619     DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
620     DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
621     current_run->SetIsThreadLocal(false);
622     current_runs_[idx] = current_run;
623     DCHECK(!current_run->IsFull());
624     slot_addr = current_run->AllocSlot();
625     // Must succeed now with a new run.
626     DCHECK(slot_addr != nullptr);
627   }
628   return slot_addr;
629 }
630 
631 void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated,
632                                          size_t* usable_size,
633                                          size_t* bytes_tl_bulk_allocated) {
634   DCHECK(bytes_allocated != nullptr);
635   DCHECK(usable_size != nullptr);
636   DCHECK(bytes_tl_bulk_allocated != nullptr);
637   DCHECK_LE(size, kLargeSizeThreshold);
638   size_t bracket_size;
639   size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
640   DCHECK_EQ(idx, SizeToIndex(size));
641   DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
642   DCHECK_EQ(bracket_size, bracketSizes[idx]);
643   DCHECK_LE(size, bracket_size);
644   DCHECK(size > 512 || bracket_size - size < 16);
645   Locks::mutator_lock_->AssertExclusiveHeld(self);
646   void* slot_addr = AllocFromCurrentRunUnlocked(self, idx);
647   if (LIKELY(slot_addr != nullptr)) {
648     *bytes_allocated = bracket_size;
649     *usable_size = bracket_size;
650     *bytes_tl_bulk_allocated = bracket_size;
651   }
652   // Caller verifies that it is all 0.
653   return slot_addr;
654 }
655 
656 void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated,
657                              size_t* usable_size, size_t* bytes_tl_bulk_allocated) {
658   DCHECK(bytes_allocated != nullptr);
659   DCHECK(usable_size != nullptr);
660   DCHECK(bytes_tl_bulk_allocated != nullptr);
661   DCHECK_LE(size, kLargeSizeThreshold);
662   size_t bracket_size;
663   size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
664   DCHECK_EQ(idx, SizeToIndex(size));
665   DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
666   DCHECK_EQ(bracket_size, bracketSizes[idx]);
667   DCHECK_LE(size, bracket_size);
668   DCHECK(size > 512 || bracket_size - size < 16);
669 
670   void* slot_addr;
671 
672   if (LIKELY(idx < kNumThreadLocalSizeBrackets)) {
673     // Use a thread-local run.
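    // Small size brackets are served from a per-thread run without taking the bracket lock.
    // Slots freed back into such a run are only recorded in its thread-local free bit map and
    // are merged into the alloc bit map below, once the run appears full.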
674     Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
675     // Allow invalid since this will always fail the allocation.
676     if (kIsDebugBuild) {
677       // Need the lock to prevent race conditions.
678       MutexLock mu(self, *size_bracket_locks_[idx]);
679       CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
680       CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
681     }
682     DCHECK(thread_local_run != nullptr);
683     DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_);
684     slot_addr = thread_local_run->AllocSlot();
685     // The allocation must fail if the run is invalid.
686     DCHECK(thread_local_run != dedicated_full_run_ || slot_addr == nullptr)
687         << "allocated from an invalid run";
688     if (UNLIKELY(slot_addr == nullptr)) {
689       // The run got full. Try to free slots.
690       DCHECK(thread_local_run->IsFull());
691       MutexLock mu(self, *size_bracket_locks_[idx]);
692       bool is_all_free_after_merge;
693       // This is safe to do for the dedicated_full_run_ since the bitmaps are empty.
694       if (thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&is_all_free_after_merge)) {
695         DCHECK_NE(thread_local_run, dedicated_full_run_);
696         // Some slot got freed. Keep it.
697         DCHECK(!thread_local_run->IsFull());
698         DCHECK_EQ(is_all_free_after_merge, thread_local_run->IsAllFree());
699         if (is_all_free_after_merge) {
700           // Check that the bitmap idx is back at 0 if it's all free.
701           DCHECK_EQ(thread_local_run->first_search_vec_idx_, 0U);
702         }
703       } else {
704         // No slots got freed. Try to refill the thread-local run.
705         DCHECK(thread_local_run->IsFull());
706         if (thread_local_run != dedicated_full_run_) {
707           thread_local_run->SetIsThreadLocal(false);
708           if (kIsDebugBuild) {
709             full_runs_[idx].insert(thread_local_run);
710             if (kTraceRosAlloc) {
711               LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex
712                         << reinterpret_cast<intptr_t>(thread_local_run)
713                         << " into full_runs_[" << std::dec << idx << "]";
714             }
715           }
716           DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
717           DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
718         }
719 
720         thread_local_run = RefillRun(self, idx);
721         if (UNLIKELY(thread_local_run == nullptr)) {
722           self->SetRosAllocRun(idx, dedicated_full_run_);
723           return nullptr;
724         }
725         DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
726         DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
727         thread_local_run->SetIsThreadLocal(true);
728         self->SetRosAllocRun(idx, thread_local_run);
729         DCHECK(!thread_local_run->IsFull());
730       }
731       DCHECK(thread_local_run != nullptr);
732       DCHECK(!thread_local_run->IsFull());
733       DCHECK(thread_local_run->IsThreadLocal());
734       // Account for all the free slots in the new or refreshed thread local run.
735       *bytes_tl_bulk_allocated = thread_local_run->NumberOfFreeSlots() * bracket_size;
736       slot_addr = thread_local_run->AllocSlot();
737       // Must succeed now with a new run.
738       DCHECK(slot_addr != nullptr);
739     } else {
740       // The slot is already counted. Leave it as is.
741       *bytes_tl_bulk_allocated = 0;
742     }
743     DCHECK(slot_addr != nullptr);
744     if (kTraceRosAlloc) {
745       LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex
746                 << reinterpret_cast<intptr_t>(slot_addr)
747                 << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
748                 << "(" << std::dec << (bracket_size) << ")";
749     }
750     *bytes_allocated = bracket_size;
751     *usable_size = bracket_size;
752   } else {
753     // Use the (shared) current run.
754     MutexLock mu(self, *size_bracket_locks_[idx]);
755     slot_addr = AllocFromCurrentRunUnlocked(self, idx);
756     if (kTraceRosAlloc) {
757       LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex
758                 << reinterpret_cast<intptr_t>(slot_addr)
759                 << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
760                 << "(" << std::dec << (bracket_size) << ")";
761     }
762     if (LIKELY(slot_addr != nullptr)) {
763       *bytes_allocated = bracket_size;
764       *usable_size = bracket_size;
765       *bytes_tl_bulk_allocated = bracket_size;
766     }
767   }
768   // Caller verifies that it is all 0.
769   return slot_addr;
770 }
771 
772 size_t RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) {
773   DCHECK_EQ(run->magic_num_, kMagicNum);
774   DCHECK_LT(run, ptr);
775   DCHECK_LT(ptr, run->End());
776   const size_t idx = run->size_bracket_idx_;
777   const size_t bracket_size = bracketSizes[idx];
778   bool run_was_full = false;
779   MutexLock brackets_mu(self, *size_bracket_locks_[idx]);
780   if (kIsDebugBuild) {
781     run_was_full = run->IsFull();
782   }
783   if (kTraceRosAlloc) {
784     LOG(INFO) << "RosAlloc::FreeFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr);
785   }
786   if (LIKELY(run->IsThreadLocal())) {
787     // It's a thread-local run. Just mark the thread-local free bit map and return.
788     DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets);
789     DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
790     DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
791     run->MarkThreadLocalFreeBitMap(ptr);
792     if (kTraceRosAlloc) {
793       LOG(INFO) << "RosAlloc::FreeFromRun() : Freed a slot in a thread local run 0x" << std::hex
794                 << reinterpret_cast<intptr_t>(run);
795     }
796     // A thread local run will be kept as a thread local even if it's become all free.
797     return bracket_size;
798   }
799   // Free the slot in the run.
800   run->FreeSlot(ptr);
801   auto* non_full_runs = &non_full_runs_[idx];
802   if (run->IsAllFree()) {
803     // It has just become completely free. Free the pages of this run.
804     std::set<Run*>::iterator pos = non_full_runs->find(run);
805     if (pos != non_full_runs->end()) {
806       non_full_runs->erase(pos);
807       if (kTraceRosAlloc) {
808         LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
809                   << reinterpret_cast<intptr_t>(run) << " from non_full_runs_";
810       }
811     }
812     if (run == current_runs_[idx]) {
813       current_runs_[idx] = dedicated_full_run_;
814     }
815     DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
816     DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
817     run->ZeroHeader();
818     {
819       MutexLock lock_mu(self, lock_);
820       FreePages(self, run, true);
821     }
822   } else {
823     // It is not completely free. If it wasn't the current run or
824     // already in the non-full run set (i.e., it was full) insert it
825     // into the non-full run set.
826     if (run != current_runs_[idx]) {
827       auto* full_runs = kIsDebugBuild ? &full_runs_[idx] : nullptr;
828       auto pos = non_full_runs->find(run);
829       if (pos == non_full_runs->end()) {
830         DCHECK(run_was_full);
831         DCHECK(full_runs->find(run) != full_runs->end());
832         if (kIsDebugBuild) {
833           full_runs->erase(run);
834           if (kTraceRosAlloc) {
835             LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
836                       << reinterpret_cast<intptr_t>(run) << " from full_runs_";
837           }
838         }
839         non_full_runs->insert(run);
840         DCHECK(!run->IsFull());
841         if (kTraceRosAlloc) {
842           LOG(INFO) << "RosAlloc::FreeFromRun() : Inserted run 0x" << std::hex
843                     << reinterpret_cast<intptr_t>(run)
844                     << " into non_full_runs_[" << std::dec << idx << "]";
845         }
846       }
847     }
848   }
849   return bracket_size;
850 }
851 
852 std::string RosAlloc::Run::BitMapToStr(uint32_t* bit_map_base, size_t num_vec) {
853   std::string bit_map_str;
854   for (size_t v = 0; v < num_vec; v++) {
855     uint32_t vec = bit_map_base[v];
856     if (v != num_vec - 1) {
857       bit_map_str.append(StringPrintf("%x-", vec));
858     } else {
859       bit_map_str.append(StringPrintf("%x", vec));
860     }
861   }
862   return bit_map_str;
863 }
864 
865 std::string RosAlloc::Run::Dump() {
866   size_t idx = size_bracket_idx_;
867   size_t num_slots = numOfSlots[idx];
868   size_t num_vec = RoundUp(num_slots, 32) / 32;
869   std::ostringstream stream;
870   stream << "RosAlloc Run = " << reinterpret_cast<void*>(this)
871          << "{ magic_num=" << static_cast<int>(magic_num_)
872          << " size_bracket_idx=" << idx
873          << " is_thread_local=" << static_cast<int>(is_thread_local_)
874          << " to_be_bulk_freed=" << static_cast<int>(to_be_bulk_freed_)
875          << " first_search_vec_idx=" << first_search_vec_idx_
876          << " alloc_bit_map=" << BitMapToStr(alloc_bit_map_, num_vec)
877          << " bulk_free_bit_map=" << BitMapToStr(BulkFreeBitMap(), num_vec)
878          << " thread_local_bit_map=" << BitMapToStr(ThreadLocalFreeBitMap(), num_vec)
879          << " }" << std::endl;
880   return stream.str();
881 }
882 
883 void RosAlloc::Run::FreeSlot(void* ptr) {
884   DCHECK(!IsThreadLocal());
885   const uint8_t idx = size_bracket_idx_;
886   const size_t bracket_size = bracketSizes[idx];
887   const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
888       - (reinterpret_cast<uint8_t*>(this) + headerSizes[idx]);
889   DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
890   size_t slot_idx = offset_from_slot_base / bracket_size;
891   DCHECK_LT(slot_idx, numOfSlots[idx]);
892   size_t vec_idx = slot_idx / 32;
893   if (kIsDebugBuild) {
894     size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
895     DCHECK_LT(vec_idx, num_vec);
896   }
897   size_t vec_off = slot_idx % 32;
898   uint32_t* vec = &alloc_bit_map_[vec_idx];
899   first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(vec_idx));
900   const uint32_t mask = 1U << vec_off;
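  // For example, slot_idx == 37 gives vec_idx == 1 and vec_off == 5, so mask == 0x20 and the
  // slot is freed by clearing bit 5 of the second bitmap word.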
901   DCHECK_NE(*vec & mask, 0U);
902   *vec &= ~mask;
903   DCHECK_EQ(*vec & mask, 0U);
904   // Zero out the memory.
905   // TODO: Investigate alternate memset since ptr is guaranteed to be aligned to 16.
906   memset(ptr, 0, bracket_size);
907   if (kTraceRosAlloc) {
908     LOG(INFO) << "RosAlloc::Run::FreeSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
909               << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
910   }
911 }
912 
913 size_t RosAlloc::Run::NumberOfFreeSlots() {
914   size_t num_alloc_slots = 0;
915   const size_t idx = size_bracket_idx_;
916   const size_t num_slots = numOfSlots[idx];
917   const size_t num_vec = RoundUp(num_slots, 32) / 32;
918   DCHECK_NE(num_vec, 0U);
919   for (size_t v = 0; v < num_vec - 1; v++) {
920     num_alloc_slots += POPCOUNT(alloc_bit_map_[v]);
921   }
922   // Don't count the invalid bits in the last vector.
923   uint32_t last_vec_masked = alloc_bit_map_[num_vec - 1] &
924       ~GetBitmapLastVectorMask(num_slots, num_vec);
925   num_alloc_slots += POPCOUNT(last_vec_masked);
926   size_t num_free_slots = num_slots - num_alloc_slots;
927   DCHECK_LE(num_alloc_slots, num_slots);
928   DCHECK_LE(num_free_slots, num_slots);
929   return num_free_slots;
930 }
931 
932 inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) {
933   DCHECK(IsThreadLocal());
934   // Free slots in the alloc bit map based on the thread local free bit map.
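  // A set bit in the thread-local free bit map marks a slot freed while the run was thread-local;
  // clearing the matching alloc bit makes that slot allocatable again. The return value tells the
  // caller whether anything was actually reclaimed.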
935   const size_t idx = size_bracket_idx_;
936   const size_t num_of_slots = numOfSlots[idx];
937   const size_t num_vec = RoundUp(num_of_slots, 32) / 32;
938   bool changed = false;
939   uint32_t* vecp = &alloc_bit_map_[0];
940   uint32_t* tl_free_vecp = &ThreadLocalFreeBitMap()[0];
941   bool is_all_free_after = true;
942   for (size_t v = 0; v < num_vec; v++, vecp++, tl_free_vecp++) {
943     uint32_t tl_free_vec = *tl_free_vecp;
944     uint32_t vec_before = *vecp;
945     uint32_t vec_after;
946     if (tl_free_vec != 0) {
947       first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v));
948       vec_after = vec_before & ~tl_free_vec;
949       *vecp = vec_after;
950       changed = true;
951       *tl_free_vecp = 0;  // clear the thread local free bit map.
952     } else {
953       vec_after = vec_before;
954     }
955     if (vec_after != 0) {
956       if (v == num_vec - 1) {
957         // Only not all free if a bit other than the mask bits is set.
958         is_all_free_after =
959             is_all_free_after && GetBitmapLastVectorMask(num_of_slots, num_vec) == vec_after;
960       } else {
961         is_all_free_after = false;
962       }
963     }
964     DCHECK_EQ(*tl_free_vecp, static_cast<uint32_t>(0));
965   }
966   *is_all_free_after_out = is_all_free_after;
967   // Return true if there was at least a bit set in the thread-local
968   // free bit map and at least a bit in the alloc bit map changed.
969   return changed;
970 }
971 
972 inline void RosAlloc::Run::MergeBulkFreeBitMapIntoAllocBitMap() {
973   DCHECK(!IsThreadLocal());
974   // Free slots in the alloc bit map based on the bulk free bit map.
975   const size_t num_vec = NumberOfBitmapVectors();
976   uint32_t* vecp = &alloc_bit_map_[0];
977   uint32_t* free_vecp = &BulkFreeBitMap()[0];
978   for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) {
979     uint32_t free_vec = *free_vecp;
980     if (free_vec != 0) {
981       first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v));
982       *vecp &= ~free_vec;
983       *free_vecp = 0;  // clear the bulk free bit map.
984     }
985     DCHECK_EQ(*free_vecp, static_cast<uint32_t>(0));
986   }
987 }
988 
989 inline void RosAlloc::Run::UnionBulkFreeBitMapToThreadLocalFreeBitMap() {
990   DCHECK(IsThreadLocal());
991   // Union the thread local bit map with the bulk free bit map.
992   size_t num_vec = NumberOfBitmapVectors();
993   uint32_t* to_vecp = &ThreadLocalFreeBitMap()[0];
994   uint32_t* from_vecp = &BulkFreeBitMap()[0];
995   for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) {
996     uint32_t from_vec = *from_vecp;
997     if (from_vec != 0) {
998       *to_vecp |= from_vec;
999       *from_vecp = 0;  // clear the bulk free bit map.
1000     }
1001     DCHECK_EQ(*from_vecp, static_cast<uint32_t>(0));
1002   }
1003 }
1004 
1005 inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) {
1006   DCHECK(IsThreadLocal());
1007   MarkFreeBitMapShared(ptr, ThreadLocalFreeBitMap(), "MarkThreadLocalFreeBitMap");
1008 }
1009 
1010 inline size_t RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) {
1011   return MarkFreeBitMapShared(ptr, BulkFreeBitMap(), "MarkBulkFreeBitMap");
1012 }
1013 
1014 inline size_t RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base,
1015                                                   const char* caller_name) {
1016   const uint8_t idx = size_bracket_idx_;
1017   const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
1018       - (reinterpret_cast<uint8_t*>(this) + headerSizes[idx]);
1019   const size_t bracket_size = bracketSizes[idx];
1020   memset(ptr, 0, bracket_size);
1021   DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
1022   size_t slot_idx = offset_from_slot_base / bracket_size;
1023   DCHECK_LT(slot_idx, numOfSlots[idx]);
1024   size_t vec_idx = slot_idx / 32;
1025   if (kIsDebugBuild) {
1026     size_t num_vec = NumberOfBitmapVectors();
1027     DCHECK_LT(vec_idx, num_vec);
1028   }
1029   size_t vec_off = slot_idx % 32;
1030   uint32_t* vec = &free_bit_map_base[vec_idx];
1031   const uint32_t mask = 1U << vec_off;
1032   DCHECK_EQ(*vec & mask, 0U);
1033   *vec |= mask;
1034   DCHECK_NE(*vec & mask, 0U);
1035   if (kTraceRosAlloc) {
1036     LOG(INFO) << "RosAlloc::Run::" << caller_name << "() : 0x" << std::hex
1037               << reinterpret_cast<intptr_t>(ptr)
1038               << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
1039   }
1040   return bracket_size;
1041 }
1042 
1043 inline uint32_t RosAlloc::Run::GetBitmapLastVectorMask(size_t num_slots, size_t num_vec) {
1044   const size_t kBitsPerVec = 32;
1045   DCHECK_GE(num_vec * kBitsPerVec, num_slots);
1046   DCHECK_NE(num_vec, 0U);
1047   size_t remain = num_vec * kBitsPerVec - num_slots;
1048   DCHECK_LT(remain, kBitsPerVec);
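  // Example: num_slots == 42 and num_vec == 2 give remain == 22, so the mask sets the top 22 bits
  // of the last vector: ((1U << 22) - 1) << 10 == 0xFFFFFC00.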
1049   return ((1U << remain) - 1) << ((kBitsPerVec - remain) & 0x1F);
1050 }
1051 
1052 inline bool RosAlloc::Run::IsAllFree() {
1053   const uint8_t idx = size_bracket_idx_;
1054   const size_t num_slots = numOfSlots[idx];
1055   const size_t num_vec = NumberOfBitmapVectors();
1056   DCHECK_NE(num_vec, 0U);
1057   // Check the last vector after the loop since it uses a special case for the masked bits.
1058   for (size_t v = 0; v < num_vec - 1; v++) {
1059     uint32_t vec = alloc_bit_map_[v];
1060     if (vec != 0) {
1061       return false;
1062     }
1063   }
1064   // Make sure the last word is equal to the mask, all other bits must be 0.
1065   return alloc_bit_map_[num_vec - 1] == GetBitmapLastVectorMask(num_slots, num_vec);
1066 }
1067 
1068 inline bool RosAlloc::Run::IsBulkFreeBitmapClean() {
1069   const size_t num_vec = NumberOfBitmapVectors();
1070   for (size_t v = 0; v < num_vec; v++) {
1071     uint32_t vec = BulkFreeBitMap()[v];
1072     if (vec != 0) {
1073       return false;
1074     }
1075   }
1076   return true;
1077 }
1078 
1079 inline bool RosAlloc::Run::IsThreadLocalFreeBitmapClean() {
1080   const size_t num_vec = NumberOfBitmapVectors();
1081   for (size_t v = 0; v < num_vec; v++) {
1082     uint32_t vec = ThreadLocalFreeBitMap()[v];
1083     if (vec != 0) {
1084       return false;
1085     }
1086   }
1087   return true;
1088 }
1089 
1090 inline void RosAlloc::Run::SetAllocBitMapBitsForInvalidSlots() {
1091   const size_t idx = size_bracket_idx_;
1092   const size_t num_slots = numOfSlots[idx];
1093   const size_t num_vec = RoundUp(num_slots, 32) / 32;
1094   DCHECK_NE(num_vec, 0U);
1095   // Make sure to set the bits at the end of the bitmap so that we don't allocate there since they
1096   // don't represent valid slots.
1097   alloc_bit_map_[num_vec - 1] |= GetBitmapLastVectorMask(num_slots, num_vec);
1098 }
1099 
1100 inline void RosAlloc::Run::ZeroHeader() {
1101   const uint8_t idx = size_bracket_idx_;
1102   memset(this, 0, headerSizes[idx]);
1103 }
1104 
1105 inline void RosAlloc::Run::ZeroData() {
1106   const uint8_t idx = size_bracket_idx_;
1107   uint8_t* slot_begin = reinterpret_cast<uint8_t*>(this) + headerSizes[idx];
1108   memset(slot_begin, 0, numOfSlots[idx] * bracketSizes[idx]);
1109 }
1110 
1111 inline void RosAlloc::Run::FillAllocBitMap() {
1112   size_t num_vec = NumberOfBitmapVectors();
1113   memset(alloc_bit_map_, 0xFF, sizeof(uint32_t) * num_vec);
1114   first_search_vec_idx_ = num_vec - 1;  // No free bits in any of the bitmap words.
1115 }
1116 
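// Walks every slot in the run and calls |handler| once per slot: used_bytes is the bracket size
// for slots that are currently allocated and 0 for free ones. For thread-local runs, slots still
// pending in the thread-local free bit map are reported as free.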
1117 void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
1118                                     void* arg) {
1119   size_t idx = size_bracket_idx_;
1120   uint8_t* slot_base = reinterpret_cast<uint8_t*>(this) + headerSizes[idx];
1121   size_t num_slots = numOfSlots[idx];
1122   size_t bracket_size = IndexToBracketSize(idx);
1123   DCHECK_EQ(slot_base + num_slots * bracket_size,
1124             reinterpret_cast<uint8_t*>(this) + numOfPages[idx] * kPageSize);
1125   size_t num_vec = RoundUp(num_slots, 32) / 32;
1126   size_t slots = 0;
1127   const uint32_t* const tl_free_vecp = IsThreadLocal() ? ThreadLocalFreeBitMap() : nullptr;
1128   for (size_t v = 0; v < num_vec; v++, slots += 32) {
1129     DCHECK_GE(num_slots, slots);
1130     uint32_t vec = alloc_bit_map_[v];
1131     if (tl_free_vecp != nullptr) {
1132       // Clear out the set bits in the thread local free bitmap since these aren't actually
1133       // allocated.
1134       vec &= ~tl_free_vecp[v];
1135     }
1136     size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
1137     for (size_t i = 0; i < end; ++i) {
1138       bool is_allocated = ((vec >> i) & 0x1) != 0;
1139       uint8_t* slot_addr = slot_base + (slots + i) * bracket_size;
1140       if (is_allocated) {
1141         handler(slot_addr, slot_addr + bracket_size, bracket_size, arg);
1142       } else {
1143         handler(slot_addr, slot_addr + bracket_size, 0, arg);
1144       }
1145     }
1146   }
1147 }
1148 
1149 // If true, read the page map entries in BulkFree() without using the
1150 // lock for better performance, assuming that the existence of an
1151 // allocated chunk/pointer being freed in BulkFree() guarantees that
1152 // the page map entry won't change. Enabled for now.
1153 static constexpr bool kReadPageMapEntryWithoutLockInBulkFree = true;
1154 
1155 size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
1156   size_t freed_bytes = 0;
1157   if ((false)) {
1158     // Used only to test Free() as GC uses only BulkFree().
1159     for (size_t i = 0; i < num_ptrs; ++i) {
1160       freed_bytes += FreeInternal(self, ptrs[i]);
1161     }
1162     return freed_bytes;
1163   }
1164 
1165   WriterMutexLock wmu(self, bulk_free_lock_);
1166 
1167   // First mark slots to free in the bulk free bit map without locking the
1168   // size bracket locks. On host, unordered_set is faster than vector + flag.
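  // Affected runs are collected (deduplicated) here so that the second pass below post-processes
  // each run exactly once under its size bracket lock.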
1169 #ifdef HAVE_ANDROID_OS
1170   std::vector<Run*> runs;
1171 #else
1172   std::unordered_set<Run*, hash_run, eq_run> runs;
1173 #endif
1174   for (size_t i = 0; i < num_ptrs; i++) {
1175     void* ptr = ptrs[i];
1176     DCHECK_LE(base_, ptr);
1177     DCHECK_LT(ptr, base_ + footprint_);
1178     size_t pm_idx = RoundDownToPageMapIndex(ptr);
1179     Run* run = nullptr;
1180     if (kReadPageMapEntryWithoutLockInBulkFree) {
1181       // Read the page map entries without locking the lock.
1182       uint8_t page_map_entry = page_map_[pm_idx];
1183       if (kTraceRosAlloc) {
1184         LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
1185                   << std::dec << pm_idx
1186                   << ", page_map_entry=" << static_cast<int>(page_map_entry);
1187       }
1188       if (LIKELY(page_map_entry == kPageMapRun)) {
1189         run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
1190       } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
1191         size_t pi = pm_idx;
1192         // Find the beginning of the run.
1193         do {
1194           --pi;
1195           DCHECK_LT(pi, capacity_ / kPageSize);
1196         } while (page_map_[pi] != kPageMapRun);
1197         run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
1198       } else if (page_map_entry == kPageMapLargeObject) {
1199         MutexLock mu(self, lock_);
1200         freed_bytes += FreePages(self, ptr, false);
1201         continue;
1202       } else {
1203         LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_entry);
1204       }
1205     } else {
1206       // Read the page map entries with a lock.
1207       MutexLock mu(self, lock_);
1208       DCHECK_LT(pm_idx, page_map_size_);
1209       uint8_t page_map_entry = page_map_[pm_idx];
1210       if (kTraceRosAlloc) {
1211         LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
1212                   << std::dec << pm_idx
1213                   << ", page_map_entry=" << static_cast<int>(page_map_entry);
1214       }
1215       if (LIKELY(page_map_entry == kPageMapRun)) {
1216         run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
1217       } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
1218         size_t pi = pm_idx;
1219         // Find the beginning of the run.
1220         do {
1221           --pi;
1222           DCHECK_LT(pi, capacity_ / kPageSize);
1223         } while (page_map_[pi] != kPageMapRun);
1224         run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
1225       } else if (page_map_entry == kPageMapLargeObject) {
1226         freed_bytes += FreePages(self, ptr, false);
1227         continue;
1228       } else {
1229         LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_entry);
1230       }
1231     }
1232     DCHECK(run != nullptr);
1233     DCHECK_EQ(run->magic_num_, kMagicNum);
1234     // Set the bit in the bulk free bit map.
1235     freed_bytes += run->MarkBulkFreeBitMap(ptr);
1236 #ifdef HAVE_ANDROID_OS
1237     if (!run->to_be_bulk_freed_) {
1238       run->to_be_bulk_freed_ = true;
1239       runs.push_back(run);
1240     }
1241 #else
1242     runs.insert(run);
1243 #endif
1244   }
1245 
1246   // Now, iterate over the affected runs and update the alloc bit map
1247   // based on the bulk free bit map (for non-thread-local runs) and
1248   // union the bulk free bit map into the thread-local free bit map
1249   // (for thread-local runs.)
1250   for (Run* run : runs) {
1251 #ifdef HAVE_ANDROID_OS
1252     DCHECK(run->to_be_bulk_freed_);
1253     run->to_be_bulk_freed_ = false;
1254 #endif
1255     size_t idx = run->size_bracket_idx_;
1256     MutexLock brackets_mu(self, *size_bracket_locks_[idx]);
1257     if (run->IsThreadLocal()) {
1258       DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets);
1259       DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
1260       DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
1261       run->UnionBulkFreeBitMapToThreadLocalFreeBitMap();
1262       if (kTraceRosAlloc) {
1263         LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a thread local run 0x"
1264                   << std::hex << reinterpret_cast<intptr_t>(run);
1265       }
1266       DCHECK(run->IsThreadLocal());
1267       // A thread local run will be kept as a thread local even if
1268       // it's become all free.
1269     } else {
1270       bool run_was_full = run->IsFull();
1271       run->MergeBulkFreeBitMapIntoAllocBitMap();
1272       if (kTraceRosAlloc) {
1273         LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a run 0x" << std::hex
1274                   << reinterpret_cast<intptr_t>(run);
1275       }
1276       // Check if the run should be moved to non_full_runs_ or
1277       // free_page_runs_.
1278       auto* non_full_runs = &non_full_runs_[idx];
1279       auto* full_runs = kIsDebugBuild ? &full_runs_[idx] : nullptr;
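      // Note: full_runs_ is only maintained in debug builds; release builds do not track full runs.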
1280       if (run->IsAllFree()) {
1281         // It has just become completely free. Free the pages of the
1282         // run.
1283         bool run_was_current = run == current_runs_[idx];
1284         if (run_was_current) {
1285           DCHECK(full_runs->find(run) == full_runs->end());
1286           DCHECK(non_full_runs->find(run) == non_full_runs->end());
1287           // If it was a current run, reuse it.
1288         } else if (run_was_full) {
1289           // If it was full, remove it from the full run set (debug
1290           // only.)
1291           if (kIsDebugBuild) {
1292             std::unordered_set<Run*, hash_run, eq_run>::iterator pos = full_runs->find(run);
1293             DCHECK(pos != full_runs->end());
1294             full_runs->erase(pos);
1295             if (kTraceRosAlloc) {
1296               LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
1297                         << reinterpret_cast<intptr_t>(run)
1298                         << " from full_runs_";
1299             }
1300             DCHECK(full_runs->find(run) == full_runs->end());
1301           }
1302         } else {
1303           // If it was in a non full run set, remove it from the set.
1304           DCHECK(full_runs->find(run) == full_runs->end());
1305           DCHECK(non_full_runs->find(run) != non_full_runs->end());
1306           non_full_runs->erase(run);
1307           if (kTraceRosAlloc) {
1308             LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
1309                       << reinterpret_cast<intptr_t>(run)
1310                       << " from non_full_runs_";
1311           }
1312           DCHECK(non_full_runs->find(run) == non_full_runs->end());
1313         }
1314         if (!run_was_current) {
1315           run->ZeroHeader();
1316           MutexLock lock_mu(self, lock_);
1317           FreePages(self, run, true);
1318         }
1319       } else {
1320         // It is not completely free. If it was neither the current run
1321         // nor already in the non-full run set (i.e., it was full),
1322         // insert it into the non-full run set.
1323         if (run == current_runs_[idx]) {
1324           DCHECK(non_full_runs->find(run) == non_full_runs->end());
1325           DCHECK(full_runs->find(run) == full_runs->end());
1326           // If it was a current run, keep it.
1327         } else if (run_was_full) {
1328           // If it was full, remove it from the full run set (debug
1329           // only) and insert into the non-full run set.
1330           DCHECK(full_runs->find(run) != full_runs->end());
1331           DCHECK(non_full_runs->find(run) == non_full_runs->end());
1332           if (kIsDebugBuild) {
1333             full_runs->erase(run);
1334             if (kTraceRosAlloc) {
1335               LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
1336                         << reinterpret_cast<intptr_t>(run)
1337                         << " from full_runs_";
1338             }
1339           }
1340           non_full_runs->insert(run);
1341           if (kTraceRosAlloc) {
1342             LOG(INFO) << "RosAlloc::BulkFree() : Inserted run 0x" << std::hex
1343                       << reinterpret_cast<intptr_t>(run)
1344                       << " into non_full_runs_[" << std::dec << idx;
1345           }
1346         } else {
1347           // It was not full, so leave it in the non-full run set.
1348           DCHECK(full_runs->find(run) == full_runs->end());
1349           DCHECK(non_full_runs->find(run) != non_full_runs->end());
1350         }
1351       }
1352     }
1353   }
1354   return freed_bytes;
1355 }
1356 
1357 std::string RosAlloc::DumpPageMap() {
1358   std::ostringstream stream;
1359   stream << "RosAlloc PageMap: " << std::endl;
1360   lock_.AssertHeld(Thread::Current());
1361   size_t end = page_map_size_;
1362   FreePageRun* curr_fpr = nullptr;
1363   size_t curr_fpr_size = 0;
1364   size_t remaining_curr_fpr_size = 0;
1365   size_t num_running_empty_pages = 0;
1366   for (size_t i = 0; i < end; ++i) {
1367     uint8_t pm = page_map_[i];
1368     switch (pm) {
1369       case kPageMapReleased:
1370         // Fall-through.
1371       case kPageMapEmpty: {
1372         FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
1373         if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
1374           // Encountered a fresh free page run.
1375           DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
1376           DCHECK(fpr->IsFree());
1377           DCHECK(curr_fpr == nullptr);
1378           DCHECK_EQ(curr_fpr_size, static_cast<size_t>(0));
1379           curr_fpr = fpr;
1380           curr_fpr_size = fpr->ByteSize(this);
1381           DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
1382           remaining_curr_fpr_size = curr_fpr_size - kPageSize;
1383           stream << "[" << i << "]=" << (pm == kPageMapReleased ? "Released" : "Empty")
1384                  << " (FPR start) fpr_size=" << curr_fpr_size
1385                  << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
1386           if (remaining_curr_fpr_size == 0) {
1387             // Reset at the end of the current free page run.
1388             curr_fpr = nullptr;
1389             curr_fpr_size = 0;
1390           }
1391           stream << "curr_fpr=0x" << std::hex << reinterpret_cast<intptr_t>(curr_fpr) << std::endl;
1392           DCHECK_EQ(num_running_empty_pages, static_cast<size_t>(0));
1393         } else {
1394           // Still part of the current free page run.
1395           DCHECK_NE(num_running_empty_pages, static_cast<size_t>(0));
1396           DCHECK(curr_fpr != nullptr && curr_fpr_size > 0 && remaining_curr_fpr_size > 0);
1397           DCHECK_EQ(remaining_curr_fpr_size % kPageSize, static_cast<size_t>(0));
1398           DCHECK_GE(remaining_curr_fpr_size, static_cast<size_t>(kPageSize));
1399           remaining_curr_fpr_size -= kPageSize;
1400           stream << "[" << i << "]=Empty (FPR part)"
1401                  << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
1402           if (remaining_curr_fpr_size == 0) {
1403             // Reset at the end of the current free page run.
1404             curr_fpr = nullptr;
1405             curr_fpr_size = 0;
1406           }
1407         }
1408         num_running_empty_pages++;
1409         break;
1410       }
1411       case kPageMapLargeObject: {
1412         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
1413         num_running_empty_pages = 0;
1414         stream << "[" << i << "]=Large (start)" << std::endl;
1415         break;
1416       }
1417       case kPageMapLargeObjectPart:
1418         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
1419         num_running_empty_pages = 0;
1420         stream << "[" << i << "]=Large (part)" << std::endl;
1421         break;
1422       case kPageMapRun: {
1423         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
1424         num_running_empty_pages = 0;
1425         Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
1426         size_t idx = run->size_bracket_idx_;
1427         stream << "[" << i << "]=Run (start)"
1428                << " idx=" << idx
1429                << " numOfPages=" << numOfPages[idx]
1430                << " is_thread_local=" << run->is_thread_local_
1431                << " is_all_free=" << (run->IsAllFree() ? 1 : 0)
1432                << std::endl;
1433         break;
1434       }
1435       case kPageMapRunPart:
1436         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
1437         num_running_empty_pages = 0;
1438         stream << "[" << i << "]=Run (part)" << std::endl;
1439         break;
1440       default:
1441         stream << "[" << i << "]=Unrecognizable page map type: " << pm;
1442         break;
1443     }
1444   }
1445   return stream.str();
1446 }
1447 
1448 size_t RosAlloc::UsableSize(const void* ptr) {
1449   DCHECK_LE(base_, ptr);
1450   DCHECK_LT(ptr, base_ + footprint_);
1451   size_t pm_idx = RoundDownToPageMapIndex(ptr);
1452   MutexLock mu(Thread::Current(), lock_);
1453   switch (page_map_[pm_idx]) {
1454     case kPageMapReleased:
1455       // Fall-through.
1456     case kPageMapEmpty:
1457       LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
1458                  << std::hex << reinterpret_cast<intptr_t>(ptr);
1459       break;
1460     case kPageMapLargeObject: {
1461       size_t num_pages = 1;
1462       size_t idx = pm_idx + 1;
1463       size_t end = page_map_size_;
1464       while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
1465         num_pages++;
1466         idx++;
1467       }
1468       return num_pages * kPageSize;
1469     }
1470     case kPageMapLargeObjectPart:
1471       LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
1472                  << std::hex << reinterpret_cast<intptr_t>(ptr);
1473       break;
1474     case kPageMapRun:
1475     case kPageMapRunPart: {
1476       // Find the beginning of the run.
1477       while (page_map_[pm_idx] != kPageMapRun) {
1478         pm_idx--;
1479         DCHECK_LT(pm_idx, capacity_ / kPageSize);
1480       }
1481       DCHECK_EQ(page_map_[pm_idx], kPageMapRun);
1482       Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
1483       DCHECK_EQ(run->magic_num_, kMagicNum);
1484       size_t idx = run->size_bracket_idx_;
1485       size_t offset_from_slot_base = reinterpret_cast<const uint8_t*>(ptr)
1486           - (reinterpret_cast<uint8_t*>(run) + headerSizes[idx]);
1487       DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
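      // The usable size of a run slot is the full bracket size, regardless of the requested size.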
1488       return IndexToBracketSize(idx);
1489     }
1490     default: {
1491       LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_[pm_idx]);
1492       break;
1493     }
1494   }
1495   return 0;
1496 }
1497 
1498 bool RosAlloc::Trim() {
1499   MutexLock mu(Thread::Current(), lock_);
1500   FreePageRun* last_free_page_run;
1501   DCHECK_EQ(footprint_ % kPageSize, static_cast<size_t>(0));
1502   auto it = free_page_runs_.rbegin();
1503   if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
1504     // Remove the last free page run, if any.
1505     DCHECK(last_free_page_run->IsFree());
1506     DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
1507     DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
1508     DCHECK_EQ(last_free_page_run->End(this), base_ + footprint_);
1509     free_page_runs_.erase(last_free_page_run);
1510     size_t decrement = last_free_page_run->ByteSize(this);
1511     size_t new_footprint = footprint_ - decrement;
1512     DCHECK_EQ(new_footprint % kPageSize, static_cast<size_t>(0));
1513     size_t new_num_of_pages = new_footprint / kPageSize;
1514     DCHECK_GE(page_map_size_, new_num_of_pages);
1515     // Zero out the tail of the page map.
1516     uint8_t* zero_begin = const_cast<uint8_t*>(page_map_) + new_num_of_pages;
1517     uint8_t* madvise_begin = AlignUp(zero_begin, kPageSize);
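    // The page-aligned tail of the page map is returned to the kernel below; the unaligned gap
    // [zero_begin, madvise_begin) is only cleared with memset.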
1518     DCHECK_LE(madvise_begin, page_map_mem_map_->End());
1519     size_t madvise_size = page_map_mem_map_->End() - madvise_begin;
1520     if (madvise_size > 0) {
1521       DCHECK_ALIGNED(madvise_begin, kPageSize);
1522       DCHECK_EQ(RoundUp(madvise_size, kPageSize), madvise_size);
1523       if (!kMadviseZeroes) {
1524         memset(madvise_begin, 0, madvise_size);
1525       }
1526       CHECK_EQ(madvise(madvise_begin, madvise_size, MADV_DONTNEED), 0);
1527     }
1528     if (madvise_begin - zero_begin) {
1529       memset(zero_begin, 0, madvise_begin - zero_begin);
1530     }
1531     page_map_size_ = new_num_of_pages;
1532     free_page_run_size_map_.resize(new_num_of_pages);
1533     DCHECK_EQ(free_page_run_size_map_.size(), new_num_of_pages);
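    // Shrink the underlying space by the trimmed amount (a negative increment to the morecore hook).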
1534     ArtRosAllocMoreCore(this, -(static_cast<intptr_t>(decrement)));
1535     if (kTraceRosAlloc) {
1536       LOG(INFO) << "RosAlloc::Trim() : decreased the footprint from "
1537                 << footprint_ << " to " << new_footprint;
1538     }
1539     DCHECK_LT(new_footprint, footprint_);
1540     DCHECK_LT(new_footprint, capacity_);
1541     footprint_ = new_footprint;
1542     return true;
1543   }
1544   return false;
1545 }
1546 
1547 void RosAlloc::InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
1548                           void* arg) {
1549   // Note: no need to use this to release pages as we already do so in FreePages().
1550   if (handler == nullptr) {
1551     return;
1552   }
1553   MutexLock mu(Thread::Current(), lock_);
1554   size_t pm_end = page_map_size_;
1555   size_t i = 0;
1556   while (i < pm_end) {
1557     uint8_t pm = page_map_[i];
1558     switch (pm) {
1559       case kPageMapReleased:
1560         // Fall-through.
1561       case kPageMapEmpty: {
1562         // The start of a free page run.
1563         FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
1564         DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
1565         size_t fpr_size = fpr->ByteSize(this);
1566         DCHECK(IsAligned<kPageSize>(fpr_size));
1567         void* start = fpr;
1568         if (kIsDebugBuild) {
1569           // In the debug build, the first page of a free page run
1570           // contains a magic number for debugging. Exclude it.
1571           start = reinterpret_cast<uint8_t*>(fpr) + kPageSize;
1572         }
1573         void* end = reinterpret_cast<uint8_t*>(fpr) + fpr_size;
1574         handler(start, end, 0, arg);
1575         size_t num_pages = fpr_size / kPageSize;
1576         if (kIsDebugBuild) {
1577           for (size_t j = i + 1; j < i + num_pages; ++j) {
1578             DCHECK(IsFreePage(j));
1579           }
1580         }
1581         i += fpr_size / kPageSize;
1582         DCHECK_LE(i, pm_end);
1583         break;
1584       }
1585       case kPageMapLargeObject: {
1586         // The start of a large object.
1587         size_t num_pages = 1;
1588         size_t idx = i + 1;
1589         while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
1590           num_pages++;
1591           idx++;
1592         }
1593         void* start = base_ + i * kPageSize;
1594         void* end = base_ + (i + num_pages) * kPageSize;
1595         size_t used_bytes = num_pages * kPageSize;
1596         handler(start, end, used_bytes, arg);
1597         if (kIsDebugBuild) {
1598           for (size_t j = i + 1; j < i + num_pages; ++j) {
1599             DCHECK_EQ(page_map_[j], kPageMapLargeObjectPart);
1600           }
1601         }
1602         i += num_pages;
1603         DCHECK_LE(i, pm_end);
1604         break;
1605       }
1606       case kPageMapLargeObjectPart:
1607         LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
1608         break;
1609       case kPageMapRun: {
1610         // The start of a run.
1611         Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
1612         DCHECK_EQ(run->magic_num_, kMagicNum);
1613         // The dedicated full run doesn't contain any real allocations, don't visit the slots in
1614         // there.
1615         run->InspectAllSlots(handler, arg);
1616         size_t num_pages = numOfPages[run->size_bracket_idx_];
1617         if (kIsDebugBuild) {
1618           for (size_t j = i + 1; j < i + num_pages; ++j) {
1619             DCHECK_EQ(page_map_[j], kPageMapRunPart);
1620           }
1621         }
1622         i += num_pages;
1623         DCHECK_LE(i, pm_end);
1624         break;
1625       }
1626       case kPageMapRunPart:
1627         LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
1628         break;
1629       default:
1630         LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
1631         break;
1632     }
1633   }
1634 }
1635 
1636 size_t RosAlloc::Footprint() {
1637   MutexLock mu(Thread::Current(), lock_);
1638   return footprint_;
1639 }
1640 
1641 size_t RosAlloc::FootprintLimit() {
1642   MutexLock mu(Thread::Current(), lock_);
1643   return capacity_;
1644 }
1645 
1646 void RosAlloc::SetFootprintLimit(size_t new_capacity) {
1647   MutexLock mu(Thread::Current(), lock_);
1648   DCHECK_EQ(RoundUp(new_capacity, kPageSize), new_capacity);
1649   // Only growing the capacity is supported here; shrinking the footprint is done via Trim().
1650   if (capacity_ < new_capacity) {
1651     CHECK_LE(new_capacity, max_capacity_);
1652     capacity_ = new_capacity;
1653     VLOG(heap) << "new capacity=" << capacity_;
1654   }
1655 }
1656 
1657 size_t RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
1658   Thread* self = Thread::Current();
1659   // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
1660   ReaderMutexLock wmu(self, bulk_free_lock_);
1661   size_t free_bytes = 0U;
1662   for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
1663     MutexLock mu(self, *size_bracket_locks_[idx]);
1664     Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
1665     CHECK(thread_local_run != nullptr);
1666     // Invalid means already revoked.
1667     DCHECK(thread_local_run->IsThreadLocal());
1668     if (thread_local_run != dedicated_full_run_) {
1669       // Note the thread local run may not be full here.
1670       thread->SetRosAllocRun(idx, dedicated_full_run_);
1671       DCHECK_EQ(thread_local_run->magic_num_, kMagicNum);
1672       // Count the number of free slots left.
1673       size_t num_free_slots = thread_local_run->NumberOfFreeSlots();
1674       free_bytes += num_free_slots * bracketSizes[idx];
1675       bool dont_care;
1676       thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care);
1677       thread_local_run->SetIsThreadLocal(false);
1678       thread_local_run->MergeBulkFreeBitMapIntoAllocBitMap();
1679       DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
1680       DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
1681       RevokeRun(self, idx, thread_local_run);
1682     }
1683   }
1684   return free_bytes;
1685 }
1686 
1687 void RosAlloc::RevokeRun(Thread* self, size_t idx, Run* run) {
1688   size_bracket_locks_[idx]->AssertHeld(self);
1689   DCHECK(run != dedicated_full_run_);
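  // Depending on occupancy, the revoked run is recorded as full (debug builds only), has its
  // pages freed if it is completely empty, or goes back into the non-full run set.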
1690   if (run->IsFull()) {
1691     if (kIsDebugBuild) {
1692       full_runs_[idx].insert(run);
1693       DCHECK(full_runs_[idx].find(run) != full_runs_[idx].end());
1694       if (kTraceRosAlloc) {
1695         LOG(INFO) << __PRETTY_FUNCTION__  << " : Inserted run 0x" << std::hex
1696                   << reinterpret_cast<intptr_t>(run)
1697                   << " into full_runs_[" << std::dec << idx << "]";
1698       }
1699     }
1700   } else if (run->IsAllFree()) {
1701     run->ZeroHeader();
1702     MutexLock mu(self, lock_);
1703     FreePages(self, run, true);
1704   } else {
1705     non_full_runs_[idx].insert(run);
1706     DCHECK(non_full_runs_[idx].find(run) != non_full_runs_[idx].end());
1707     if (kTraceRosAlloc) {
1708       LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
1709                 << reinterpret_cast<intptr_t>(run)
1710                 << " into non_full_runs_[" << std::dec << idx << "]";
1711     }
1712   }
1713 }
1714 
1715 void RosAlloc::RevokeThreadUnsafeCurrentRuns() {
1716   // Revoke the current runs which share the same idx as thread local runs.
1717   Thread* self = Thread::Current();
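  // "Thread unsafe" means the caller must ensure no mutator is allocating concurrently,
  // e.g. during a GC pause (see RevokeAllThreadLocalRuns()).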
1718   for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; ++idx) {
1719     MutexLock mu(self, *size_bracket_locks_[idx]);
1720     if (current_runs_[idx] != dedicated_full_run_) {
1721       RevokeRun(self, idx, current_runs_[idx]);
1722       current_runs_[idx] = dedicated_full_run_;
1723     }
1724   }
1725 }
1726 
1727 size_t RosAlloc::RevokeAllThreadLocalRuns() {
1728   // This is called when mutator threads won't allocate, such as at
1729   // Zygote creation time or during a GC pause.
1730   MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
1731   MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_);
1732   std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
1733   size_t free_bytes = 0U;
1734   for (Thread* thread : thread_list) {
1735     free_bytes += RevokeThreadLocalRuns(thread);
1736   }
1737   RevokeThreadUnsafeCurrentRuns();
1738   return free_bytes;
1739 }
1740 
1741 void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) {
1742   if (kIsDebugBuild) {
1743     Thread* self = Thread::Current();
1744     // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
1745     ReaderMutexLock wmu(self, bulk_free_lock_);
1746     for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
1747       MutexLock mu(self, *size_bracket_locks_[idx]);
1748       Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
1749       DCHECK(thread_local_run == nullptr || thread_local_run == dedicated_full_run_);
1750     }
1751   }
1752 }
1753 
1754 void RosAlloc::AssertAllThreadLocalRunsAreRevoked() {
1755   if (kIsDebugBuild) {
1756     Thread* self = Thread::Current();
1757     MutexLock shutdown_mu(self, *Locks::runtime_shutdown_lock_);
1758     MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
1759     std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
1760     for (Thread* t : thread_list) {
1761       AssertThreadLocalRunsAreRevoked(t);
1762     }
1763     for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; ++idx) {
1764       MutexLock brackets_mu(self, *size_bracket_locks_[idx]);
1765       CHECK_EQ(current_runs_[idx], dedicated_full_run_);
1766     }
1767   }
1768 }
1769 
1770 void RosAlloc::Initialize() {
1771   // bracketSizes.
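  // Brackets 0-31 hold 16, 32, ..., 512 byte slots; the last two brackets hold 1 KB and 2 KB slots.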
1772   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
1773     if (i < kNumOfSizeBrackets - 2) {
1774       bracketSizes[i] = 16 * (i + 1);
1775     } else if (i == kNumOfSizeBrackets - 2) {
1776       bracketSizes[i] = 1 * KB;
1777     } else {
1778       DCHECK_EQ(i, kNumOfSizeBrackets - 1);
1779       bracketSizes[i] = 2 * KB;
1780     }
1781     if (kTraceRosAlloc) {
1782       LOG(INFO) << "bracketSizes[" << i << "]=" << bracketSizes[i];
1783     }
1784   }
1785   // numOfPages.
1786   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
1787     if (i < 4) {
1788       numOfPages[i] = 1;
1789     } else if (i < 8) {
1790       numOfPages[i] = 1;
1791     } else if (i < 16) {
1792       numOfPages[i] = 4;
1793     } else if (i < 32) {
1794       numOfPages[i] = 8;
1795     } else if (i == 32) {
1796       DCHECK_EQ(i, kNumOfSizeBrackets - 2);
1797       numOfPages[i] = 16;
1798     } else {
1799       DCHECK_EQ(i, kNumOfSizeBrackets - 1);
1800       numOfPages[i] = 32;
1801     }
1802     if (kTraceRosAlloc) {
1803       LOG(INFO) << "numOfPages[" << i << "]=" << numOfPages[i];
1804     }
1805   }
1806   // Compute numOfSlots, headerSizes, and the bit map offsets.
1807   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
1808     size_t bracket_size = bracketSizes[i];
1809     size_t run_size = kPageSize * numOfPages[i];
1810     size_t max_num_of_slots = run_size / bracket_size;
1811     // Compute the actual number of slots by taking the header and
1812     // alignment into account.
1813     size_t fixed_header_size = RoundUp(Run::fixed_header_size(), sizeof(uint32_t));
1814     DCHECK_EQ(fixed_header_size, static_cast<size_t>(8));
1815     size_t header_size = 0;
1816     size_t bulk_free_bit_map_offset = 0;
1817     size_t thread_local_free_bit_map_offset = 0;
1818     size_t num_of_slots = 0;
1819     // Search for the maximum number of slots that allows enough space
1820     // for the header (including the bit maps.)
1821     for (int s = max_num_of_slots; s >= 0; s--) {
1822       size_t tmp_slots_size = bracket_size * s;
1823       size_t tmp_bit_map_size = RoundUp(s, sizeof(uint32_t) * kBitsPerByte) / kBitsPerByte;
1824       size_t tmp_bulk_free_bit_map_size = tmp_bit_map_size;
1825       size_t tmp_bulk_free_bit_map_off = fixed_header_size + tmp_bit_map_size;
1826       size_t tmp_thread_local_free_bit_map_size = tmp_bit_map_size;
1827       size_t tmp_thread_local_free_bit_map_off = tmp_bulk_free_bit_map_off + tmp_bulk_free_bit_map_size;
1828       size_t tmp_unaligned_header_size = tmp_thread_local_free_bit_map_off + tmp_thread_local_free_bit_map_size;
1829       // Align up the unaligned header size. bracket_size may not be a power of two.
1830       size_t tmp_header_size = (tmp_unaligned_header_size % bracket_size == 0) ?
1831           tmp_unaligned_header_size :
1832           tmp_unaligned_header_size + (bracket_size - tmp_unaligned_header_size % bracket_size);
1833       DCHECK_EQ(tmp_header_size % bracket_size, static_cast<size_t>(0));
1834       DCHECK_EQ(tmp_header_size % 8, static_cast<size_t>(0));
1835       if (tmp_slots_size + tmp_header_size <= run_size) {
1836         // Found the right number of slots, that is, there was enough
1837         // space for the header (including the bit maps.)
1838         num_of_slots = s;
1839         header_size = tmp_header_size;
1840         bulk_free_bit_map_offset = tmp_bulk_free_bit_map_off;
1841         thread_local_free_bit_map_offset = tmp_thread_local_free_bit_map_off;
1842         break;
1843       }
1844     }
1845     DCHECK(num_of_slots > 0 && header_size > 0 && bulk_free_bit_map_offset > 0);
1846     // Add the padding for the alignment remainder.
1847     header_size += run_size % bracket_size;
1848     DCHECK_EQ(header_size + num_of_slots * bracket_size, run_size);
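    // For example, assuming 4 KB pages, bracket 0 (16-byte slots, one page) ends up with
    // num_of_slots = 249 and header_size = 112: the 8-byte fixed header plus three 32-byte
    // bit maps is 104 bytes, rounded up to the 16-byte bracket size, and 112 + 249 * 16 == 4096.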
1849     numOfSlots[i] = num_of_slots;
1850     headerSizes[i] = header_size;
1851     bulkFreeBitMapOffsets[i] = bulk_free_bit_map_offset;
1852     threadLocalFreeBitMapOffsets[i] = thread_local_free_bit_map_offset;
1853     if (kTraceRosAlloc) {
1854       LOG(INFO) << "numOfSlots[" << i << "]=" << numOfSlots[i]
1855                 << ", headerSizes[" << i << "]=" << headerSizes[i]
1856                 << ", bulkFreeBitMapOffsets[" << i << "]=" << bulkFreeBitMapOffsets[i]
1857                 << ", threadLocalFreeBitMapOffsets[" << i << "]=" << threadLocalFreeBitMapOffsets[i];;
1858     }
1859   }
1860   // Fill the alloc bitmap so nobody can successfully allocate from it.
1861   if (kIsDebugBuild) {
1862     dedicated_full_run_->magic_num_ = kMagicNum;
1863   }
1864   // It doesn't matter which size bracket we use since the main goal is to have the allocation
1865   // fail 100% of the time you attempt to allocate into the dedicated full run.
1866   dedicated_full_run_->size_bracket_idx_ = 0;
1867   dedicated_full_run_->FillAllocBitMap();
1868   dedicated_full_run_->SetIsThreadLocal(true);
1869 }
1870 
1871 void RosAlloc::BytesAllocatedCallback(void* start ATTRIBUTE_UNUSED, void* end ATTRIBUTE_UNUSED,
1872                                       size_t used_bytes, void* arg) {
1873   if (used_bytes == 0) {
1874     return;
1875   }
1876   size_t* bytes_allocated = reinterpret_cast<size_t*>(arg);
1877   *bytes_allocated += used_bytes;
1878 }
1879 
1880 void RosAlloc::ObjectsAllocatedCallback(void* start ATTRIBUTE_UNUSED, void* end ATTRIBUTE_UNUSED,
1881                                         size_t used_bytes, void* arg) {
1882   if (used_bytes == 0) {
1883     return;
1884   }
1885   size_t* objects_allocated = reinterpret_cast<size_t*>(arg);
1886   ++(*objects_allocated);
1887 }
1888 
1889 void RosAlloc::Verify() {
1890   Thread* self = Thread::Current();
1891   CHECK(Locks::mutator_lock_->IsExclusiveHeld(self))
1892       << "The mutator locks isn't exclusively locked at " << __PRETTY_FUNCTION__;
1893   MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
1894   ReaderMutexLock wmu(self, bulk_free_lock_);
1895   std::vector<Run*> runs;
1896   {
1897     MutexLock lock_mu(self, lock_);
1898     size_t pm_end = page_map_size_;
1899     size_t i = 0;
1900     size_t valgrind_modifier =  running_on_valgrind_ ?
1901         2 * ::art::gc::space::kDefaultValgrindRedZoneBytes :  // Redzones before and after.
1902         0;
1903     while (i < pm_end) {
1904       uint8_t pm = page_map_[i];
1905       switch (pm) {
1906         case kPageMapReleased:
1907           // Fall-through.
1908         case kPageMapEmpty: {
1909           // The start of a free page run.
1910           FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
1911           DCHECK_EQ(fpr->magic_num_, kMagicNumFree);
1912           CHECK(free_page_runs_.find(fpr) != free_page_runs_.end())
1913               << "An empty page must belong to the free page run set";
1914           size_t fpr_size = fpr->ByteSize(this);
1915           CHECK(IsAligned<kPageSize>(fpr_size))
1916               << "A free page run size isn't page-aligned : " << fpr_size;
1917           size_t num_pages = fpr_size / kPageSize;
1918           CHECK_GT(num_pages, static_cast<uintptr_t>(0))
1919               << "A free page run size must be > 0 : " << fpr_size;
1920           for (size_t j = i + 1; j < i + num_pages; ++j) {
1921             CHECK(IsFreePage(j))
1922                 << "A mismatch between the page map table for kPageMapEmpty "
1923                 << " at page index " << j
1924                 << " and the free page run size : page index range : "
1925                 << i << " to " << (i + num_pages) << std::endl << DumpPageMap();
1926           }
1927           i += num_pages;
1928           CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
1929                               << std::endl << DumpPageMap();
1930           break;
1931         }
1932         case kPageMapLargeObject: {
1933           // The start of a large object.
1934           size_t num_pages = 1;
1935           size_t idx = i + 1;
1936           while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
1937             num_pages++;
1938             idx++;
1939           }
1940           uint8_t* start = base_ + i * kPageSize;
1941           if (running_on_valgrind_) {
1942             start += ::art::gc::space::kDefaultValgrindRedZoneBytes;
1943           }
1944           mirror::Object* obj = reinterpret_cast<mirror::Object*>(start);
1945           size_t obj_size = obj->SizeOf();
1946           CHECK_GT(obj_size + valgrind_modifier, kLargeSizeThreshold)
1947               << "A rosalloc large object size must be > " << kLargeSizeThreshold;
1948           CHECK_EQ(num_pages, RoundUp(obj_size + valgrind_modifier, kPageSize) / kPageSize)
1949               << "A rosalloc large object size " << obj_size + valgrind_modifier
1950               << " does not match the page map table " << (num_pages * kPageSize)
1951               << std::endl << DumpPageMap();
1952           i += num_pages;
1953           CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
1954                               << std::endl << DumpPageMap();
1955           break;
1956         }
1957         case kPageMapLargeObjectPart:
1958           LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm) << std::endl << DumpPageMap();
1959           break;
1960         case kPageMapRun: {
1961           // The start of a run.
1962           Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
1963           DCHECK_EQ(run->magic_num_, kMagicNum);
1964           size_t idx = run->size_bracket_idx_;
1965           CHECK_LT(idx, kNumOfSizeBrackets) << "Out of range size bracket index : " << idx;
1966           size_t num_pages = numOfPages[idx];
1967           CHECK_GT(num_pages, static_cast<uintptr_t>(0))
1968               << "Run size must be > 0 : " << num_pages;
1969           for (size_t j = i + 1; j < i + num_pages; ++j) {
1970             CHECK_EQ(page_map_[j], kPageMapRunPart)
1971                 << "A mismatch between the page map table for kPageMapRunPart "
1972                 << " at page index " << j
1973                 << " and the run size : page index range " << i << " to " << (i + num_pages)
1974                 << std::endl << DumpPageMap();
1975           }
1976           // Don't verify the dedicated_full_run_ since it doesn't have any real allocations.
1977           runs.push_back(run);
1978           i += num_pages;
1979           CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
1980                               << std::endl << DumpPageMap();
1981           break;
1982         }
1983         case kPageMapRunPart:
1984           // Fall-through.
1985         default:
1986           LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm) << std::endl << DumpPageMap();
1987           break;
1988       }
1989     }
1990   }
1991   std::list<Thread*> threads = Runtime::Current()->GetThreadList()->GetList();
1992   for (Thread* thread : threads) {
1993     for (size_t i = 0; i < kNumThreadLocalSizeBrackets; ++i) {
1994       MutexLock brackets_mu(self, *size_bracket_locks_[i]);
1995       Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(i));
1996       CHECK(thread_local_run != nullptr);
1997       CHECK(thread_local_run->IsThreadLocal());
1998       CHECK(thread_local_run == dedicated_full_run_ ||
1999             thread_local_run->size_bracket_idx_ == i);
2000     }
2001   }
2002   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
2003     MutexLock brackets_mu(self, *size_bracket_locks_[i]);
2004     Run* current_run = current_runs_[i];
2005     CHECK(current_run != nullptr);
2006     if (current_run != dedicated_full_run_) {
2007       // The dedicated full run is currently marked as thread local.
2008       CHECK(!current_run->IsThreadLocal());
2009       CHECK_EQ(current_run->size_bracket_idx_, i);
2010     }
2011   }
2012   // Call Verify() on the runs here, after lock_ has been released above, to respect the lock order.
2013   for (auto& run : runs) {
2014     run->Verify(self, this, running_on_valgrind_);
2015   }
2016 }
2017 
2018 void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc, bool running_on_valgrind) {
2019   DCHECK_EQ(magic_num_, kMagicNum) << "Bad magic number : " << Dump();
2020   const size_t idx = size_bracket_idx_;
2021   CHECK_LT(idx, kNumOfSizeBrackets) << "Out of range size bracket index : " << Dump();
2022   uint8_t* slot_base = reinterpret_cast<uint8_t*>(this) + headerSizes[idx];
2023   const size_t num_slots = numOfSlots[idx];
2024   const size_t num_vec = RoundUp(num_slots, 32) / 32;
2025   CHECK_GT(num_vec, 0U);
2026   size_t bracket_size = IndexToBracketSize(idx);
2027   CHECK_EQ(slot_base + num_slots * bracket_size,
2028            reinterpret_cast<uint8_t*>(this) + numOfPages[idx] * kPageSize)
2029       << "Mismatch in the end address of the run " << Dump();
2030   // Check that the bulk free bitmap is clean. It's only used during BulkFree().
2031   CHECK(IsBulkFreeBitmapClean()) << "The bulk free bit map isn't clean " << Dump();
2032   uint32_t last_word_mask = GetBitmapLastVectorMask(num_slots, num_vec);
2033   // Make sure all the bits at the end of the run are set so that we don't allocate there.
2034   CHECK_EQ(alloc_bit_map_[num_vec - 1] & last_word_mask, last_word_mask);
2035   // Ensure that the first bitmap index is valid.
2036   CHECK_LT(first_search_vec_idx_, num_vec);
2037   // Check the thread local runs, the current runs, and the run sets.
2038   if (IsThreadLocal()) {
2039     // If it's a thread local run, then it must be pointed to by an owner thread.
2040     bool owner_found = false;
2041     std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
2042     for (auto it = thread_list.begin(); it != thread_list.end(); ++it) {
2043       Thread* thread = *it;
2044       for (size_t i = 0; i < kNumThreadLocalSizeBrackets; i++) {
2045         MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
2046         Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(i));
2047         if (thread_local_run == this) {
2048           CHECK(!owner_found)
2049               << "A thread local run has more than one owner thread " << Dump();
2050           CHECK_EQ(i, idx)
2051               << "A mismatching size bracket index in a thread local run " << Dump();
2052           owner_found = true;
2053         }
2054       }
2055     }
2056     CHECK(owner_found) << "A thread local run has no owner thread " << Dump();
2057   } else {
2058     // If it's not thread local, check that the thread local free bitmap is clean.
2059     CHECK(IsThreadLocalFreeBitmapClean())
2060         << "A non-thread-local run's thread local free bitmap isn't clean "
2061         << Dump();
2062     // Check if it's a current run for the size bucket.
2063     bool is_current_run = false;
2064     for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
2065       MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
2066       Run* current_run = rosalloc->current_runs_[i];
2067       if (idx == i) {
2068         if (this == current_run) {
2069           is_current_run = true;
2070         }
2071       } else {
2072         // If the size bucket index does not match, then it must not
2073         // be a current run.
2074         CHECK_NE(this, current_run)
2075             << "A current run points to a run with a wrong size bracket index " << Dump();
2076       }
2077     }
2078     // If it's neither a thread local or current run, then it must be
2079     // in a run set.
2080     if (!is_current_run) {
2081       MutexLock mu(self, rosalloc->lock_);
2082       auto& non_full_runs = rosalloc->non_full_runs_[idx];
2083       // If it's all free, it must be a free page run rather than a run.
2084       CHECK(!IsAllFree()) << "A free run must be in a free page run set " << Dump();
2085       if (!IsFull()) {
2086         // If it's not full, it must be in the non-full run set.
2087         CHECK(non_full_runs.find(this) != non_full_runs.end())
2088             << "A non-full run isn't in the non-full run set " << Dump();
2089       } else {
2090         // If it's full, it must be in the full run set (debug build only).
2091         if (kIsDebugBuild) {
2092           auto& full_runs = rosalloc->full_runs_[idx];
2093           CHECK(full_runs.find(this) != full_runs.end())
2094               << " A full run isn't in the full run set " << Dump();
2095         }
2096       }
2097     }
2098   }
2099   // Check each slot.
2100   size_t slots = 0;
2101   size_t valgrind_modifier = running_on_valgrind ?
2102       2 * ::art::gc::space::kDefaultValgrindRedZoneBytes :
2103       0U;
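  // Each 32-bit word of the bit maps covers 32 slots; 'slots' is the index of the first
  // slot covered by the current word.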
2104   for (size_t v = 0; v < num_vec; v++, slots += 32) {
2105     DCHECK_GE(num_slots, slots) << "Out of bounds";
2106     uint32_t vec = alloc_bit_map_[v];
2107     uint32_t thread_local_free_vec = ThreadLocalFreeBitMap()[v];
2108     size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
2109     for (size_t i = 0; i < end; ++i) {
2110       bool is_allocated = ((vec >> i) & 0x1) != 0;
2111       // If a thread local run, slots may be marked freed in the
2112       // thread local free bitmap.
2113       bool is_thread_local_freed = IsThreadLocal() && ((thread_local_free_vec >> i) & 0x1) != 0;
2114       if (is_allocated && !is_thread_local_freed) {
2115         uint8_t* slot_addr = slot_base + (slots + i) * bracket_size;
2116         if (running_on_valgrind) {
2117           slot_addr += ::art::gc::space::kDefaultValgrindRedZoneBytes;
2118         }
2119         mirror::Object* obj = reinterpret_cast<mirror::Object*>(slot_addr);
2120         size_t obj_size = obj->SizeOf();
2121         CHECK_LE(obj_size + valgrind_modifier, kLargeSizeThreshold)
2122             << "A run slot contains a large object " << Dump();
2123         CHECK_EQ(SizeToIndex(obj_size + valgrind_modifier), idx)
2124             << PrettyTypeOf(obj) << " "
2125             << "obj_size=" << obj_size << "(" << obj_size + valgrind_modifier << "), idx=" << idx
2126             << " A run slot contains an object with wrong size " << Dump();
2127       }
2128     }
2129   }
2130 }
2131 
2132 size_t RosAlloc::ReleasePages() {
2133   VLOG(heap) << "RosAlloc::ReleasePages()";
2134   DCHECK(!DoesReleaseAllPages());
2135   Thread* self = Thread::Current();
2136   size_t reclaimed_bytes = 0;
2137   size_t i = 0;
2138   // Check the page map size which might have changed due to grow/shrink.
2139   while (i < page_map_size_) {
2140     // Reading the page map without a lock is racy but the race is benign since it should only
2141     // result in occasionally not releasing pages which we could release.
2142     uint8_t pm = page_map_[i];
2143     switch (pm) {
2144       case kPageMapReleased:
2145         // Fall through.
2146       case kPageMapEmpty: {
2147         // This is currently the start of a free page run.
2148         // Acquire the lock to prevent other threads racing in and modifying the page map.
2149         MutexLock mu(self, lock_);
2150         // Check that it's still empty after we acquired the lock since another thread could have
2151         // raced in and placed an allocation here.
2152         if (IsFreePage(i)) {
2153           // Free page runs can start with a released page if we coalesced a released page free
2154           // page run with an empty page run.
2155           FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
2156           // There is a race condition where FreePage can coalesce fpr with the previous
2157           // free page run before we acquire lock_. In that case free_page_runs_.find will not find
2158           // a run starting at fpr. To handle this race, we skip reclaiming the page range and go
2159           // to the next page.
2160           if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
2161             size_t fpr_size = fpr->ByteSize(this);
2162             DCHECK(IsAligned<kPageSize>(fpr_size));
2163             uint8_t* start = reinterpret_cast<uint8_t*>(fpr);
2164             reclaimed_bytes += ReleasePageRange(start, start + fpr_size);
2165             size_t pages = fpr_size / kPageSize;
2166             CHECK_GT(pages, 0U) << "Infinite loop probable";
2167             i += pages;
2168             DCHECK_LE(i, page_map_size_);
2169             break;
2170           }
2171         }
2172         FALLTHROUGH_INTENDED;
2173       }
2174       case kPageMapLargeObject:      // Fall through.
2175       case kPageMapLargeObjectPart:  // Fall through.
2176       case kPageMapRun:              // Fall through.
2177       case kPageMapRunPart:          // Fall through.
2178         ++i;
2179         break;  // Skip.
2180       default:
2181         LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
2182         break;
2183     }
2184   }
2185   return reclaimed_bytes;
2186 }
2187 
2188 size_t RosAlloc::ReleasePageRange(uint8_t* start, uint8_t* end) {
2189   DCHECK_ALIGNED(start, kPageSize);
2190   DCHECK_ALIGNED(end, kPageSize);
2191   DCHECK_LT(start, end);
2192   if (kIsDebugBuild) {
2193     // In the debug build, the first page of a free page run
2194     // contains a magic number for debugging. Exclude it.
2195     start += kPageSize;
2196 
2197     // Single pages won't be released.
2198     if (start == end) {
2199       return 0;
2200     }
2201   }
2202   if (!kMadviseZeroes) {
2203     // TODO: Do this when we resurrect the page instead.
2204     memset(start, 0, end - start);
2205   }
2206   CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
2207   size_t pm_idx = ToPageMapIndex(start);
2208   size_t reclaimed_bytes = 0;
2209   // Calculate reclaimed bytes and update the page map.
2210   const size_t max_idx = pm_idx + (end - start) / kPageSize;
2211   for (; pm_idx < max_idx; ++pm_idx) {
2212     DCHECK(IsFreePage(pm_idx));
2213     if (page_map_[pm_idx] == kPageMapEmpty) {
2214       // Mark the page as released and update how many bytes we released.
2215       reclaimed_bytes += kPageSize;
2216       page_map_[pm_idx] = kPageMapReleased;
2217     }
2218   }
2219   return reclaimed_bytes;
2220 }
2221 
2222 void RosAlloc::LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) {
2223   Thread* self = Thread::Current();
2224   size_t largest_continuous_free_pages = 0;
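  // Note: despite the name, this value is a byte count (the size of the largest free page run).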
2225   WriterMutexLock wmu(self, bulk_free_lock_);
2226   MutexLock mu(self, lock_);
2227   for (FreePageRun* fpr : free_page_runs_) {
2228     largest_continuous_free_pages = std::max(largest_continuous_free_pages,
2229                                              fpr->ByteSize(this));
2230   }
2231   if (failed_alloc_bytes > kLargeSizeThreshold) {
2232     // Large allocation.
2233     size_t required_bytes = RoundUp(failed_alloc_bytes, kPageSize);
2234     if (required_bytes > largest_continuous_free_pages) {
2235       os << "; failed due to fragmentation (required continguous free "
2236          << required_bytes << " bytes where largest contiguous free "
2237          <<  largest_continuous_free_pages << " bytes)";
2238     }
2239   } else {
2240     // Non-large allocation.
2241     size_t required_bytes = numOfPages[SizeToIndex(failed_alloc_bytes)] * kPageSize;
2242     if (required_bytes > largest_continuous_free_pages) {
2243       os << "; failed due to fragmentation (required continguous free "
2244          << required_bytes << " bytes for a new buffer where largest contiguous free "
2245          <<  largest_continuous_free_pages << " bytes)";
2246     }
2247   }
2248 }
2249 
2250 }  // namespace allocator
2251 }  // namespace gc
2252 }  // namespace art
2253