1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include <atomic>
17 
18 #include "tensorflow/core/common_runtime/bfc_allocator.h"
19 
20 #include "tensorflow/core/common_runtime/allocator_retry.h"
21 #include "tensorflow/core/framework/device_base.h"
22 #include "tensorflow/core/lib/core/bits.h"
23 #include "tensorflow/core/lib/gtl/stl_util.h"
24 #include "tensorflow/core/lib/strings/numbers.h"
25 #include "tensorflow/core/lib/strings/str_util.h"
26 #include "tensorflow/core/lib/strings/strcat.h"
27 #include "tensorflow/core/platform/logging.h"
28 #include "tensorflow/core/platform/mutex.h"
29 #include "tensorflow/core/platform/types.h"
30 
31 namespace tensorflow {
32 
BFCAllocator(SubAllocator * sub_allocator,size_t total_memory,bool allow_growth,const string & name)33 BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
34                            bool allow_growth, const string& name)
35     : sub_allocator_(sub_allocator),
36       name_(name),
37       free_chunks_list_(kInvalidChunkHandle),
38       next_allocation_id_(1) {
39   if (allow_growth) {
40     // 1MiB smallest initial allocation, unless total memory available
41     // is less.
42     curr_region_allocation_bytes_ =
43         RoundedBytes(std::min(total_memory, size_t{1048576}));
44   } else {
45     curr_region_allocation_bytes_ = RoundedBytes(total_memory);
46   }
47 
48   // Allocate the requested amount of memory.
49   memory_limit_ = total_memory;
50   stats_.bytes_limit = static_cast<int64>(total_memory);
51 
52   // Create a bunch of bins of various good sizes.
53 
54   // We create bins to fit all possible ranges that cover the
55   // memory_limit_ starting from allocations up to 256 bytes to
56   // allocations up to (and including) the memory limit.
57   for (BinNum b = 0; b < kNumBins; b++) {
58     size_t bin_size = BinNumToSize(b);
59     VLOG(1) << "Creating bin of max chunk size "
60             << strings::HumanReadableNumBytes(bin_size);
61     new (BinFromIndex(b)) Bin(this, bin_size);
62     CHECK_EQ(BinForSize(bin_size), BinFromIndex(b));
63     CHECK_EQ(BinForSize(bin_size + 255), BinFromIndex(b));
64     CHECK_EQ(BinForSize(bin_size * 2 - 1), BinFromIndex(b));
65     if (b + 1 < kNumBins) {
66       CHECK_NE(BinForSize(bin_size * 2), BinFromIndex(b));
67     }
68   }
69 }
70 
~BFCAllocator()71 BFCAllocator::~BFCAllocator() {
72   // Return memory back.
73   VLOG(2) << "Number of regions allocated: "
74           << region_manager_.regions().size();
75   for (const auto& region : region_manager_.regions()) {
76     sub_allocator_->Free(region.ptr(), region.memory_size());
77   }
78 
79   for (BinNum b = 0; b < kNumBins; b++) {
80     BinFromIndex(b)->~Bin();
81   }
82 }
83 
ChunkFromHandle(ChunkHandle h)84 BFCAllocator::Chunk* BFCAllocator::ChunkFromHandle(ChunkHandle h) {
85   DCHECK_GE(h, 0);
86   DCHECK_LT(h, static_cast<int>(chunks_.size()));
87   return &(chunks_[h]);
88 }
89 
Extend(size_t alignment,size_t rounded_bytes)90 bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
91   size_t available_bytes = memory_limit_ - total_region_allocated_bytes_;
92   // Rounds available_bytes down to the nearest multiple of kMinAllocationSize.
93   available_bytes = (available_bytes / kMinAllocationSize) * kMinAllocationSize;
94 
95   // Do we have enough space to handle the client's request?
96   // If not, fail immediately.
97   if (rounded_bytes > available_bytes) {
98     return false;
99   }
100 
101   // If curr_region_allocation_bytes_ is not enough to satisfy the
102   // allocation, keep multiplying by a power of two until that is
103   // sufficient.
104   bool increased_allocation = false;
105   while (rounded_bytes > curr_region_allocation_bytes_) {
106     curr_region_allocation_bytes_ *= 2;
107     increased_allocation = true;
108   }
109 
110   // Try allocating.
111   size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes);
112   void* mem_addr = sub_allocator_->Alloc(alignment, bytes);
113   if (mem_addr == nullptr && !started_backpedal_) {
114     // Only backpedal once.
115     started_backpedal_ = true;
116 
117     static constexpr float kBackpedalFactor = 0.9;
118 
119     // Try allocating less memory.
120     while (mem_addr == nullptr) {
121       bytes = RoundedBytes(bytes * kBackpedalFactor);
122       if (bytes < rounded_bytes) break;
123       mem_addr = sub_allocator_->Alloc(alignment, bytes);
124     }
125   }
126 
127   if (mem_addr == nullptr) {
128     return false;
129   }
130 
131   if (!increased_allocation) {
132     // Increase the region size of the next required allocation.
133     curr_region_allocation_bytes_ *= 2;
134   }
135 
136   VLOG(1) << "Extending allocation by " << strings::HumanReadableNumBytes(bytes)
137           << " bytes.";
138 
139   total_region_allocated_bytes_ += bytes;
140   VLOG(1) << "Total allocated bytes: "
141           << strings::HumanReadableNumBytes(total_region_allocated_bytes_);
142 
143   VLOG(1) << "Allocated memory at " << mem_addr << " to "
144           << static_cast<void*>(static_cast<char*>(mem_addr) + bytes);
145   region_manager_.AddAllocationRegion(mem_addr, bytes);
146 
147   // Create one large chunk for the whole memory space that will
148   // be chunked later.
149   ChunkHandle h = AllocateChunk();
150   BFCAllocator::Chunk* c = ChunkFromHandle(h);
151   c->ptr = mem_addr;
152   c->size = bytes;
153   c->allocation_id = -1;
154   c->prev = kInvalidChunkHandle;
155   c->next = kInvalidChunkHandle;
156   c->freed_count = 0;
157 
158   region_manager_.set_handle(c->ptr, h);
159 
160   // Insert the chunk into the right bin.
161   InsertFreeChunkIntoBin(h);
162 
163   return true;
164 }
165 
AllocateChunk()166 BFCAllocator::ChunkHandle BFCAllocator::AllocateChunk() {
167   if (free_chunks_list_ != kInvalidChunkHandle) {
168     ChunkHandle h = free_chunks_list_;
169     Chunk* c = ChunkFromHandle(h);
170     free_chunks_list_ = c->next;
171     return h;
172   } else {
173     ChunkHandle h = chunks_.size();
174     chunks_.resize(h + 1);
175     return h;
176   }
177 }
178 
DeallocateChunk(ChunkHandle h)179 void BFCAllocator::DeallocateChunk(ChunkHandle h) {
180   Chunk* c = ChunkFromHandle(h);
181   c->next = free_chunks_list_;
182   free_chunks_list_ = h;
183 }
184 
AllocateRawInternalWithRetry(size_t unused_alignment,size_t num_bytes,const AllocationAttributes & allocation_attr)185 void* BFCAllocator::AllocateRawInternalWithRetry(
186     size_t unused_alignment, size_t num_bytes,
187     const AllocationAttributes& allocation_attr) {
188   // Fast path: Try once to allocate without getting the retry_helper_ involved
189   uint64 freed_by_count = 0;
190   if (allocation_attr.freed_by_func != nullptr) {
191     freed_by_count = allocation_attr.freed_by_func();
192   }
193   void* r =
194       AllocateRawInternal(unused_alignment, num_bytes, false, freed_by_count);
195   if (r != nullptr) {
196     return r;
197   } else {
198     static const int64 kMaxMillisToWait = 10000;  // 10 seconds
199     r = retry_helper_.AllocateRaw(
200         [this, &allocation_attr](size_t a, size_t nb, bool v) {
201           uint64 freed_by_count = 0;
202           if (allocation_attr.freed_by_func != nullptr) {
203             freed_by_count = allocation_attr.freed_by_func();
204           }
205           return AllocateRawInternal(a, nb, v, freed_by_count);
206         },
207         kMaxMillisToWait, unused_alignment, num_bytes);
208     return r;
209   }
210 }
211 
AllocateRaw(size_t unused_alignment,size_t num_bytes,const AllocationAttributes & allocation_attr)212 void* BFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes,
213                                 const AllocationAttributes& allocation_attr) {
214   VLOG(1) << "AllocateRaw " << Name() << "  " << num_bytes;
215   if (allocation_attr.no_retry_on_failure) {
216     // Return immediately upon the first failure if this is for allocating an
217     // optional scratch space.
218     bool dump_log_on_failure = VLOG_IS_ON(2);
219     uint64 freed_by_count = 0;
220     if (allocation_attr.freed_by_func != nullptr) {
221       freed_by_count = allocation_attr.freed_by_func();
222     }
223     void* result = AllocateRawInternal(unused_alignment, num_bytes,
224                                        dump_log_on_failure, freed_by_count);
225     if (result == nullptr) {
226       static std::atomic<int32> log_counter{0};
227       int32 counter_value = log_counter.load(std::memory_order_relaxed);
228       if (counter_value < 10) {
229         log_counter.store(counter_value + 1, std::memory_order_relaxed);
230         LOG(WARNING)
231             << "Allocator (" << Name() << ") ran out of memory trying "
232             << "to allocate " << strings::HumanReadableNumBytes(num_bytes)
233             << ". The caller indicates that this is not a failure, but"
234             << " may mean that there could be performance gains if more"
235             << " memory were available.";
236       }
237     }
238     return result;
239   } else {
240     return AllocateRawInternalWithRetry(unused_alignment, num_bytes,
241                                         allocation_attr);
242   }
243 }
244 
245 // static
RoundedBytes(size_t bytes)246 size_t BFCAllocator::RoundedBytes(size_t bytes) {
247   size_t rounded_bytes =
248       (kMinAllocationSize *
249        ((bytes + kMinAllocationSize - 1) / kMinAllocationSize));
250   DCHECK_EQ(size_t{0}, rounded_bytes % kMinAllocationSize);
251   return rounded_bytes;
252 }
253 
AllocateRawInternal(size_t unused_alignment,size_t num_bytes,bool dump_log_on_failure,uint64 freed_before)254 void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
255                                         size_t num_bytes,
256                                         bool dump_log_on_failure,
257                                         uint64 freed_before) {
258   if (num_bytes == 0) {
259     LOG(ERROR) << "tried to allocate 0 bytes";
260     return nullptr;
261   }
262   // First, always allocate memory of at least kMinAllocationSize
263   // bytes, and always allocate multiples of kMinAllocationSize bytes
264   // so all memory addresses are nicely byte aligned.
265   size_t rounded_bytes = RoundedBytes(num_bytes);
266 
267   // The BFC allocator tries to find the best fit first.
268   BinNum bin_num = BinNumForSize(rounded_bytes);
269 
270   mutex_lock l(lock_);
271   void* ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
272   if (ptr != nullptr) {
273     return ptr;
274   }
275 
276   // Try to extend
277   if (Extend(unused_alignment, rounded_bytes)) {
278     ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
279     if (ptr != nullptr) {
280       return ptr;
281     }
282   }
283 
284   // We searched all bins for an existing free chunk to use and
285   // couldn't find one.  This means we must have run out of memory,
286   // Dump the memory log for analysis.
287   if (dump_log_on_failure) {
288     LOG(WARNING) << "Allocator (" << Name() << ") ran out of memory trying "
289                  << "to allocate " << strings::HumanReadableNumBytes(num_bytes)
290                  << ".  Current allocation summary follows.";
291     DumpMemoryLog(rounded_bytes);
292     LOG(WARNING) << RenderOccupancy();
293   }
294   return nullptr;
295 }
296 
FindChunkPtr(BinNum bin_num,size_t rounded_bytes,size_t num_bytes,uint64 freed_before)297 void* BFCAllocator::FindChunkPtr(BinNum bin_num, size_t rounded_bytes,
298                                  size_t num_bytes, uint64 freed_before) {
299   // First identify the first bin that could satisfy rounded_bytes.
300   for (; bin_num < kNumBins; bin_num++) {
301     // Start searching from the first bin for the smallest chunk that fits
302     // rounded_bytes.
303     Bin* b = BinFromIndex(bin_num);
304     for (auto citer = b->free_chunks.begin(); citer != b->free_chunks.end();
305          ++citer) {
306       const BFCAllocator::ChunkHandle h = (*citer);
307       BFCAllocator::Chunk* chunk = ChunkFromHandle(h);
308       DCHECK(!chunk->in_use());
309       if (freed_before > 0 && freed_before < chunk->freed_count) {
310         continue;
311       }
312       if (chunk->size >= rounded_bytes) {
313         // We found an existing chunk that fits us that wasn't in use, so remove
314         // it from the free bin structure prior to using.
315         RemoveFreeChunkIterFromBin(&b->free_chunks, citer);
316 
317         // If we can break the size of the chunk into two reasonably large
318         // pieces, do so.  In any case don't waste more than
319         // kMaxInternalFragmentation bytes on padding this alloc.
320         const int64 kMaxInternalFragmentation = 128 << 20;  // 128mb
321         if (chunk->size >= rounded_bytes * 2 ||
322             static_cast<int64>(chunk->size) - rounded_bytes >=
323                 kMaxInternalFragmentation) {
324           SplitChunk(h, rounded_bytes);
325           chunk = ChunkFromHandle(h);  // Update chunk pointer in case it moved
326         }
327 
328         // The requested size of the returned chunk is what the user
329         // has allocated.
330         chunk->requested_size = num_bytes;
331         // Assign a unique id and increment the id counter, marking the
332         // chunk as being in use.
333         chunk->allocation_id = next_allocation_id_++;
334 
335         // Update stats.
336         ++stats_.num_allocs;
337         stats_.bytes_in_use += chunk->size;
338         stats_.peak_bytes_in_use =
339             std::max(stats_.peak_bytes_in_use, stats_.bytes_in_use);
340         stats_.largest_alloc_size =
341             std::max<std::size_t>(stats_.largest_alloc_size, chunk->size);
342 
343         VLOG(4) << "Returning: " << chunk->ptr;
344         if (VLOG_IS_ON(4)) {
345           LOG(INFO) << "A: " << RenderOccupancy();
346         }
347         return chunk->ptr;
348       }
349     }
350   }
351 
352   return nullptr;
353 }
354 
SplitChunk(BFCAllocator::ChunkHandle h,size_t num_bytes)355 void BFCAllocator::SplitChunk(BFCAllocator::ChunkHandle h, size_t num_bytes) {
356   // Allocate the new chunk before we do any ChunkFromHandle
357   ChunkHandle h_new_chunk = AllocateChunk();
358 
359   Chunk* c = ChunkFromHandle(h);
360   CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));
361 
362   // Create a new chunk starting num_bytes after c
363   BFCAllocator::Chunk* new_chunk = ChunkFromHandle(h_new_chunk);
364   new_chunk->ptr = static_cast<void*>(static_cast<char*>(c->ptr) + num_bytes);
365   region_manager_.set_handle(new_chunk->ptr, h_new_chunk);
366 
367   // Set the new sizes of the chunks.
368   new_chunk->size = c->size - num_bytes;
369   c->size = num_bytes;
370 
371   // The new chunk is not in use.
372   new_chunk->allocation_id = -1;
373 
374   // It inherits the freed time.
375   new_chunk->freed_count = c->freed_count;
376 
377   // Maintain the pointers.
378   // c <-> c_neighbor becomes
379   // c <-> new_chunk <-> c_neighbor
380   BFCAllocator::ChunkHandle h_neighbor = c->next;
381   new_chunk->prev = h;
382   new_chunk->next = h_neighbor;
383   c->next = h_new_chunk;
384   if (h_neighbor != kInvalidChunkHandle) {
385     Chunk* c_neighbor = ChunkFromHandle(h_neighbor);
386     c_neighbor->prev = h_new_chunk;
387   }
388 
389   // Add the newly free chunk to the free bin.
390   InsertFreeChunkIntoBin(h_new_chunk);
391 }
392 
DeallocateRaw(void * ptr)393 void BFCAllocator::DeallocateRaw(void* ptr) {
394   VLOG(1) << "DeallocateRaw " << Name() << " " << RequestedSize(ptr);
395   DeallocateRawInternal(ptr);
396   retry_helper_.NotifyDealloc();
397 }
398 
DeallocateRawInternal(void * ptr)399 void BFCAllocator::DeallocateRawInternal(void* ptr) {
400   if (ptr == nullptr) {
401     LOG(ERROR) << "tried to deallocate nullptr";
402     return;
403   }
404   mutex_lock l(lock_);
405 
406   // Find the chunk from the ptr.
407   BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
408   CHECK(h != kInvalidChunkHandle);
409 
410   // Consider coalescing it.
411   FreeAndMaybeCoalesce(h);
412 
413   if (VLOG_IS_ON(4)) {
414     LOG(INFO) << "F: " << RenderOccupancy();
415   }
416 }
417 
418 // Merges h1 and h2 when Chunk(h1)->next is h2 and Chunk(h2)->prev is c1.
419 // We merge Chunk(h2) into Chunk(h1).
Merge(BFCAllocator::ChunkHandle h1,BFCAllocator::ChunkHandle h2)420 void BFCAllocator::Merge(BFCAllocator::ChunkHandle h1,
421                          BFCAllocator::ChunkHandle h2) {
422   Chunk* c1 = ChunkFromHandle(h1);
423   Chunk* c2 = ChunkFromHandle(h2);
424   // We can only merge chunks that are not in use.
425   CHECK(!c1->in_use() && !c2->in_use());
426 
427   // c1's prev doesn't change, still points to the same ptr, and is
428   // still not in use.
429 
430   // Fix up neighbor pointers
431   //
432   // c1 <-> c2 <-> c3 should become
433   // c1 <-> c3
434 
435   BFCAllocator::ChunkHandle h3 = c2->next;
436   c1->next = h3;
437   CHECK(c2->prev == h1);
438   if (h3 != kInvalidChunkHandle) {
439     BFCAllocator::Chunk* c3 = ChunkFromHandle(h3);
440     c3->prev = h1;
441   }
442 
443   // Set the new size
444   c1->size += c2->size;
445 
446   // Pick latest free time.
447   c1->freed_count = std::max(c1->freed_count, c2->freed_count);
448 
449   DeleteChunk(h2);
450 }
451 
DeleteChunk(ChunkHandle h)452 void BFCAllocator::DeleteChunk(ChunkHandle h) {
453   // Delete h and cleanup all state
454   Chunk* c = ChunkFromHandle(h);
455   //  VLOG(4) << "Removing: " << c->ptr;
456   region_manager_.erase(c->ptr);
457   DeallocateChunk(h);
458 }
459 
InsertFreeChunkIntoBin(BFCAllocator::ChunkHandle h)460 void BFCAllocator::InsertFreeChunkIntoBin(BFCAllocator::ChunkHandle h) {
461   Chunk* c = ChunkFromHandle(h);
462   CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));
463   BinNum bin_num = BinNumForSize(c->size);
464   Bin* new_bin = BinFromIndex(bin_num);
465   c->bin_num = bin_num;
466   new_bin->free_chunks.insert(h);
467 }
468 
RemoveFreeChunkIterFromBin(BFCAllocator::Bin::FreeChunkSet * free_chunks,const BFCAllocator::Bin::FreeChunkSet::iterator & citer)469 void BFCAllocator::RemoveFreeChunkIterFromBin(
470     BFCAllocator::Bin::FreeChunkSet* free_chunks,
471     const BFCAllocator::Bin::FreeChunkSet::iterator& citer) {
472   ChunkHandle h = *citer;
473   Chunk* c = ChunkFromHandle(h);
474   CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
475   free_chunks->erase(citer);
476   c->bin_num = kInvalidBinNum;
477 }
478 
RemoveFreeChunkFromBin(BFCAllocator::ChunkHandle h)479 void BFCAllocator::RemoveFreeChunkFromBin(BFCAllocator::ChunkHandle h) {
480   Chunk* c = ChunkFromHandle(h);
481   CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
482   CHECK_GT(BinFromIndex(c->bin_num)->free_chunks.erase(h), 0)
483       << "Could not find chunk in bin";
484   c->bin_num = kInvalidBinNum;
485 }
486 
FreeAndMaybeCoalesce(BFCAllocator::ChunkHandle h)487 void BFCAllocator::FreeAndMaybeCoalesce(BFCAllocator::ChunkHandle h) {
488   Chunk* c = ChunkFromHandle(h);
489   CHECK(c->in_use() && (c->bin_num == kInvalidBinNum));
490 
491   // Mark the chunk as no longer in use.
492   c->allocation_id = -1;
493 
494   // Optionally record the free time.
495   if (timing_counter_) {
496     c->freed_count = timing_counter_->next();
497   }
498 
499   // Updates the stats.
500   stats_.bytes_in_use -= c->size;
501 
502   ChunkHandle coalesced_chunk = h;
503 
504   // If the next chunk is free, merge it into c and delete it.
505   if (c->next != kInvalidChunkHandle && !ChunkFromHandle(c->next)->in_use()) {
506     // VLOG(8) << "Merging c->next " << ChunkFromHandle(c->next)->ptr
507     //         << " with c " << c->ptr;
508     RemoveFreeChunkFromBin(c->next);
509     Merge(h, c->next);
510   }
511 
512   // If the previous chunk is free, merge c into it and delete c.
513   if (c->prev != kInvalidChunkHandle && !ChunkFromHandle(c->prev)->in_use()) {
514     // VLOG(8) << "Merging c " << c->ptr << " into c->prev "
515     //         << ChunkFromHandle(c->prev)->ptr;
516 
517     coalesced_chunk = c->prev;
518     RemoveFreeChunkFromBin(c->prev);
519     Merge(c->prev, h);
520   }
521 
522   InsertFreeChunkIntoBin(coalesced_chunk);
523 }
524 
TracksAllocationSizes()525 bool BFCAllocator::TracksAllocationSizes() { return true; }
526 
RequestedSize(const void * ptr)527 size_t BFCAllocator::RequestedSize(const void* ptr) {
528   mutex_lock l(lock_);
529   BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
530   CHECK(h != kInvalidChunkHandle)
531       << "Asked for requested size of pointer we never allocated: " << ptr;
532   BFCAllocator::Chunk* c = ChunkFromHandle(h);
533   return c->requested_size;
534 }
535 
AllocatedSize(const void * ptr)536 size_t BFCAllocator::AllocatedSize(const void* ptr) {
537   mutex_lock l(lock_);
538   BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
539   CHECK(h != kInvalidChunkHandle)
540       << "Asked for allocated size of pointer we never allocated: " << ptr;
541   BFCAllocator::Chunk* c = ChunkFromHandle(h);
542   return c->size;
543 }
544 
AllocationId(const void * ptr)545 int64 BFCAllocator::AllocationId(const void* ptr) {
546   mutex_lock l(lock_);
547   BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
548   CHECK(h != kInvalidChunkHandle)
549       << "Asked for allocation id of pointer we never allocated: " << ptr;
550   BFCAllocator::Chunk* c = ChunkFromHandle(h);
551   return c->allocation_id;
552 }
553 
554 namespace {
555 
RenderRegion(char * rendered,const size_t resolution,const size_t total_render_size,const size_t offset,const void * base_ptr,const void * ptr,const size_t size,const char c)556 void RenderRegion(char* rendered, const size_t resolution,
557                   const size_t total_render_size, const size_t offset,
558                   const void* base_ptr, const void* ptr, const size_t size,
559                   const char c) {
560   const char* base_ptr_c = static_cast<const char*>(base_ptr);
561   const char* ptr_c = static_cast<const char*>(ptr);
562 
563   size_t start_location =
564       ((ptr_c - base_ptr_c + offset) * resolution) / total_render_size;
565   CHECK_GE(start_location, 0);
566   CHECK_LT(start_location, resolution);
567   size_t end_location =
568       ((ptr_c + size - 1 - base_ptr_c + offset) * resolution) /
569       total_render_size;
570   CHECK_GE(end_location, 0);
571   CHECK_LT(end_location, resolution);
572 
573   for (size_t i = start_location; i <= end_location; ++i) {
574     rendered[i] = c;
575   }
576 }
577 
578 }  // namespace
579 
RenderOccupancy()580 string BFCAllocator::RenderOccupancy() {
581   // Make a buffer for the ASCII-art representation.
582   const size_t resolution = 100;
583   char rendered[resolution];
584 
585   // Compute the total region size to render over
586   size_t total_region_size = 0;
587   for (const auto& region : region_manager_.regions()) {
588     total_region_size += region.memory_size();
589   }
590 
591   if (total_region_size == 0) {
592     return "<allocator contains no memory>";
593   }
594 
595   // Start out with everything empty
596   RenderRegion(rendered, resolution, total_region_size, 0, nullptr, nullptr,
597                total_region_size, '_');
598 
599   size_t region_offset = 0;
600   for (const auto& region : region_manager_.regions()) {
601     ChunkHandle h = region_manager_.get_handle(region.ptr());
602     // Then render each chunk left to right.
603     while (h != kInvalidChunkHandle) {
604       Chunk* c = ChunkFromHandle(h);
605       if (c->in_use()) {
606         // Render the wasted space
607         size_t wasted = c->size - c->requested_size;
608         if (wasted > 0) {
609           RenderRegion(rendered, resolution, total_region_size,
610                        region_offset + c->requested_size, region.ptr(), c->ptr,
611                        wasted, 'x');
612         }
613         // Then the occupied space
614         RenderRegion(rendered, resolution, total_region_size, region_offset,
615                      region.ptr(), c->ptr, c->requested_size, '*');
616       }
617       h = c->next;
618     }
619     region_offset += region.memory_size();
620   }
621 
622   return string(rendered, resolution);
623 }
624 
DumpMemoryLog(size_t num_bytes)625 void BFCAllocator::DumpMemoryLog(size_t num_bytes) {
626   const std::array<BinDebugInfo, kNumBins> bin_infos = get_bin_debug_info();
627   for (BinNum bin_num = 0; bin_num < kNumBins; bin_num++) {
628     Bin* b = BinFromIndex(bin_num);
629     const BinDebugInfo& bin_info = bin_infos[bin_num];
630     CHECK_EQ(b->free_chunks.size(),
631              bin_info.total_chunks_in_bin - bin_info.total_chunks_in_use);
632 
633     LOG(INFO) << "Bin (" << b->bin_size
634               << "): \tTotal Chunks: " << bin_info.total_chunks_in_bin
635               << ", Chunks in use: " << bin_info.total_chunks_in_use << ". "
636               << strings::HumanReadableNumBytes(bin_info.total_bytes_in_bin)
637               << " allocated for chunks. "
638               << strings::HumanReadableNumBytes(bin_info.total_bytes_in_use)
639               << " in use in bin. "
640               << strings::HumanReadableNumBytes(
641                      bin_info.total_requested_bytes_in_use)
642               << " client-requested in use in bin.";
643   }
644 
645   // Find the bin that we would have liked to allocate in, so we
646   // can get some further analysis about fragmentation.
647   Bin* b = BinForSize(num_bytes);
648 
649   LOG(INFO) << "Bin for " << strings::HumanReadableNumBytes(num_bytes)
650             << " was " << strings::HumanReadableNumBytes(b->bin_size)
651             << ", Chunk State: ";
652 
653   for (ChunkHandle h : b->free_chunks) {
654     Chunk* c = ChunkFromHandle(h);
655     LOG(INFO) << c->DebugString(this, true);
656   }
657 
658   // Next show the chunks that are in use, and also summarize their
659   // number by size.
660   std::map<size_t, int> in_use_by_size;
661   for (const auto& region : region_manager_.regions()) {
662     ChunkHandle h = region_manager_.get_handle(region.ptr());
663     while (h != kInvalidChunkHandle) {
664       const Chunk* c = ChunkFromHandle(h);
665       if (c->in_use()) {
666         in_use_by_size[c->size]++;
667       }
668       LOG(INFO) << (c->in_use() ? "Chunk" : "Free ") << " at " << c->ptr
669                 << " of size " << c->size
670                 << (timing_counter_
671                         ? strings::StrCat(" freed_count ", c->freed_count)
672                         : "");
673       h = c->next;
674     }
675   }
676 
677   LOG(INFO) << "     Summary of in-use Chunks by size: ";
678   size_t total_bytes = 0;
679   for (auto& it : in_use_by_size) {
680     LOG(INFO) << it.second << " Chunks of size " << it.first << " totalling "
681               << strings::HumanReadableNumBytes(it.first * it.second);
682     total_bytes += (it.first * it.second);
683   }
684   LOG(INFO) << "Sum Total of in-use chunks: "
685             << strings::HumanReadableNumBytes(total_bytes);
686   LOG(INFO) << "Stats: \n" << stats_.DebugString();
687 }
688 
GetStats()689 absl::optional<AllocatorStats> BFCAllocator::GetStats() {
690   mutex_lock l(lock_);
691   return stats_;
692 }
693 
ClearStats()694 void BFCAllocator::ClearStats() {
695   mutex_lock l(lock_);
696   stats_.num_allocs = 0;
697   stats_.peak_bytes_in_use = stats_.bytes_in_use;
698   stats_.largest_alloc_size = 0;
699 }
700 
701 std::array<BFCAllocator::BinDebugInfo, BFCAllocator::kNumBins>
get_bin_debug_info()702 BFCAllocator::get_bin_debug_info() {
703   std::array<BinDebugInfo, kNumBins> bin_infos;
704   for (const auto& region : region_manager_.regions()) {
705     ChunkHandle h = region_manager_.get_handle(region.ptr());
706     while (h != kInvalidChunkHandle) {
707       const Chunk* c = ChunkFromHandle(h);
708       BinNum bin_num = BinNumForSize(c->size);
709       BinDebugInfo& bin_info = bin_infos[bin_num];
710       bin_info.total_bytes_in_bin += c->size;
711       bin_info.total_chunks_in_bin++;
712       if (c->in_use()) {
713         bin_info.total_bytes_in_use += c->size;
714         bin_info.total_requested_bytes_in_use += c->requested_size;
715         bin_info.total_chunks_in_use++;
716       } else {
717         Bin* bin = BinFromIndex(bin_num);
718         CHECK_EQ(bin->free_chunks.count(h), 1);
719         CHECK_EQ(c->bin_num, bin_num);
720       }
721       h = c->next;
722     }
723   }
724   return bin_infos;
725 }
726 
727 }  // namespace tensorflow
728