1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_BFC_ALLOCATOR_H_
17 #define TENSORFLOW_CORE_COMMON_RUNTIME_BFC_ALLOCATOR_H_
18 
19 #include <array>
20 #include <deque>
21 #include <memory>
22 #include <string>
23 #include <unordered_map>
24 #include <vector>
25 
26 #include "absl/container/flat_hash_set.h"
27 #include "tensorflow/core/common_runtime/allocator_retry.h"
28 #include "tensorflow/core/common_runtime/shared_counter.h"
29 #include "tensorflow/core/framework/allocator.h"
30 #include "tensorflow/core/lib/strings/numbers.h"
31 #include "tensorflow/core/lib/strings/strcat.h"
32 #include "tensorflow/core/platform/macros.h"
33 #include "tensorflow/core/platform/mutex.h"
34 #include "tensorflow/core/platform/thread_annotations.h"
35 #include "tensorflow/core/platform/types.h"
36 
37 namespace tensorflow {
38 
39 class MemoryDump;
40 
41 // A memory allocator that implements a 'best-fit with coalescing'
42 // algorithm.  This is essentially a very simple version of Doug Lea's
43 // malloc (dlmalloc).
44 //
45 // The goal of this allocator is to support defragmentation via
46 // coalescing.  One assumption we make is that the process using this
47 // allocator owns pretty much all of the memory, and that nearly
48 // all requests to allocate memory go through this interface.
49 class BFCAllocator : public Allocator {
50  public:
51   // Takes ownership of sub_allocator.
52   BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
53                bool allow_growth, const string& name,
54                bool garbage_collection = false);
55   ~BFCAllocator() override;
56 
Name()57   string Name() override { return name_; }
58 
AllocateRaw(size_t alignment,size_t num_bytes)59   void* AllocateRaw(size_t alignment, size_t num_bytes) override {
60     return AllocateRaw(alignment, num_bytes, AllocationAttributes());
61   }
62 
63   void* AllocateRaw(size_t alignment, size_t num_bytes,
64                     const AllocationAttributes& allocation_attr) override;
65 
66   void DeallocateRaw(void* ptr) override;
67 
68   bool TracksAllocationSizes() const override;
69 
70   size_t RequestedSize(const void* ptr) const override;
71 
72   size_t AllocatedSize(const void* ptr) const override;
73 
74   int64 AllocationId(const void* ptr) const override;
75 
76   absl::optional<AllocatorStats> GetStats() override;
77 
78   void ClearStats() override;
79 
SetTimingCounter(SharedCounter * sc)80   void SetTimingCounter(SharedCounter* sc) { timing_counter_ = sc; }
81 
82   void SetSafeFrontier(uint64 count) override;
83 
ShouldRecordOpName()84   bool ShouldRecordOpName() const { return true; }
85 
86   MemoryDump RecordMemoryMap();
87 
88  private:
89   struct Bin;
90 
91   void* AllocateRawInternal(size_t alignment, size_t num_bytes,
92                             bool dump_log_on_failure,
93                             uint64 freed_before_count);
94 
95   void* AllocateRawInternalWithRetry(
96       size_t alignment, size_t num_bytes,
97       const AllocationAttributes& allocation_attr);
98 
99   void DeallocateRawInternal(void* ptr);
100 
101   // Chunks whose freed_at_count is later than the safe frontier value are kept
102   // on a special list and not subject to merging immediately upon being freed.
103   //
104   // This function sweeps that list looking for Chunks whose timestamp is now
105   // safe. When found their freed_at_count is set to 0 and we attempt to merge
106   // them with their neighbors.
107   //
108   // If required_bytes > 0 then this function is being called in the context of
109   // a need for this many bytes that could not be satisfied without merging
110   // unsafe chunks, so we go ahead and merge the unsafe chunks too, just up to
111   // the point that a free chunk of required_bytes is produced.  Note that
112   // unsafe merged chunks adopt the most conservative timestamp from their
113   // constituents so they're only useful for allocations not requiring a
114   // particular timestamp.
115   bool MergeTimestampedChunks(size_t required_bytes)
116       TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
117 
118   // Return the largest free chunk bytes from the largest bin in constant time.
119   // The free chunks are sorted by size (and then address) in a bin.
120   int64 LargestFreeChunk() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
121 
122   // Add TraceMe (in memory allocation and deallocation) for memory stats
123   // profiling. The chunk_ptr is passed to get information such as address,
124   // chunk size and requested_size.
125   void AddTraceMe(absl::string_view traceme_name, const void* ptr)
126       TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
127 
128   // Overloaded AddTraceMe function with chunk information.
129   void AddTraceMe(absl::string_view traceme_name, const void* chunk_ptr,
130                   int64 req_bytes, int64 alloc_bytes)
131       TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
132 
133   // A ChunkHandle is an index into the chunks_ vector in BFCAllocator
134   // kInvalidChunkHandle means an invalid chunk
135   typedef size_t ChunkHandle;
136   static constexpr ChunkHandle kInvalidChunkHandle = SIZE_MAX;
137 
138   typedef int BinNum;
139   static constexpr int kInvalidBinNum = -1;
140   // The following means that the largest bin'd chunk size is 256 << 21 = 512MB.
141   static constexpr int kNumBins = 21;
142 
143   // A Chunk points to a piece of memory that's either entirely free or entirely
144   // in use by one user memory allocation.
145   //
146   // An AllocationRegion's memory is split up into one or more disjoint Chunks,
147   // which together cover the whole region without gaps.  Chunks participate in
148   // a doubly-linked list, and the prev/next pointers point to the physically
149   // adjacent chunks.
150   //
151   // Since a chunk cannot be partially in use, we may need to split a free chunk
152   // in order to service a user allocation.  We always merge adjacent free
153   // chunks.
154   //
155   // Chunks contain information about whether they are in use or whether they
156   // are free, and contain a pointer to the bin they are in.
157   struct Chunk {
158     size_t size = 0;  // Full size of buffer.
159 
160     // We sometimes give chunks that are larger than needed to reduce
161     // fragmentation.  requested_size keeps track of what the client
162     // actually wanted so we can understand whether our splitting
163     // strategy is efficient.
164     size_t requested_size = 0;
165 
166     // allocation_id is set to -1 when the chunk is not in use. It is assigned a
167     // value greater than zero before the chunk is returned from
168     // AllocateRaw, and this value is unique among values assigned by
169     // the parent allocator.
170     int64 allocation_id = -1;
171     void* ptr = nullptr;  // pointer to granted subbuffer.
172 
173     // If not kInvalidChunkHandle, the memory referred to by 'prev' is directly
174     // preceding the memory used by this chunk.  E.g., It should start
175     // at 'ptr - prev->size'
176     ChunkHandle prev = kInvalidChunkHandle;
177 
178     // If not kInvalidChunkHandle, the memory referred to by 'next' is directly
179     // following the memory used by this chunk.  E.g., It should be at
180     // 'ptr + size'
181     ChunkHandle next = kInvalidChunkHandle;
182 
183     // What bin are we in?
184     BinNum bin_num = kInvalidBinNum;
185 
186     // Optional count when this chunk was most recently made free.
187     uint64 freed_at_count = 0;
188 
in_useChunk189     bool in_use() const { return allocation_id != -1; }
190 
191     // optional debugging info
192     const char* op_name = nullptr;
193     uint64 step_id = 0;
194     uint64 action_count = 0;
195 
196     // Get the op name used for memory debugging.
GetDebugOpNameChunk197     const char* GetDebugOpName() const {
198       // If chunk is not in use, although the op_name pointer is not nullptr,
199       // the corresponding OpKernel might have already been deallocated, and the
200       // op_name pointer might point to invalid memory. So in this case, return
201       // a special op name "UNUSED";
202       if (!in_use())
203         return "UNUSED";
204       else if (op_name)
205         return op_name;
206       else
207         return "UNKNOWN";
208     }
209 
DebugStringChunk210     string DebugString(BFCAllocator* a,
211                        bool recurse) TF_NO_THREAD_SAFETY_ANALYSIS {
212       string dbg;
213       strings::StrAppend(
214           &dbg, "  Size: ", strings::HumanReadableNumBytes(size),
215           " | Requested Size: ", strings::HumanReadableNumBytes(requested_size),
216           " | in_use: ", in_use(), " | bin_num: ", bin_num);
217       if (recurse && prev != BFCAllocator::kInvalidChunkHandle) {
218         Chunk* p = a->ChunkFromHandle(prev);
219         strings::StrAppend(&dbg, ", prev: ", p->DebugString(a, false));
220       }
221       if (recurse && next != BFCAllocator::kInvalidChunkHandle) {
222         Chunk* n = a->ChunkFromHandle(next);
223         strings::StrAppend(&dbg, ", next: ", n->DebugString(a, false));
224       }
225       strings::StrAppend(&dbg, ", for: ", GetDebugOpName(),
226                          ", stepid: ", step_id,
227                          ", last_action: ", action_count);
228       return dbg;
229     }
230   };
231 
232   // A Bin is a collection of similar-sized free chunks.
233   // Allocated chunks are never in a Bin.
234   struct Bin {
235     // All chunks in this bin have >= bin_size memory.
236     size_t bin_size = 0;
237 
238     class ChunkComparator {
239      public:
ChunkComparatorBin240       explicit ChunkComparator(BFCAllocator* allocator)
241           : allocator_(allocator) {}
242       // Sort first by size and then use pointer address as a tie breaker.
operatorBin243       bool operator()(const ChunkHandle ha,
244                       const ChunkHandle hb) const TF_NO_THREAD_SAFETY_ANALYSIS {
245         const Chunk* a = allocator_->ChunkFromHandle(ha);
246         const Chunk* b = allocator_->ChunkFromHandle(hb);
247         if (a->size != b->size) {
248           return a->size < b->size;
249         }
250         return a->ptr < b->ptr;
251       }
252 
253      private:
254       BFCAllocator* allocator_;  // The parent allocator
255     };
256 
257     typedef std::set<ChunkHandle, ChunkComparator> FreeChunkSet;
258     // List of free chunks within the bin, sorted by chunk size.
259     // Chunk * not owned.
260     FreeChunkSet free_chunks;
BinBin261     Bin(BFCAllocator* allocator, size_t bs)
262         : bin_size(bs), free_chunks(ChunkComparator(allocator)) {}
263   };
264 
265   static constexpr size_t kMinAllocationBits = 8;
266   static constexpr size_t kMinAllocationSize = 1 << kMinAllocationBits;
267 
268   // BFCAllocator allocates memory into a collection of disjoint
269   // AllocationRegions.  Each AllocationRegion corresponds to one call to
270   // SubAllocator::Alloc().  (Actually, if a subsequent call to
271   // SubAllocator::Alloc() returns another region immediately adjacent to the
272   // last, it will be used to extend the first AllocationRegion, not create a
273   // separate one.)
274   //
275   // An AllocationRegion contains one or more Chunks, covering all of its
276   // memory.  Its primary job is to map pointers to ChunkHandles.
277   //
278   // This class is thread-compatible.
279   class AllocationRegion {
280    public:
AllocationRegion(void * ptr,size_t memory_size)281     AllocationRegion(void* ptr, size_t memory_size)
282         : ptr_(ptr),
283           memory_size_(memory_size),
284           end_ptr_(
285               static_cast<void*>(static_cast<char*>(ptr_) + memory_size_)) {
286       DCHECK_EQ(0, memory_size % kMinAllocationSize);
287       const size_t n_handles =
288           (memory_size + kMinAllocationSize - 1) / kMinAllocationSize;
289       handles_.resize(n_handles, kInvalidChunkHandle);
290     }
291 
292     AllocationRegion() = default;
AllocationRegion(AllocationRegion && other)293     AllocationRegion(AllocationRegion&& other) { Swap(&other); }
294     AllocationRegion& operator=(AllocationRegion&& other) {
295       Swap(&other);
296       return *this;
297     }
298 
ptr()299     void* ptr() const { return ptr_; }
end_ptr()300     void* end_ptr() const { return end_ptr_; }
memory_size()301     size_t memory_size() const { return memory_size_; }
extend(size_t size)302     void extend(size_t size) {
303       memory_size_ += size;
304       DCHECK_EQ(0, memory_size_ % kMinAllocationSize);
305 
306       end_ptr_ = static_cast<void*>(static_cast<char*>(end_ptr_) + size);
307       const size_t n_handles =
308           (memory_size_ + kMinAllocationSize - 1) / kMinAllocationSize;
309       handles_.resize(n_handles, kInvalidChunkHandle);
310     }
get_handle(const void * p)311     ChunkHandle get_handle(const void* p) const {
312       return handles_[IndexFor(p)];
313     }
set_handle(const void * p,ChunkHandle h)314     void set_handle(const void* p, ChunkHandle h) { handles_[IndexFor(p)] = h; }
erase(const void * p)315     void erase(const void* p) { set_handle(p, kInvalidChunkHandle); }
316 
317    private:
Swap(AllocationRegion * other)318     void Swap(AllocationRegion* other) {
319       std::swap(ptr_, other->ptr_);
320       std::swap(memory_size_, other->memory_size_);
321       std::swap(end_ptr_, other->end_ptr_);
322       std::swap(handles_, other->handles_);
323     }
324 
IndexFor(const void * p)325     size_t IndexFor(const void* p) const {
326       std::uintptr_t p_int = reinterpret_cast<std::uintptr_t>(p);
327       std::uintptr_t base_int = reinterpret_cast<std::uintptr_t>(ptr_);
328       DCHECK_GE(p_int, base_int);
329       DCHECK_LT(p_int, base_int + memory_size_);
330       return static_cast<size_t>(((p_int - base_int) >> kMinAllocationBits));
331     }
332 
333     // Metadata about the allocation region.
334     void* ptr_ = nullptr;
335     size_t memory_size_ = 0;
336     void* end_ptr_ = nullptr;
337 
338     // Array of size "memory_size / kMinAllocationSize".  It is
339     // indexed by (p-base) / kMinAllocationSize, contains ChunkHandle
340     // for the memory allocation represented by "p"
341     std::vector<ChunkHandle> handles_;
342 
343     TF_DISALLOW_COPY_AND_ASSIGN(AllocationRegion);
344   };
345 
346   // RegionManager aggregates one or more "AllocationRegions" and provides
347   // a layer of indirection from pointers to the underlying ChunkHandle,
348   // allowing allocation across multiple discontiguous memory regions.
349   //
350   // This class is thread-compatible.
351   class RegionManager {
352    public:
RegionManager()353     RegionManager() {}
~RegionManager()354     ~RegionManager() {}
355 
AddAllocationRegion(void * ptr,size_t memory_size)356     void AddAllocationRegion(void* ptr, size_t memory_size) {
357       // Insert sorted by end_ptr.
358       auto entry =
359           std::upper_bound(regions_.begin(), regions_.end(), ptr, &Comparator);
360       regions_.insert(entry, AllocationRegion(ptr, memory_size));
361     }
362 
363     // Adds an alloation region for the given ptr and size, potentially
364     // extending a region if ptr matches the end_ptr of an existing region.
365     // If a region is extended, returns a pointer to the extended region so that
366     // the BFC allocator can reason about chunkification.
AddOrExtendAllocationRegion(void * ptr,size_t memory_size)367     AllocationRegion* AddOrExtendAllocationRegion(void* ptr,
368                                                   size_t memory_size) {
369       // Insert sorted by end_ptr.
370       auto entry =
371           std::upper_bound(regions_.begin(), regions_.end(), ptr, &Comparator);
372       // Check if can be coalesced with preceding region.
373       if (entry != regions_.begin()) {
374         auto preceding_region = entry - 1;
375         if (preceding_region->end_ptr() == ptr) {
376           if (VLOG_IS_ON(1)) {
377             LOG(INFO) << "Extending region " << preceding_region->ptr()
378                       << " of "
379                       << strings::HumanReadableNumBytes(
380                              preceding_region->memory_size())
381                       << "  by " << strings::HumanReadableNumBytes(memory_size)
382                       << " bytes";
383           }
384           preceding_region->extend(memory_size);
385           return &*preceding_region;
386         }
387       }
388       VLOG(1) << "Inserting new region " << ptr << " of "
389               << strings::HumanReadableNumBytes(memory_size);
390       regions_.insert(entry, AllocationRegion(ptr, memory_size));
391       return nullptr;
392     }
393 
RemoveAllocationRegion(std::vector<AllocationRegion>::iterator it)394     std::vector<AllocationRegion>::iterator RemoveAllocationRegion(
395         std::vector<AllocationRegion>::iterator it) {
396       return regions_.erase(it);
397     }
398 
get_handle(const void * p)399     ChunkHandle get_handle(const void* p) const {
400       return RegionFor(p)->get_handle(p);
401     }
402 
set_handle(const void * p,ChunkHandle h)403     void set_handle(const void* p, ChunkHandle h) {
404       return MutableRegionFor(p)->set_handle(p, h);
405     }
erase(const void * p)406     void erase(const void* p) { return MutableRegionFor(p)->erase(p); }
407 
regions()408     const std::vector<AllocationRegion>& regions() const { return regions_; }
409 
410    private:
Comparator(const void * ptr,const AllocationRegion & other)411     static bool Comparator(const void* ptr, const AllocationRegion& other) {
412       return ptr < other.end_ptr();
413     }
414 
MutableRegionFor(const void * p)415     AllocationRegion* MutableRegionFor(const void* p) {
416       return const_cast<AllocationRegion*>(RegionFor(p));
417     }
418 
RegionFor(const void * p)419     const AllocationRegion* RegionFor(const void* p) const {
420       auto entry =
421           std::upper_bound(regions_.begin(), regions_.end(), p, &Comparator);
422 
423       if (entry != regions_.end()) {
424         return &(*entry);
425       }
426 
427       LOG(FATAL) << "Could not find Region for " << p;
428       return nullptr;
429     }
430 
431    private:
432     std::vector<AllocationRegion> regions_;
433   };
434 
435   // Returns 'bytes' rounded up to the next highest kMinAllocationSize.
436   static size_t RoundedBytes(size_t bytes);
437 
438   // Try to add a new memory region that can satisfy an allocation of
439   // 'rounded_bytes' bytes.  Returns true on success and false on
440   // failure.
441   bool Extend(size_t alignment, size_t rounded_bytes)
442       TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
443 
444   // Deallocate free regions to give back the memory to suballocator, so that
445   // we can re-allocate a larger region.  The main use scenario of this function
446   // is when OOM happens but we have free regions and the sum of sizes of free
447   // regions and unallocated bytes is larger than the requested size, implying
448   // (external) memory fragmentation.  Returns true if any free regions are
449   // found and freed; false otherwise.
450   bool DeallocateFreeRegions(size_t rounded_bytes);
451 
452   // Helper function to deallocate regions.
453   void DeallocateRegions(const absl::flat_hash_set<void*>& region_ptrs)
454       TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
455 
456   // Returns a pointer to an underlying allocated chunk of size
457   // 'rounded_bytes'.
458   void* FindChunkPtr(BinNum bin_num, size_t rounded_bytes, size_t num_bytes,
459                      uint64 freed_before) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
460 
461   // Splits the chunk specified by 'h' into two chunks, one at least
462   // of size 'num_bytes'.
463   void SplitChunk(ChunkHandle h, size_t num_bytes)
464       TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
465 
466   // Merges the two chunk handles.  Requires that the chunks are
467   // contiguous in their allocation.
468   void Merge(ChunkHandle h, ChunkHandle h2) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
469 
470   // Adds the chunk 'h' to the proper free bin.
471   void InsertFreeChunkIntoBin(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
472 
473   // Removes the free chunk pointed to by 'c' from the set free_chunks.
474   void RemoveFreeChunkIterFromBin(Bin::FreeChunkSet* free_chunks,
475                                   const Bin::FreeChunkSet::iterator& c)
476       TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
477 
478   // Removes a free chunk from the bin.
479   void RemoveFreeChunkFromBin(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
480   void MaybeRemoveFreeChunkFromBin(ChunkHandle h)
481       TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
482 
483   // Removes the chunk metadata represented by 'h'.
484   void DeleteChunk(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
485 
486   string RenderOccupancy() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
487   void DumpMemoryLog(size_t num_bytes) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
488   MemoryDump RecordMemoryMapInternal() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
489   void MaybeWriteMemoryMap() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
490 
491   ChunkHandle AllocateChunk() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
492   void DeallocateChunk(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
493 
494   Chunk* ChunkFromHandle(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
495   const Chunk* ChunkFromHandle(ChunkHandle h) const
496       TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
497 
498   void MarkFree(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
499 
500   ChunkHandle TryToCoalesce(ChunkHandle h, bool ignore_freed_at)
501       TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
502 
503   // Fragmentation is calculated as the reverse ratio of the largest free chunk
504   // size over total free memory, and returns a value within [0, 1].
505   double GetFragmentation() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
506 
507   // Information about a Bin that is useful for debugging.
508   struct BinDebugInfo {
509     size_t total_bytes_in_use = 0;
510     size_t total_bytes_in_bin = 0;
511     size_t total_requested_bytes_in_use = 0;
512     size_t total_chunks_in_use = 0;
513     size_t total_chunks_in_bin = 0;
514   };
515 
516   // Computes and returns a BinDebugInfo for each Bin.
517   std::array<BinDebugInfo, kNumBins> get_bin_debug_info()
518       TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
519 
520   AllocatorRetry retry_helper_;
521 
522   // Structures immutable after construction
523   size_t memory_limit_ = 0;
524 
Log2FloorNonZeroSlow(uint64 n)525   inline int Log2FloorNonZeroSlow(uint64 n) {
526     int r = 0;
527     while (n > 0) {
528       r++;
529       n >>= 1;
530     }
531     return r - 1;
532   }
533 
534   // Returns floor(log2(n)).
Log2FloorNonZero(uint64 n)535   inline int Log2FloorNonZero(uint64 n) {
536 #if defined(__GNUC__)
537     return 63 ^ __builtin_clzll(n);
538 #elif defined(PLATFORM_WINDOWS) && (_WIN64)
539     unsigned long index;
540     _BitScanReverse64(&index, n);
541     return index;
542 #else
543     return Log2FloorNonZeroSlow(n);
544 #endif
545   }
546 
547   // Map from bin size to Bin
BinFromIndex(BinNum index)548   Bin* BinFromIndex(BinNum index) {
549     return reinterpret_cast<Bin*>(&(bins_space_[index * sizeof(Bin)]));
550   }
BinNumToSize(BinNum index)551   size_t BinNumToSize(BinNum index) {
552     return static_cast<size_t>(256) << index;
553   }
BinNumForSize(size_t bytes)554   BinNum BinNumForSize(size_t bytes) {
555     uint64 v = std::max<size_t>(bytes, 256) >> kMinAllocationBits;
556     int b = std::min(kNumBins - 1, Log2FloorNonZero(v));
557     return b;
558   }
BinForSize(size_t bytes)559   Bin* BinForSize(size_t bytes) { return BinFromIndex(BinNumForSize(bytes)); }
560 
561   char bins_space_[sizeof(Bin) * kNumBins];
562 
563   // The size of the current region allocation.
564   size_t curr_region_allocation_bytes_;
565 
566   // The total number of allocated bytes by the allocator.
567   size_t total_region_allocated_bytes_ = 0;
568 
569   // An indicator that expansion of a region has hit the limits
570   // of the available memory.
571   bool started_backpedal_ = false;
572 
573   // Whether the allocator will deallocate free regions to avoid OOM due to
574   // memory fragmentation.
575   const bool garbage_collection_;
576 
577   // Whether the allocator will coalesce adjacent sub allocator provided
578   // AllocationRegions. This may be disabled if discrete sub allocator
579   // regions can't be treated as contiguous (e.g. if the allocation refers to
580   // device visible memory which is not adjacent to the other region in the
581   // device's address space).
582   const bool coalesce_regions_;
583 
584   std::unique_ptr<SubAllocator> sub_allocator_;
585   string name_;
586   SharedCounter* timing_counter_ = nullptr;
587   std::deque<ChunkHandle> timestamped_chunks_;
588 
589   std::atomic<uint64> safe_frontier_ = {0};
590 
591   // Structures mutable after construction
592   mutable mutex lock_;
593   RegionManager region_manager_ TF_GUARDED_BY(lock_);
594 
595   std::vector<Chunk> chunks_ TF_GUARDED_BY(lock_);
596 
597   // Pointer to head of linked list of free Chunks
598   ChunkHandle free_chunks_list_ TF_GUARDED_BY(lock_);
599 
600   // Counter containing the next unique identifier to assign to a
601   // newly-created chunk.
602   int64 next_allocation_id_ TF_GUARDED_BY(lock_);
603 
604   // Stats.
605   AllocatorStats stats_ TF_GUARDED_BY(lock_);
606   uint64 action_counter_ TF_GUARDED_BY(lock_);
607 
608   // The circular buffer used to track memory operation history.
609   static constexpr uint64 kMemDebugHistorySize = 4096;
610   int64 size_history_[kMemDebugHistorySize];
611 
612   friend class GPUBFCAllocatorPrivateMethodsTest;
613   friend class GPUBFCAllocatorPrivateMethodsTest_SubAllocatorSpecific;
614   TF_DISALLOW_COPY_AND_ASSIGN(BFCAllocator);
615 };
616 
617 }  // namespace tensorflow
618 
619 #endif  // TENSORFLOW_CORE_COMMON_RUNTIME_BFC_ALLOCATOR_H_
620