1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // This approach to arenas overcomes many of the limitations described
17 // in the "Specialized allocators" section of
18 //     http://www.pdos.lcs.mit.edu/~dm/c++-new.html
19 //
20 // A somewhat similar approach to Gladiator, but for heap-detection, was
21 // suggested by Ron van der Wal and Scott Meyers at
22 //     http://www.aristeia.com/BookErrata/M27Comments_frames.html
23 
24 #include "tensorflow/core/lib/core/arena.h"
25 
26 #include <assert.h>
27 
28 #include <algorithm>
29 #include <vector>
30 
31 #include "tensorflow/core/lib/math/math_util.h"
32 #include "tensorflow/core/platform/logging.h"
33 #include "tensorflow/core/platform/macros.h"
34 #include "tensorflow/core/platform/mem.h"
35 
36 namespace tensorflow {
37 namespace core {
38 
39 // ----------------------------------------------------------------------
40 // Arena::Arena()
41 // Arena::~Arena()
42 //    Destroying the arena automatically calls Reset()
43 // ----------------------------------------------------------------------
44 
Arena(const size_t block_size)45 Arena::Arena(const size_t block_size)
46     : remaining_(0),
47       block_size_(block_size),
48       freestart_(nullptr),  // set for real in Reset()
49       blocks_alloced_(1),
50       overflow_blocks_(nullptr) {
51   assert(block_size > kDefaultAlignment);
52 
53   first_blocks_[0].mem =
54       reinterpret_cast<char*>(port::AlignedMalloc(block_size_, sizeof(void*)));
55 
56   first_blocks_[0].size = block_size_;
57 
58   Reset();
59 }
60 
~Arena()61 Arena::~Arena() {
62   FreeBlocks();
63   assert(overflow_blocks_ == nullptr);  // FreeBlocks() should do that
64   // The first X blocks stay allocated always by default.  Delete them now.
65   for (size_t i = 0; i < blocks_alloced_; ++i) {
66     port::AlignedFree(first_blocks_[i].mem);
67   }
68 }
69 
70 // Returns true iff it advances freestart_ to the first position
71 // satisfying alignment without exhausting the current block.
SatisfyAlignment(size_t alignment)72 bool Arena::SatisfyAlignment(size_t alignment) {
73   const size_t overage = reinterpret_cast<size_t>(freestart_) & (alignment - 1);
74   if (overage > 0) {
75     const size_t waste = alignment - overage;
76     if (waste >= remaining_) {
77       return false;
78     }
79     freestart_ += waste;
80     remaining_ -= waste;
81   }
82   DCHECK_EQ(size_t{0}, reinterpret_cast<size_t>(freestart_) & (alignment - 1));
83   return true;
84 }
85 
86 // ----------------------------------------------------------------------
87 // Arena::Reset()
88 //    Clears all the memory an arena is using.
89 // ----------------------------------------------------------------------
90 
Reset()91 void Arena::Reset() {
92   FreeBlocks();
93   freestart_ = first_blocks_[0].mem;
94   remaining_ = first_blocks_[0].size;
95 
96   // There is no guarantee the first block is properly aligned, so
97   // enforce that now.
98   CHECK(SatisfyAlignment(kDefaultAlignment));
99 
100   freestart_when_empty_ = freestart_;
101 }
102 
103 // ----------------------------------------------------------------------
104 // Arena::MakeNewBlock()
105 //    Our sbrk() equivalent.  We always make blocks of the same size
106 //    (though GetMemory() can also make a new block for really big
107 //    data.
108 // ----------------------------------------------------------------------
109 
MakeNewBlock(const uint32 alignment)110 void Arena::MakeNewBlock(const uint32 alignment) {
111   AllocatedBlock* block = AllocNewBlock(block_size_, alignment);
112   freestart_ = block->mem;
113   remaining_ = block->size;
114   CHECK(SatisfyAlignment(alignment));
115 }
116 
LeastCommonMultiple(uint32 a,uint32 b)117 static uint32 LeastCommonMultiple(uint32 a, uint32 b) {
118   if (a > b) {
119     return (a / MathUtil::GCD<uint32>(a, b)) * b;
120   } else if (a < b) {
121     return (b / MathUtil::GCD<uint32>(b, a)) * a;
122   } else {
123     return a;
124   }
125 }
126 
127 // -------------------------------------------------------------
128 // Arena::AllocNewBlock()
129 //    Adds and returns an AllocatedBlock.
130 //    The returned AllocatedBlock* is valid until the next call
131 //    to AllocNewBlock or Reset.  (i.e. anything that might
132 //    affect overflow_blocks_).
133 // -------------------------------------------------------------
134 
AllocNewBlock(const size_t block_size,const uint32 alignment)135 Arena::AllocatedBlock* Arena::AllocNewBlock(const size_t block_size,
136                                             const uint32 alignment) {
137   AllocatedBlock* block;
138   // Find the next block.
139   if (blocks_alloced_ < TF_ARRAYSIZE(first_blocks_)) {
140     // Use one of the pre-allocated blocks
141     block = &first_blocks_[blocks_alloced_++];
142   } else {  // oops, out of space, move to the vector
143     if (overflow_blocks_ == nullptr)
144       overflow_blocks_ = new std::vector<AllocatedBlock>;
145     // Adds another block to the vector.
146     overflow_blocks_->resize(overflow_blocks_->size() + 1);
147     // block points to the last block of the vector.
148     block = &overflow_blocks_->back();
149   }
150 
151   // NOTE(tucker): this utility is made slightly more complex by
152   // not disallowing the case where alignment > block_size.
153   // Can we, without breaking existing code?
154 
155   // Must be a multiple of kDefaultAlignment, unless requested
156   // alignment is 1, in which case we don't care at all.
157   uint32 adjusted_alignment =
158       (alignment > 1 ? LeastCommonMultiple(alignment, kDefaultAlignment) : 1);
159   // Required minimum alignment for port::AlignedMalloc().
160   adjusted_alignment =
161       std::max(adjusted_alignment, static_cast<uint32>(sizeof(void*)));
162 
163   CHECK_LE(adjusted_alignment, static_cast<uint32>(1 << 20))
164       << "Alignment on boundaries greater than 1MB not supported.";
165 
166   // If block_size > alignment we force block_size to be a multiple
167   // of alignment; if block_size < alignment we make no adjustment.
168   size_t adjusted_block_size = block_size;
169   if (adjusted_block_size > adjusted_alignment) {
170     const uint32 excess = adjusted_block_size % adjusted_alignment;
171     adjusted_block_size += (excess > 0 ? adjusted_alignment - excess : 0);
172   }
173   block->mem = reinterpret_cast<char*>(
174       port::AlignedMalloc(adjusted_block_size, adjusted_alignment));
175   block->size = adjusted_block_size;
176   CHECK(nullptr != block->mem) << "block_size=" << block_size
177                                << " adjusted_block_size=" << adjusted_block_size
178                                << " alignment=" << alignment
179                                << " adjusted_alignment=" << adjusted_alignment;
180 
181   return block;
182 }
183 
184 // ----------------------------------------------------------------------
185 // Arena::GetMemoryFallback()
186 //    We take memory out of our pool, aligned on the byte boundary
187 //    requested.  If we don't have space in our current pool, we
188 //    allocate a new block (wasting the remaining space in the
189 //    current block) and give you that.  If your memory needs are
190 //    too big for a single block, we make a special your-memory-only
191 //    allocation -- this is equivalent to not using the arena at all.
192 // ----------------------------------------------------------------------
193 
GetMemoryFallback(const size_t size,const int alignment)194 void* Arena::GetMemoryFallback(const size_t size, const int alignment) {
195   if (0 == size) {
196     return nullptr;  // stl/stl_alloc.h says this is okay
197   }
198 
199   // alignment must be a positive power of 2.
200   CHECK(alignment > 0 && 0 == (alignment & (alignment - 1)));
201 
202   // If the object is more than a quarter of the block size, allocate
203   // it separately to avoid wasting too much space in leftover bytes.
204   if (block_size_ == 0 || size > block_size_ / 4) {
205     return AllocNewBlock(size, alignment)->mem;
206   }
207 
208   // Enforce alignment on freestart_ then check for adequate space,
209   // which may require starting a new block.
210   if (!SatisfyAlignment(alignment) || size > remaining_) {
211     MakeNewBlock(alignment);
212   }
213   CHECK_LE(size, remaining_);
214 
215   remaining_ -= size;
216   void* result = freestart_;
217   freestart_ += size;
218 
219   return result;
220 }
221 
222 // ----------------------------------------------------------------------
223 // Arena::ReturnMemoryFallback()
224 // Arena::FreeBlocks()
225 //    Unlike GetMemory(), which does actual work, ReturnMemory() is a
226 //    no-op: we don't "free" memory until Reset() is called.  We do
227 //    update some stats, though.  Note we do no checking that the
228 //    pointer you pass in was actually allocated by us, or that it
229 //    was allocated for the size you say, so be careful here!
230 //       FreeBlocks() does the work for Reset(), actually freeing all
231 //    memory allocated in one fell swoop.
232 // ----------------------------------------------------------------------
233 
FreeBlocks()234 void Arena::FreeBlocks() {
235   for (size_t i = 1; i < blocks_alloced_; ++i) {  // keep first block alloced
236     port::AlignedFree(first_blocks_[i].mem);
237     first_blocks_[i].mem = nullptr;
238     first_blocks_[i].size = 0;
239   }
240   blocks_alloced_ = 1;
241   if (overflow_blocks_ != nullptr) {
242     std::vector<AllocatedBlock>::iterator it;
243     for (it = overflow_blocks_->begin(); it != overflow_blocks_->end(); ++it) {
244       port::AlignedFree(it->mem);
245     }
246     delete overflow_blocks_;  // These should be used very rarely
247     overflow_blocks_ = nullptr;
248   }
249 }
250 
251 }  // namespace core
252 }  // namespace tensorflow
253