1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/common_runtime/pool_allocator.h"
17 
18 #include <errno.h>
19 #ifndef _MSC_VER
20 #include <strings.h>
21 #include <sys/mman.h>  // for munmap
22 #endif
23 
24 #include <map>
25 #include <utility>
26 
27 #include "tensorflow/core/lib/strings/numbers.h"
28 #include "tensorflow/core/platform/logging.h"
29 #include "tensorflow/core/platform/mem.h"
30 #include "tensorflow/core/platform/mutex.h"
31 #include "tensorflow/core/platform/numa.h"
32 #include "tensorflow/core/platform/types.h"
33 
34 namespace tensorflow {
35 
PoolAllocator(size_t pool_size_limit,bool auto_resize,SubAllocator * allocator,RoundUpInterface * size_rounder,string name)36 PoolAllocator::PoolAllocator(size_t pool_size_limit, bool auto_resize,
37                              SubAllocator* allocator,
38                              RoundUpInterface* size_rounder, string name)
39     : name_(std::move(name)),
40       has_size_limit_(pool_size_limit > 0),
41       auto_resize_(auto_resize),
42       pool_size_limit_(pool_size_limit),
43       allocator_(allocator),
44       size_rounder_(size_rounder) {
45   if (auto_resize) {
46     CHECK_LT(size_t{0}, pool_size_limit)
47         << "size limit must be > 0 if auto_resize is true.";
48   }
49 }
50 
~PoolAllocator()51 PoolAllocator::~PoolAllocator() { Clear(); }
52 
53 namespace {
54 // Pools contain Chunks allocated from the underlying Allocator.
55 // Chunk alignment is always on kPoolAlignment boundaries.  Each Chunk
56 // begins with a descriptor (ChunkPrefix) that gives its size and a
57 // pointer to itself.  The pointer returned to the user is just past
58 // the ChunkPrefix.  If the user asks for a larger alignment, we will
59 // increase the size of the chunk, then adjust the returned user
60 // pointer and also re-write the ChunkPrefix.chunk_ptr value
61 // immediately before it.  This way the Chunk address and size can be
62 // recovered from the returned user pointer, regardless of alignment.
63 // Note that this dereferencing of the pointers means that we cannot
64 // handle GPU memory, only CPU memory.
65 struct ChunkPrefix {
66   size_t num_bytes;
67   void* chunk_ptr;
68 };
69 // kPoolAlignment cannot be less than the size of ChunkPrefix.
70 static const int kPoolAlignment = sizeof(ChunkPrefix);
71 
PrepareChunk(void * chunk,size_t alignment,size_t num_bytes)72 void* PrepareChunk(void* chunk, size_t alignment, size_t num_bytes) {
73   ChunkPrefix* cp = reinterpret_cast<ChunkPrefix*>(chunk);
74   cp->num_bytes = num_bytes;
75   cp->chunk_ptr = chunk;
76   void* user_ptr = reinterpret_cast<void*>(cp + 1);
77   if (alignment > kPoolAlignment) {
78     // Move user_ptr forward to the first satisfying offset, and write
79     // chunk_ptr just before it.
80     size_t aligned_ptr = reinterpret_cast<size_t>(user_ptr) + alignment;
81     user_ptr = reinterpret_cast<void*>(aligned_ptr & ~(alignment - 1));
82     (reinterpret_cast<ChunkPrefix*>(user_ptr) - 1)->chunk_ptr = chunk;
83   }
84   // Safety check that user_ptr is always past the ChunkPrefix.
85   CHECK_GE(user_ptr, reinterpret_cast<ChunkPrefix*>(chunk) + 1);
86   return user_ptr;
87 }
88 
FindPrefix(void * user_ptr)89 ChunkPrefix* FindPrefix(void* user_ptr) {
90   ChunkPrefix* cp = reinterpret_cast<ChunkPrefix*>(user_ptr) - 1;
91   return reinterpret_cast<ChunkPrefix*>(cp->chunk_ptr);
92 }
93 }  // namespace
94 
AllocateRaw(size_t alignment,size_t num_bytes)95 void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
96   if (num_bytes == 0) return nullptr;
97 
98   // If alignment is larger than kPoolAlignment, increase num_bytes so that we
99   // are guaranteed to be able to return an aligned ptr by advancing user_ptr
100   // without overrunning the end of the chunk.
101   if (alignment > kPoolAlignment) {
102     num_bytes += alignment;
103   }
104   num_bytes += sizeof(ChunkPrefix);
105   num_bytes = size_rounder_->RoundUp(num_bytes);
106   PtrRecord* pr = nullptr;
107   if (has_size_limit_) {
108     {
109       mutex_lock lock(mutex_);
110       auto iter = pool_.find(num_bytes);
111       if (iter == pool_.end()) {
112         allocated_count_++;
113         // Deliberately fall out of lock scope before
114         // calling the allocator.  No further modification
115         // to the pool will be performed.
116       } else {
117         get_from_pool_count_++;
118         pr = iter->second;
119         RemoveFromList(pr);
120         pool_.erase(iter);
121         // Fall out of lock scope and do the result without the lock held.
122       }
123     }
124   }
125   if (pr != nullptr) {
126     void* r = pr->ptr;
127     delete pr;
128     return PrepareChunk(r, alignment, num_bytes);
129   } else {
130     void* ptr = allocator_->Alloc(kPoolAlignment, num_bytes);
131     return PrepareChunk(ptr, alignment, num_bytes);
132   }
133 }
134 
DeallocateRaw(void * ptr)135 void PoolAllocator::DeallocateRaw(void* ptr) {
136   if (ptr == nullptr) return;
137   ChunkPrefix* cp = FindPrefix(ptr);
138   CHECK_LE((void*)cp, (void*)ptr);
139   if (!has_size_limit_ && !auto_resize_) {
140     allocator_->Free(cp, cp->num_bytes);
141   } else {
142     mutex_lock lock(mutex_);
143     ++put_count_;
144     while (pool_.size() >= pool_size_limit_) {
145       EvictOne();
146     }
147     PtrRecord* pr = new PtrRecord;
148     pr->num_bytes = cp->num_bytes;
149     pr->ptr = cp;
150     AddToList(pr);
151     pool_.insert(std::make_pair(cp->num_bytes, pr));
152   }
153 }
154 
Clear()155 void PoolAllocator::Clear() {
156   if (has_size_limit_) {
157     mutex_lock lock(mutex_);
158     for (auto iter : pool_) {
159       PtrRecord* pr = iter.second;
160       allocator_->Free(pr->ptr, pr->num_bytes);
161       delete pr;
162     }
163     pool_.clear();
164     get_from_pool_count_ = 0;
165     put_count_ = 0;
166     allocated_count_ = 0;
167     evicted_count_ = 0;
168     lru_head_ = nullptr;
169     lru_tail_ = nullptr;
170   }
171 }
172 
RemoveFromList(PtrRecord * pr)173 void PoolAllocator::RemoveFromList(PtrRecord* pr) {
174   if (pr->prev == nullptr) {
175     DCHECK_EQ(lru_head_, pr);
176     lru_head_ = nullptr;
177   } else {
178     pr->prev->next = pr->next;
179   }
180   if (pr->next == nullptr) {
181     DCHECK_EQ(lru_tail_, pr);
182     lru_tail_ = pr->prev;
183   } else {
184     pr->next->prev = pr->prev;
185     if (lru_head_ == nullptr) {
186       lru_head_ = pr->next;
187     }
188   }
189 }
190 
AddToList(PtrRecord * pr)191 void PoolAllocator::AddToList(PtrRecord* pr) {
192   pr->prev = nullptr;
193   if (lru_head_ == nullptr) {
194     CHECK(lru_tail_ == nullptr);
195     lru_tail_ = pr;
196     pr->next = nullptr;
197   } else {
198     pr->next = lru_head_;
199     pr->next->prev = pr;
200   }
201   lru_head_ = pr;
202 }
203 
EvictOne()204 void PoolAllocator::EvictOne() {
205   DCHECK(lru_tail_ != nullptr);
206   PtrRecord* prec = lru_tail_;
207   RemoveFromList(prec);
208   auto iter = pool_.find(prec->num_bytes);
209   while (iter->second != prec) {
210     ++iter;
211     DCHECK(iter != pool_.end());
212   }
213   pool_.erase(iter);
214   allocator_->Free(prec->ptr, prec->num_bytes);
215   delete prec;
216   ++evicted_count_;
217   // Auto-resizing, and warning messages.
218   static const double kTolerable = 2e-3;
219   static const int kCheckInterval = 1000;
220   static const double kIncreaseFactor = 1.1;
221   static const int kMinPoolSize = 100;
222   if (0 == evicted_count_ % kCheckInterval) {
223     const double eviction_rate =
224         evicted_count_ / static_cast<double>(put_count_);
225     const int64 alloc_request_count = allocated_count_ + get_from_pool_count_;
226     const double alloc_rate =
227         (alloc_request_count == 0)
228             ? 0.0
229             : allocated_count_ / static_cast<double>(alloc_request_count);
230     // Can turn on for debugging purposes.
231     const bool kShouldLog = false;
232     if (kShouldLog) {
233       LOG(INFO) << "PoolAllocator: After " << alloc_request_count
234                 << " get requests, put_count=" << put_count_
235                 << " evicted_count=" << evicted_count_
236                 << " eviction_rate=" << eviction_rate
237                 << " and unsatisfied allocation rate=" << alloc_rate;
238     }
239     if (auto_resize_ && (eviction_rate > kTolerable) &&
240         (alloc_rate > kTolerable)) {
241       size_t new_size_limit = (pool_size_limit_ < kMinPoolSize)
242                                   ? kMinPoolSize
243                                   : (kIncreaseFactor * pool_size_limit_);
244       if (kShouldLog) {
245         LOG(INFO) << "Raising pool_size_limit_ from " << pool_size_limit_
246                   << " to " << new_size_limit;
247       }
248       pool_size_limit_ = new_size_limit;
249       // Reset all the counters so that ratios are relative to new sizes
250       // at next test interval.
251       put_count_ = 0;
252       allocated_count_ = 0;
253       evicted_count_ = 0;
254       get_from_pool_count_ = 0;
255     }
256   }
257 }
258 
Alloc(size_t alignment,size_t num_bytes)259 void* BasicCPUAllocator::Alloc(size_t alignment, size_t num_bytes) {
260   void* ptr = nullptr;
261   if (num_bytes > 0) {
262     if (numa_node_ == port::kNUMANoAffinity) {
263       ptr = port::AlignedMalloc(num_bytes, static_cast<int>(alignment));
264     } else {
265       ptr =
266           port::NUMAMalloc(numa_node_, num_bytes, static_cast<int>(alignment));
267     }
268     VisitAlloc(ptr, numa_node_, num_bytes);
269   }
270   return ptr;
271 }
272 
Free(void * ptr,size_t num_bytes)273 void BasicCPUAllocator::Free(void* ptr, size_t num_bytes) {
274   if (num_bytes > 0) {
275     VisitFree(ptr, numa_node_, num_bytes);
276     if (numa_node_ == port::kNUMANoAffinity) {
277       port::AlignedFree(ptr);
278     } else {
279       port::NUMAFree(ptr, num_bytes);
280     }
281   }
282 }
283 }  // namespace tensorflow
284