1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_
17 #define TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_
18 
19 // Simple LRU pool allocators for various flavors of CPU RAM.
20 
21 #include <atomic>
22 #include <map>
23 #include <memory>
24 #include <vector>
25 #include "tensorflow/core/framework/allocator.h"
26 #include "tensorflow/core/lib/core/bits.h"
27 #include "tensorflow/core/platform/logging.h"
28 #include "tensorflow/core/platform/macros.h"
29 #include "tensorflow/core/platform/mutex.h"
30 #include "tensorflow/core/platform/types.h"
31 
32 namespace tensorflow {
33 
// Abstract strategy for rounding an integer byte count upwards.  Concrete
// rounders override RoundUp() to supply the actual policy.
class RoundUpInterface {
 public:
  // Virtual so that a rounder can be destroyed through a pointer to this
  // interface.
  virtual ~RoundUpInterface() = default;

  // Returns the rounded value for a request of "num_bytes" bytes.  The
  // rounding policy is entirely up to the implementing class.
  virtual size_t RoundUp(size_t num_bytes) = 0;
};
40 
41 // Size-limited pool of memory buffers obtained from a SubAllocator
42 // instance.  Pool eviction policy is LRU.
43 class PoolAllocator : public Allocator {
44  public:
45   // "pool_size_limit" is the maximum number of returned, re-usable
46   // memory buffers to keep in the pool.  If pool_size_limit == 0, the
47   // pool is effectively a thin wrapper around the allocator.
48   // If "auto_resize" is true, then the pool_size_limit will gradually
49   // be raised so that deallocations happen very rarely, if at all.
50   // Transitory start-up objects may deallocate, but the long-term
51   // working-set should not. Auto-resizing can raise pool_size_limit
52   // but will never lower it.
53   // "allocator" is the object that performs the underlying memory
54   // malloc/free operations.  This object takes ownership of allocator.
55   PoolAllocator(size_t pool_size_limit, bool auto_resize,
56                 SubAllocator* allocator, RoundUpInterface* size_rounder,
57                 string name);
58   ~PoolAllocator() override;
59 
Name()60   string Name() override { return name_; }
61 
62   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
63 
64   void DeallocateRaw(void* ptr) override;
65 
66   // Allocate an unused memory region of size "num_bytes".  Fetch from
67   // the pool if available, otherwise call allocator_.
68   void* Get(size_t num_bytes);
69 
70   // Return a no-longer needed memory region to the pool.  It is an error
71   // to deference "ptr" after this call.  If the pool is full, the least
72   // recently used region will be deallocated.
73   void Put(void* ptr, size_t num_bytes);
74 
75   // Reset the pool to empty.
76   void Clear();
77 
78   // The following accessors permit monitoring the effectiveness of
79   // the pool at avoiding repeated malloc/frees on the underlying
80   // allocator.  Read locks are not taken on the theory that value
81   // consistency with other threads is not important.
82 
83   // Number of Get() requests satisfied from pool.
get_from_pool_count()84   int64 get_from_pool_count() const NO_THREAD_SAFETY_ANALYSIS {
85     return get_from_pool_count_;
86   }
87   // Number of Put() requests.
put_count()88   int64 put_count() const NO_THREAD_SAFETY_ANALYSIS { return put_count_; }
89   // Number of Get() requests requiring a fresh allocation.
allocated_count()90   int64 allocated_count() const NO_THREAD_SAFETY_ANALYSIS {
91     return allocated_count_;
92   }
93   // Number of pool evictions.
evicted_count()94   int64 evicted_count() const NO_THREAD_SAFETY_ANALYSIS {
95     return evicted_count_;
96   }
97   // Current size limit.
size_limit()98   size_t size_limit() const NO_THREAD_SAFETY_ANALYSIS {
99     return pool_size_limit_;
100   }
101 
102  private:
103   struct PtrRecord {
104     void* ptr;
105     size_t num_bytes;
106     PtrRecord* prev;
107     PtrRecord* next;
108   };
109 
110   // Remove "pr" from the double-linked LRU list.
111   void RemoveFromList(PtrRecord* pr) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
112 
113   // Add "pr" to the head of the double-linked LRU list.
114   void AddToList(PtrRecord* pr) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
115 
116   // Delete the least recently used record.
117   void EvictOne() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
118 
119   const string name_;
120   const bool has_size_limit_;
121   const bool auto_resize_;
122   size_t pool_size_limit_;
123   std::unique_ptr<SubAllocator> allocator_;
124   std::unique_ptr<RoundUpInterface> size_rounder_;
125   mutex mutex_;
126   std::multimap<const size_t, PtrRecord*> pool_ GUARDED_BY(mutex_);
127   PtrRecord* lru_head_ GUARDED_BY(mutex_) = nullptr;
128   PtrRecord* lru_tail_ GUARDED_BY(mutex_) = nullptr;
129   int64 get_from_pool_count_ GUARDED_BY(mutex_) = 0;
130   int64 put_count_ GUARDED_BY(mutex_) = 0;
131   int64 allocated_count_ GUARDED_BY(mutex_) = 0;
132   int64 evicted_count_ GUARDED_BY(mutex_) = 0;
133 };
134 
135 // Do-nothing rounder. Passes through sizes unchanged.
136 class NoopRounder : public RoundUpInterface {
137  public:
RoundUp(size_t num_bytes)138   size_t RoundUp(size_t num_bytes) override { return num_bytes; }
139 };
140 
141 // Power of 2 rounder: rounds up to nearest power of 2 size.
142 class Pow2Rounder : public RoundUpInterface {
143  public:
RoundUp(size_t num_bytes)144   size_t RoundUp(size_t num_bytes) override {
145     return 1uLL << Log2Ceiling64(num_bytes);
146   }
147 };
148 
149 class BasicCPUAllocator : public SubAllocator {
150  public:
BasicCPUAllocator(int numa_node,const std::vector<Visitor> & alloc_visitors,const std::vector<Visitor> & free_visitors)151   BasicCPUAllocator(int numa_node, const std::vector<Visitor>& alloc_visitors,
152                     const std::vector<Visitor>& free_visitors)
153       : SubAllocator(alloc_visitors, free_visitors), numa_node_(numa_node) {}
154 
~BasicCPUAllocator()155   ~BasicCPUAllocator() override {}
156 
157   void* Alloc(size_t alignment, size_t num_bytes) override;
158 
159   void Free(void* ptr, size_t num_bytes) override;
160 
161  private:
162   int numa_node_;
163 
164   TF_DISALLOW_COPY_AND_ASSIGN(BasicCPUAllocator);
165 };
166 
167 }  // namespace tensorflow
168 #endif  // TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_
169