1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
#include <atomic>
#include <memory>

#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/allocator_registry.h"
#include "tensorflow/core/framework/tracking_allocator.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/mem.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/types.h"
26 
27 namespace tensorflow {
28 
// File-local switch: while true, the CPU allocator collects additional
// allocation statistics.  Starts out disabled.
static bool cpu_allocator_collect_stats = false;

// Turns on the extra CPU allocator statistics.
void EnableCPUAllocatorStats() {
  cpu_allocator_collect_stats = true;
}

// Turns off the extra CPU allocator statistics.
void DisableCPUAllocatorStats() {
  cpu_allocator_collect_stats = false;
}

// Reports whether the extra CPU allocator statistics are currently enabled.
bool CPUAllocatorStatsEnabled() {
  return cpu_allocator_collect_stats;
}
35 
// Upper bound on how many "total allocated memory" warnings are logged.
static constexpr int kMaxTotalAllocationWarnings = 1;

// Upper bound on how many "large single allocation" warnings are logged.
static constexpr int kMaxSingleAllocationWarnings = 5;

// Fraction of the RAM reported by port::AvailableRam(). If
// cpu_allocator_collect_stats is true, a warning is issued once the total
// allocated memory exceeds that share.
static constexpr double kTotalAllocationWarningThreshold = 0.5;

// Fraction of the RAM reported by port::AvailableRam(). Individual
// allocations larger than that share trigger a warning.
static constexpr double kLargeAllocationWarningThreshold = 0.1;
46 
47 // Cache first invocation to port::AvailableRam, as it can be expensive.
LargeAllocationWarningBytes()48 static int64_t LargeAllocationWarningBytes() {
49   static int64_t value = static_cast<int64>(port::AvailableRam() *
50                                             kLargeAllocationWarningThreshold);
51   return value;
52 }
53 
TotalAllocationWarningBytes()54 static int64_t TotalAllocationWarningBytes() {
55   static int64_t value = static_cast<int64>(port::AvailableRam() *
56                                             kTotalAllocationWarningThreshold);
57   return value;
58 }
59 
60 namespace {
61 
62 // A default Allocator for CPU devices.  ProcessState::GetCPUAllocator() will
63 // return a different version that may perform better, but may also lack the
64 // optional stats triggered by the functions above.  TODO(tucker): migrate all
65 // uses of cpu_allocator() except tests to use ProcessState instead.
66 class CPUAllocator : public Allocator {
67  public:
CPUAllocator()68   CPUAllocator()
69       : single_allocation_warning_count_(0),
70         total_allocation_warning_count_(0) {}
71 
~CPUAllocator()72   ~CPUAllocator() override {}
73 
Name()74   string Name() override { return "cpu"; }
75 
AllocateRaw(size_t alignment,size_t num_bytes)76   void* AllocateRaw(size_t alignment, size_t num_bytes) override {
77     if (num_bytes > static_cast<size_t>(LargeAllocationWarningBytes()) &&
78         single_allocation_warning_count_ < kMaxSingleAllocationWarnings) {
79       ++single_allocation_warning_count_;
80       LOG(WARNING) << "Allocation of " << num_bytes << " exceeds "
81                    << 100 * kLargeAllocationWarningThreshold
82                    << "% of free system memory.";
83     }
84 
85     void* p = port::AlignedMalloc(num_bytes, alignment);
86     if (cpu_allocator_collect_stats) {
87       const std::size_t alloc_size = port::MallocExtension_GetAllocatedSize(p);
88       mutex_lock l(mu_);
89       ++stats_.num_allocs;
90       stats_.bytes_in_use += alloc_size;
91       stats_.peak_bytes_in_use =
92           std::max<int64>(stats_.peak_bytes_in_use, stats_.bytes_in_use);
93       stats_.largest_alloc_size =
94           std::max<int64>(stats_.largest_alloc_size, alloc_size);
95 
96       if (stats_.bytes_in_use > TotalAllocationWarningBytes() &&
97           total_allocation_warning_count_ < kMaxTotalAllocationWarnings) {
98         ++total_allocation_warning_count_;
99         LOG(WARNING) << "Total allocated memory " << stats_.bytes_in_use
100                      << "exceeds " << 100 * kTotalAllocationWarningThreshold
101                      << "% of free system memory";
102       }
103     }
104     return p;
105   }
106 
DeallocateRaw(void * ptr)107   void DeallocateRaw(void* ptr) override {
108     if (cpu_allocator_collect_stats) {
109       const std::size_t alloc_size =
110           port::MallocExtension_GetAllocatedSize(ptr);
111       mutex_lock l(mu_);
112       stats_.bytes_in_use -= alloc_size;
113     }
114     port::AlignedFree(ptr);
115   }
116 
GetStats()117   absl::optional<AllocatorStats> GetStats() override {
118     mutex_lock l(mu_);
119     return stats_;
120   }
121 
ClearStats()122   void ClearStats() override {
123     mutex_lock l(mu_);
124     stats_.num_allocs = 0;
125     stats_.peak_bytes_in_use = stats_.bytes_in_use;
126     stats_.largest_alloc_size = 0;
127   }
128 
AllocatedSizeSlow(const void * ptr) const129   size_t AllocatedSizeSlow(const void* ptr) const override {
130     return port::MallocExtension_GetAllocatedSize(ptr);
131   }
132 
133  private:
134   mutex mu_;
135   AllocatorStats stats_ TF_GUARDED_BY(mu_);
136 
137   // Use <atomic> for single allocations to avoid mutex contention when
138   // statistics are disabled.
139   std::atomic<int> single_allocation_warning_count_;
140   int total_allocation_warning_count_ TF_GUARDED_BY(mu_);
141 
142   TF_DISALLOW_COPY_AND_ASSIGN(CPUAllocator);
143 };
144 
145 class CPUAllocatorFactory : public AllocatorFactory {
146  public:
CreateAllocator()147   Allocator* CreateAllocator() override { return new CPUAllocator; }
148 
CreateSubAllocator(int numa_node)149   SubAllocator* CreateSubAllocator(int numa_node) override {
150     return new CPUSubAllocator(new CPUAllocator);
151   }
152 
153  private:
154   class CPUSubAllocator : public SubAllocator {
155    public:
CPUSubAllocator(CPUAllocator * cpu_allocator)156     explicit CPUSubAllocator(CPUAllocator* cpu_allocator)
157         : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {}
158 
Alloc(size_t alignment,size_t num_bytes,size_t * bytes_received)159     void* Alloc(size_t alignment, size_t num_bytes,
160                 size_t* bytes_received) override {
161       *bytes_received = num_bytes;
162       return cpu_allocator_->AllocateRaw(alignment, num_bytes);
163     }
164 
Free(void * ptr,size_t num_bytes)165     void Free(void* ptr, size_t num_bytes) override {
166       cpu_allocator_->DeallocateRaw(ptr);
167     }
168 
SupportsCoalescing() const169     bool SupportsCoalescing() const override { return false; }
170 
171    private:
172     CPUAllocator* cpu_allocator_;
173   };
174 };
175 
176 REGISTER_MEM_ALLOCATOR("DefaultCPUAllocator", 100, CPUAllocatorFactory);
177 }  // namespace
178 
179 }  // namespace tensorflow
180