1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #if GOOGLE_CUDA
17 
18 #include "tensorflow/core/common_runtime/pool_allocator.h"
19 
20 #include "tensorflow/core/common_runtime/gpu/gpu_host_allocator.h"
21 #include "tensorflow/core/platform/stream_executor.h"
22 #include "tensorflow/core/platform/test.h"
23 
24 namespace tensorflow {
25 namespace {
26 
// Checks that a zero-byte request yields nullptr, that handing nullptr back
// to the pool is tolerated, and that neither call moves any pool counter.
TEST(PoolAllocatorTest, ZeroSizeBuffers) {
  se::Platform* cuda_platform =
      se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
  auto* stream_exec =
      cuda_platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
          .ValueOrDie();
  auto* host_sub_allocator =
      new GpuHostAllocator(stream_exec, 0 /*numa_node*/, {}, {});
  PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/,
                     host_sub_allocator, new NoopRounder, "pool");

  void* empty_request = pool.AllocateRaw(4 /*alignment*/, 0 /*num_bytes*/);
  EXPECT_EQ(nullptr, empty_request);

  // Returning a null pointer must be harmless (i.e. must not crash).
  pool.DeallocateRaw(nullptr);

  // None of the bookkeeping counters may have changed.
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(0, pool.put_count());
  EXPECT_EQ(0, pool.allocated_count());
  EXPECT_EQ(0, pool.evicted_count());
}
45 
// Exercises a pool whose size limit is zero: non-empty requests must still
// produce usable pointers, a zero-byte request must produce nullptr, and the
// pool's counters are expected to stay at zero throughout.
TEST(PoolAllocatorTest, ZeroSizePool) {
  se::Platform* cuda_platform =
      se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
  auto* stream_exec =
      cuda_platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
          .ValueOrDie();
  PoolAllocator pool(
      0 /*pool_size_limit*/, false /*auto_resize*/,
      new GpuHostAllocator(stream_exec, 0 /*numa_node*/, {}, {}),
      new NoopRounder, "pool");

  // Asserts that every pool statistic is still zero.
  const auto expect_counters_untouched = [&pool]() {
    EXPECT_EQ(0, pool.get_from_pool_count());
    EXPECT_EQ(0, pool.put_count());
    EXPECT_EQ(0, pool.allocated_count());
    EXPECT_EQ(0, pool.evicted_count());
  };

  expect_counters_untouched();

  // Every request is expected to bypass the pool entirely.
  for (int round = 0; round < 3; ++round) {
    void* zero_bytes = pool.AllocateRaw(4, 0);
    void* four_bytes = pool.AllocateRaw(4, 4);
    void* twelve_bytes = pool.AllocateRaw(4, 12);

    EXPECT_EQ(nullptr, zero_bytes);
    EXPECT_NE(nullptr, four_bytes);
    EXPECT_NE(nullptr, twelve_bytes);

    pool.DeallocateRaw(zero_bytes);
    pool.DeallocateRaw(four_bytes);
    pool.DeallocateRaw(twelve_bytes);
  }

  expect_counters_untouched();
}
79 
// Requests 111 bytes at every power-of-two alignment from 1 to 2^15 and
// checks that each returned pointer is non-null and suitably aligned.
TEST(PoolAllocatorTest, Alignment) {
  se::Platform* cuda_platform =
      se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
  auto* stream_exec =
      cuda_platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
          .ValueOrDie();
  PoolAllocator pool(
      0 /*pool_size_limit*/, false /*auto_resize*/,
      new GpuHostAllocator(stream_exec, 0 /*numa_node*/, {}, {}),
      new NoopRounder, "pool");

  for (int shift = 0; shift < 16; ++shift) {
    size_t alignment = 1 << shift;
    void* ptr = pool.AllocateRaw(alignment, 111);
    EXPECT_TRUE(ptr != nullptr);

    // The low bits selected by (alignment - 1) must all be clear.
    const auto low_bits = reinterpret_cast<int64>(ptr) & (alignment - 1);
    EXPECT_EQ(0, low_bits) << "ptr: " << ptr << " alignment " << alignment;

    // The buffer is deliberately never handed back: the intent is to
    // exercise cleanup of outstanding memory when the PoolAllocator is
    // destroyed.
  }
}
100 
// Drives an auto-resizing pool (initial limit 2) with a repeating pattern of
// ten distinct buffer sizes and checks how the size limit evolves.
TEST(PoolAllocatorTest, AutoResize) {
  PoolAllocator pool(2 /*pool_size_limit*/, true /*auto_resize*/,
                     new BasicCPUAllocator(0 /*numa_node*/, {}, {}),
                     new NoopRounder, "pool");

  // One pass: allocate and immediately release buffers of 64, 128, ...,
  // 64 << 9 bytes.
  const auto cycle_ten_sizes = [&pool]() {
    for (int shift = 0; shift < 10; ++shift) {
      void* buffer = pool.AllocateRaw(4, 64 << shift);
      pool.DeallocateRaw(buffer);
    }
  };

  // After a single pass the limit is expected to still be 2, with eight of
  // the ten returned buffers evicted.
  cycle_ten_sizes();
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(10, pool.allocated_count());
  EXPECT_EQ(10, pool.put_count());
  EXPECT_EQ(8, pool.evicted_count());
  EXPECT_EQ(2, pool.size_limit());

  // 120 more passes (1200 alloc/free pairs): the limit should jump to 100.
  for (int pass = 0; pass < 120; ++pass) {
    cycle_ten_sizes();
  }
  EXPECT_EQ(100, pool.size_limit());
}
127 
TEST(PoolAllocatorTest,CudaHostAllocator)128 TEST(PoolAllocatorTest, CudaHostAllocator) {
129   int alloc_count = 0;
130   int64 alloc_size = 0;
131   SubAllocator::Visitor alloc_visitor =
132       [&alloc_count, &alloc_size](void* ptr, int numa_node, int64 size) {
133         ++alloc_count;
134         alloc_size += size;
135       };
136   int free_count = 0;
137   int64 free_size = 0;
138   SubAllocator::Visitor free_visitor =
139       [&free_count, &free_size](void* ptr, int numa_node, int64 size) {
140         ++free_count;
141         free_size += size;
142       };
143   se::Platform* platform =
144       se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
145   GpuHostAllocator* sub_allocator = new GpuHostAllocator(
146       platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
147           .ValueOrDie(),
148       0 /*numa_node*/, {alloc_visitor}, {free_visitor});
149   PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/,
150                      sub_allocator, new NoopRounder, "pool");
151   EXPECT_EQ(0, alloc_count);
152   EXPECT_EQ(0, alloc_size);
153   EXPECT_EQ(0, free_count);
154   EXPECT_EQ(0, free_size);
155 
156   // Repeatedly Get a 16-byte value, confirming that there's only
157   // one real allocation.
158   void* p1_16 = pool.AllocateRaw(4, 16);
159   EXPECT_EQ(0, pool.get_from_pool_count());
160   EXPECT_EQ(1, pool.allocated_count());
161   EXPECT_NE(nullptr, p1_16);
162   EXPECT_EQ(1, alloc_count);  // Underlying suballoc of 16 bytes
163   // Each suballocation includes a 16B ChunkPrefix.
164   static const int kChunkPrefixSize = 16;
165   EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size);
166   pool.DeallocateRaw(p1_16);
167   // Pool contents {16}
168   EXPECT_EQ(1, pool.put_count());
169   void* p2_16 = pool.AllocateRaw(4, 16);  // Get it again.
170   EXPECT_EQ(1, pool.get_from_pool_count());
171   EXPECT_EQ(1, pool.allocated_count());
172   EXPECT_EQ(p1_16, p2_16);    // Same pointer value
173   pool.DeallocateRaw(p2_16);  // Put it back.
174   // Pool contents {16}
175   EXPECT_EQ(2, pool.put_count());
176   EXPECT_EQ(1, alloc_count);  // Underlying suballoc of 16 bytes
177   EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size);
178   EXPECT_EQ(0, free_count);
179 
180   // Get two more values of different sizes.
181   void* p3_4 = pool.AllocateRaw(4, 4);
182   EXPECT_EQ(2, pool.allocated_count());
183   EXPECT_NE(p1_16, p3_4);  // Different pointer value
184   EXPECT_NE(nullptr, p3_4);
185   pool.DeallocateRaw(p3_4);  // Put it back. Pool is now full.
186   // Pool contents {4, 16}
187   EXPECT_EQ(3, pool.put_count());
188   void* p4_2 = pool.AllocateRaw(4, 2);  // Get a third size buffer.
189   EXPECT_NE(nullptr, p4_2);
190   EXPECT_EQ(0, pool.evicted_count());
191   EXPECT_EQ(3, alloc_count);
192   EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
193   EXPECT_EQ(0, free_count);
194 
195   // The pool is full: when we put back p4_2, the 16-byte buffer
196   // should be evicted since it was least recently inserted.
197   pool.DeallocateRaw(p4_2);
198   // Pool contents {2, 4}
199   EXPECT_EQ(4, pool.put_count());
200   EXPECT_EQ(1, pool.evicted_count());
201   EXPECT_EQ(3, alloc_count);
202   EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
203   EXPECT_EQ(1, free_count);
204   EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size);
205 
206   // Re-getting and putting size 2 or 4 should not alter pool size or
207   // num-evicted.
208   void* p5_4 = pool.AllocateRaw(4, 4);
209   EXPECT_NE(nullptr, p5_4);
210   pool.DeallocateRaw(p5_4);
211   void* p6_2 = pool.AllocateRaw(4, 2);
212   EXPECT_NE(nullptr, p6_2);
213   pool.DeallocateRaw(p6_2);
214   EXPECT_EQ(3, pool.get_from_pool_count());
215   EXPECT_EQ(6, pool.put_count());
216   EXPECT_EQ(3, pool.allocated_count());
217   EXPECT_EQ(1, pool.evicted_count());
218   EXPECT_EQ(3, alloc_count);
219   EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
220   EXPECT_EQ(1, free_count);
221   EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size);
222 
223   pool.Clear();
224   EXPECT_EQ(0, pool.get_from_pool_count());
225   EXPECT_EQ(0, pool.put_count());
226   EXPECT_EQ(0, pool.allocated_count());
227   EXPECT_EQ(0, pool.evicted_count());
228   EXPECT_EQ(3, alloc_count);
229   EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
230   EXPECT_EQ(3, free_count);
231   EXPECT_EQ(16 + 4 + 2 + (free_count * kChunkPrefixSize), free_size);
232 }
233 
// Checks Pow2Rounder::RoundUp against a table of inputs: exact powers of two
// are expected to map to themselves, everything else to the next power of
// two above it.
TEST(PoolAllocatorTest, Pow2Rounder) {
  struct RoundingCase {
    size_t request;
    size_t expected;
  };
  const RoundingCase kCases[] = {
      {1, 1},         {2, 2},         {9, 16},        {16, 16},
      {41234, 65536}, {65535, 65536}, {65536, 65536},
  };

  Pow2Rounder rounder;
  for (const RoundingCase& c : kCases) {
    EXPECT_EQ(c.expected, rounder.RoundUp(c.request));
  }
}
244 
// Checks that Name() reports the name string given at construction.
TEST(PoolAllocatorTest, Name) {
  se::Platform* cuda_platform =
      se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
  auto* stream_exec =
      cuda_platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
          .ValueOrDie();
  auto* host_sub_allocator =
      new GpuHostAllocator(stream_exec, 0 /*numa_node*/, {}, {});
  PoolAllocator named_pool(2 /*pool_size_limit*/, false /*auto_resize*/,
                           host_sub_allocator, new NoopRounder, "pool");

  EXPECT_EQ("pool", named_pool.Name());
}
257 
258 }  // namespace
259 }  // namespace tensorflow
260 
261 #endif  // GOOGLE_CUDA
262