1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
17 
18 #include "tensorflow/core/common_runtime/pool_allocator.h"
19 
20 #include "gpu_init.h"
21 #include "tensorflow/core/common_runtime/device/device_host_allocator.h"
22 #include "tensorflow/core/platform/stream_executor.h"
23 #include "tensorflow/core/platform/test.h"
24 namespace tensorflow {
25 namespace {
26 
// A zero-byte request must come back as nullptr, handing nullptr back must be
// harmless, and neither call may be reflected in any of the pool's counters.
TEST(PoolAllocatorTest, ZeroSizeBuffers) {
  se::Platform* gpu_platform =
      se::MultiPlatformManager::PlatformWithName(GpuPlatformName())
          .ValueOrDie();
  auto* host_sub_allocator = new DeviceHostAllocator(
      gpu_platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
          .ValueOrDie(),
      /*numa_node=*/0, /*alloc_visitors=*/{}, /*free_visitors=*/{});
  PoolAllocator pool(/*pool_size_limit=*/2, /*auto_resize=*/false,
                     host_sub_allocator, new NoopRounder, "pool");

  void* empty_buffer = pool.AllocateRaw(/*alignment=*/4, /*num_bytes=*/0);
  EXPECT_EQ(nullptr, empty_buffer);

  // Returning a null pointer is expected to be a no-op rather than a crash.
  pool.DeallocateRaw(nullptr);

  // Nothing above should have been counted.
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(0, pool.put_count());
  EXPECT_EQ(0, pool.allocated_count());
  EXPECT_EQ(0, pool.evicted_count());
}
46 
// With a pool size limit of zero, non-empty requests must still return valid
// memory while every pool counter stays at zero.
TEST(PoolAllocatorTest, ZeroSizePool) {
  se::Platform* gpu_platform =
      se::MultiPlatformManager::PlatformWithName(GpuPlatformName())
          .ValueOrDie();
  auto* host_sub_allocator = new DeviceHostAllocator(
      gpu_platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
          .ValueOrDie(),
      /*numa_node=*/0, /*alloc_visitors=*/{}, /*free_visitors=*/{});
  PoolAllocator pool(/*pool_size_limit=*/0, /*auto_resize=*/false,
                     host_sub_allocator, new NoopRounder, "pool");

  // Shared check: none of the pool statistics has moved off zero.
  const auto expect_all_counters_zero = [&pool]() {
    EXPECT_EQ(0, pool.get_from_pool_count());
    EXPECT_EQ(0, pool.put_count());
    EXPECT_EQ(0, pool.allocated_count());
    EXPECT_EQ(0, pool.evicted_count());
  };

  // Freshly constructed pool.
  expect_all_counters_zero();

  // Every request is expected to go around the pool; only the zero-byte
  // request may come back null.
  for (int round = 0; round < 3; ++round) {
    void* empty = pool.AllocateRaw(4, 0);
    EXPECT_EQ(nullptr, empty);
    void* four_bytes = pool.AllocateRaw(4, 4);
    EXPECT_NE(nullptr, four_bytes);
    void* twelve_bytes = pool.AllocateRaw(4, 12);
    EXPECT_NE(nullptr, twelve_bytes);

    pool.DeallocateRaw(empty);
    pool.DeallocateRaw(four_bytes);
    pool.DeallocateRaw(twelve_bytes);
  }

  // The allocate/free traffic above must not have been counted.
  expect_all_counters_zero();
}
81 
// Requests 111 bytes at every power-of-two alignment from 1 up to 2^15 and
// checks that each returned address honors the requested alignment.
TEST(PoolAllocatorTest, Alignment) {
  se::Platform* gpu_platform =
      se::MultiPlatformManager::PlatformWithName(GpuPlatformName())
          .ValueOrDie();
  auto* host_sub_allocator = new DeviceHostAllocator(
      gpu_platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
          .ValueOrDie(),
      /*numa_node=*/0, /*alloc_visitors=*/{}, /*free_visitors=*/{});
  PoolAllocator pool(/*pool_size_limit=*/0, /*auto_resize=*/false,
                     host_sub_allocator, new NoopRounder, "pool");

  for (int shift = 0; shift < 16; ++shift) {
    size_t alignment = 1 << shift;
    void* p = pool.AllocateRaw(alignment, 111);
    EXPECT_TRUE(p != nullptr);

    // An aligned address has no bits set below the alignment bit.
    const size_t low_bits_mask = alignment - 1;
    const auto misaligned_bits = reinterpret_cast<int64>(p) & low_bits_mask;
    EXPECT_EQ(0, misaligned_bits)
        << "ptr: " << p << " alignment " << alignment;

    // The buffer is deliberately never handed back: tearing down the
    // PoolAllocator at the end of the test is expected to release whatever
    // is still outstanding.
  }
}
103 
// Exercises auto-resizing: a brief burst of traffic leaves the size limit at
// its initial value, while sustained traffic makes the limit grow.
TEST(PoolAllocatorTest, AutoResize) {
  auto* cpu_sub_allocator = new BasicCPUAllocator(
      /*numa_node=*/0, /*alloc_visitors=*/{}, /*free_visitors=*/{});
  PoolAllocator pool(/*pool_size_limit=*/2, /*auto_resize=*/true,
                     cpu_sub_allocator, new NoopRounder, "pool");

  // One pass over ten distinct sizes (64, 128, ..., 64 << 9). This is too
  // little traffic to trigger a resize, so the limit must remain 2.
  for (int size_shift = 0; size_shift < 10; ++size_shift) {
    void* buffer = pool.AllocateRaw(4, 64 << size_shift);
    pool.DeallocateRaw(buffer);
  }
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(10, pool.allocated_count());
  EXPECT_EQ(10, pool.put_count());
  EXPECT_EQ(8, pool.evicted_count());
  EXPECT_EQ(2, pool.size_limit());

  // 1200 further alloc/free pairs, cycling through the same ten sizes in the
  // same order. The size limit is expected to have jumped to 100 afterwards.
  for (int iter = 0; iter < 1200; ++iter) {
    void* buffer = pool.AllocateRaw(4, 64 << (iter % 10));
    pool.DeallocateRaw(buffer);
  }
  EXPECT_EQ(100, pool.size_limit());
}
130 
TEST(PoolAllocatorTest,CudaHostAllocator)131 TEST(PoolAllocatorTest, CudaHostAllocator) {
132   int alloc_count = 0;
133   int64 alloc_size = 0;
134   SubAllocator::Visitor alloc_visitor =
135       [&alloc_count, &alloc_size](void* ptr, int numa_node, int64 size) {
136         ++alloc_count;
137         alloc_size += size;
138       };
139   int free_count = 0;
140   int64 free_size = 0;
141   SubAllocator::Visitor free_visitor =
142       [&free_count, &free_size](void* ptr, int numa_node, int64 size) {
143         ++free_count;
144         free_size += size;
145       };
146   se::Platform* platform =
147       se::MultiPlatformManager::PlatformWithName(GpuPlatformName())
148           .ValueOrDie();
149   DeviceHostAllocator* sub_allocator = new DeviceHostAllocator(
150       platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
151           .ValueOrDie(),
152       0 /*numa_node*/, {alloc_visitor}, {free_visitor});
153   PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/,
154                      sub_allocator, new NoopRounder, "pool");
155   EXPECT_EQ(0, alloc_count);
156   EXPECT_EQ(0, alloc_size);
157   EXPECT_EQ(0, free_count);
158   EXPECT_EQ(0, free_size);
159 
160   // Repeatedly Get a 16-byte value, confirming that there's only
161   // one real allocation.
162   void* p1_16 = pool.AllocateRaw(4, 16);
163   EXPECT_EQ(0, pool.get_from_pool_count());
164   EXPECT_EQ(1, pool.allocated_count());
165   EXPECT_NE(nullptr, p1_16);
166   EXPECT_EQ(1, alloc_count);  // Underlying suballoc of 16 bytes
167   // Each suballocation includes a 16B ChunkPrefix.
168   static const int kChunkPrefixSize = 16;
169   EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size);
170   pool.DeallocateRaw(p1_16);
171   // Pool contents {16}
172   EXPECT_EQ(1, pool.put_count());
173   void* p2_16 = pool.AllocateRaw(4, 16);  // Get it again.
174   EXPECT_EQ(1, pool.get_from_pool_count());
175   EXPECT_EQ(1, pool.allocated_count());
176   EXPECT_EQ(p1_16, p2_16);    // Same pointer value
177   pool.DeallocateRaw(p2_16);  // Put it back.
178   // Pool contents {16}
179   EXPECT_EQ(2, pool.put_count());
180   EXPECT_EQ(1, alloc_count);  // Underlying suballoc of 16 bytes
181   EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size);
182   EXPECT_EQ(0, free_count);
183 
184   // Get two more values of different sizes.
185   void* p3_4 = pool.AllocateRaw(4, 4);
186   EXPECT_EQ(2, pool.allocated_count());
187   EXPECT_NE(p1_16, p3_4);  // Different pointer value
188   EXPECT_NE(nullptr, p3_4);
189   pool.DeallocateRaw(p3_4);  // Put it back. Pool is now full.
190   // Pool contents {4, 16}
191   EXPECT_EQ(3, pool.put_count());
192   void* p4_2 = pool.AllocateRaw(4, 2);  // Get a third size buffer.
193   EXPECT_NE(nullptr, p4_2);
194   EXPECT_EQ(0, pool.evicted_count());
195   EXPECT_EQ(3, alloc_count);
196   EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
197   EXPECT_EQ(0, free_count);
198 
199   // The pool is full: when we put back p4_2, the 16-byte buffer
200   // should be evicted since it was least recently inserted.
201   pool.DeallocateRaw(p4_2);
202   // Pool contents {2, 4}
203   EXPECT_EQ(4, pool.put_count());
204   EXPECT_EQ(1, pool.evicted_count());
205   EXPECT_EQ(3, alloc_count);
206   EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
207   EXPECT_EQ(1, free_count);
208   EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size);
209 
210   // Re-getting and putting size 2 or 4 should not alter pool size or
211   // num-evicted.
212   void* p5_4 = pool.AllocateRaw(4, 4);
213   EXPECT_NE(nullptr, p5_4);
214   pool.DeallocateRaw(p5_4);
215   void* p6_2 = pool.AllocateRaw(4, 2);
216   EXPECT_NE(nullptr, p6_2);
217   pool.DeallocateRaw(p6_2);
218   EXPECT_EQ(3, pool.get_from_pool_count());
219   EXPECT_EQ(6, pool.put_count());
220   EXPECT_EQ(3, pool.allocated_count());
221   EXPECT_EQ(1, pool.evicted_count());
222   EXPECT_EQ(3, alloc_count);
223   EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
224   EXPECT_EQ(1, free_count);
225   EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size);
226 
227   pool.Clear();
228   EXPECT_EQ(0, pool.get_from_pool_count());
229   EXPECT_EQ(0, pool.put_count());
230   EXPECT_EQ(0, pool.allocated_count());
231   EXPECT_EQ(0, pool.evicted_count());
232   EXPECT_EQ(3, alloc_count);
233   EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
234   EXPECT_EQ(3, free_count);
235   EXPECT_EQ(16 + 4 + 2 + (free_count * kChunkPrefixSize), free_size);
236 }
237 
// Pow2Rounder::RoundUp should leave exact powers of two alone and lift every
// other value to the next power of two.
TEST(PoolAllocatorTest, Pow2Rounder) {
  Pow2Rounder rounder;

  // Inputs that already are powers of two come back unchanged.
  EXPECT_EQ(1, rounder.RoundUp(1));
  EXPECT_EQ(2, rounder.RoundUp(2));
  EXPECT_EQ(16, rounder.RoundUp(16));
  EXPECT_EQ(65536, rounder.RoundUp(65536));

  // Everything else is rounded up to the next power of two.
  EXPECT_EQ(16, rounder.RoundUp(9));
  EXPECT_EQ(65536, rounder.RoundUp(41234));
  EXPECT_EQ(65536, rounder.RoundUp(65535));
}
248 
// The name supplied at construction must be reported back by Name().
TEST(PoolAllocatorTest, Name) {
  se::Platform* gpu_platform =
      se::MultiPlatformManager::PlatformWithName(GpuPlatformName())
          .ValueOrDie();
  auto* host_sub_allocator = new DeviceHostAllocator(
      gpu_platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
          .ValueOrDie(),
      /*numa_node=*/0, /*alloc_visitors=*/{}, /*free_visitors=*/{});
  PoolAllocator pool(/*pool_size_limit=*/2, /*auto_resize=*/false,
                     host_sub_allocator, new NoopRounder, "pool");

  EXPECT_EQ("pool", pool.Name());
}
262 
263 }  // namespace
264 }  // namespace tensorflow
265 
266 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
267