/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/shaped_buffer.h"

#include <cstdlib>
#include <memory>
#include <set>
#include <utility>
#include <vector>

#include "absl/memory/memory.h"
#include "tensorflow/compiler/xla/service/device_memory_allocator.h"
#include "tensorflow/compiler/xla/service/platform_util.h"
#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/compiler/xla/test.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/util/ptr_util.h"

namespace xla {
namespace {

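// Regression test for b/71629047: a ScopedShapedBuffer must remain usable and
// safely destructible when owned through a std::unique_ptr<ShapedBuffer>.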
TEST(ShapedBufferTest, ScopedShapeBufferAsShapedBufferB71629047) {
  TF_ASSERT_OK_AND_ASSIGN(auto platforms,
                          xla::PlatformUtil::GetSupportedPlatforms());
  ASSERT_FALSE(platforms.empty());
  auto* platform = platforms[0];
  TF_ASSERT_OK_AND_ASSIGN(auto executors,
                          xla::PlatformUtil::GetStreamExecutors(platform));
  xla::StreamExecutorMemoryAllocator allocator(platform, executors);
  const xla::Shape shape = xla::ShapeUtil::MakeShape(xla::F32, {});
  const int kDeviceOrdinal = 0;
  auto scoped_buffer = absl::make_unique<xla::ScopedShapedBuffer>(
      shape, shape, &allocator, kDeviceOrdinal);
  std::unique_ptr<xla::ShapedBuffer> buffer = std::move(scoped_buffer);
  buffer = nullptr;
}

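// A malloc-backed DeviceMemoryAllocator that tracks every outstanding
// allocation and reports a test failure on leaked or double-freed memory.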
class TestAllocator : public DeviceMemoryAllocator {
 public:
  TestAllocator()
      : DeviceMemoryAllocator(PlatformUtil::GetDefaultPlatform().ValueOrDie()) {
  }

  ~TestAllocator() override {
    if (!allocations_.empty()) {
      ADD_FAILURE() << "Some allocations not freed!";
    }
  }

  // Pull in two-arg overload of Allocate.
  using DeviceMemoryAllocator::Allocate;

  StatusOr<OwningDeviceMemory> Allocate(int device_ordinal, uint64 size,
                                        bool /*retry_on_failure*/) override {
    // By contract, we must return null if size == 0.
    if (size == 0) {
      return OwningDeviceMemory();
    }
    void* buf = malloc(size);
    allocations_.insert({device_ordinal, buf});
    return OwningDeviceMemory(se::DeviceMemoryBase(buf, size), device_ordinal,
                              this);
  }

  Status Deallocate(int device_ordinal, se::DeviceMemoryBase mem) override {
    if (mem.is_null()) {
      return Status::OK();
    }

    auto it = allocations_.find({device_ordinal, mem.opaque()});
    if (it == allocations_.end()) {
      ADD_FAILURE() << "Allocation not found (double free?)";
    } else {
      free(mem.opaque());
      allocations_.erase(it);
    }
    return Status::OK();
  }

  bool AllowsAsynchronousDeallocation() const override { return false; }

 private:
  std::set<std::pair</*device_ordinal*/ int64, void*>> allocations_;
};

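// Move assignment must transfer ownership of sb2's buffer to sb1 and free
// sb1's original buffer exactly once.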
TEST(ScopedShapedBufferTest, TestMoveAssignmentOperator) {
  Shape s = ShapeUtil::MakeShape(F32, {1});
  TestAllocator allocator;
  ScopedShapedBuffer sb1(s, s, &allocator, /*device_ordinal=*/0);
  sb1.set_buffer(
      allocator.Allocate(/*device_ordinal=*/0, /*size=*/42).ValueOrDie(),
      /*index=*/{});

  ScopedShapedBuffer sb2(s, s, &allocator, /*device_ordinal=*/1);
  sb2.set_buffer(
      allocator.Allocate(/*device_ordinal=*/1, /*size=*/10).ValueOrDie(),
      /*index=*/{});

  sb1 = std::move(sb2);

  // TestAllocator's destructor checks that all memory was freed.
}

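// TakeSubTree must transfer ownership of all buffers under the given index to
// the returned ScopedShapedBuffer, leaving null buffers behind in the source.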
TEST(ScopedShapedBufferTest, TestTakeSubTree) {
  TestAllocator allocator;

  Shape s = ShapeUtil::MakeShape(F32, {1});
  s = xla::ShapeUtil::MakeTupleShape(std::vector<xla::Shape>(2, s));
  s = xla::ShapeUtil::MakeTupleShape(std::vector<xla::Shape>(3, s));

  ScopedShapedBuffer sb(s, s, &allocator, /*device_ordinal=*/0);
  sb.buffers().ForEachMutableElement(
      [&](const xla::ShapeIndex& index, se::DeviceMemoryBase* buffer) {
        TF_ASSERT_OK_AND_ASSIGN(
            OwningDeviceMemory m,
            allocator.Allocate(/*device_ordinal=*/0, /*size=*/77));
        *buffer = m.Forget();
      });
  ShapeTree<se::DeviceMemoryBase> buffers = sb.buffers();

  // Takes a subtree out of 'sb', and verifies the buffers are as expected.
  xla::ShapeIndex subtree_index = {1};
  ScopedShapedBuffer output = sb.TakeSubTree(subtree_index);

  output.buffers().ForEachElement([&](const xla::ShapeIndex& sub_index,
                                      const se::DeviceMemoryBase& buffer) {
    xla::ShapeIndex orig_index = subtree_index;
    for (int i : sub_index) {
      orig_index.push_back(i);
    }
    EXPECT_TRUE(buffers.find(orig_index)->second.IsSameAs(buffer));
  });
  sb.buffers().ForEachElement(
      [&](const xla::ShapeIndex& index, const se::DeviceMemoryBase& buffer) {
        if (ShapeIndexView(index).StartsWith(subtree_index)) {
          EXPECT_TRUE(buffer.is_null());
        } else {
          EXPECT_TRUE(buffers.find(index)->second.IsSameAs(buffer));
        }
      });
}

// Benchmarks TakeSubTree at different depths (depth of the ShapeTree) and
// fan-outs (number of children of each non-leaf node).
void BM_TakeSubTree(int iters, int depth, int fan_out) {
  tensorflow::testing::StopTiming();
  TestAllocator allocator;
  xla::Shape shape = xla::ShapeUtil::MakeShape(xla::F32, {32, 64, 128});
  for (int i = 0; i < depth; ++i) {
    std::vector<xla::Shape> shapes(fan_out, shape);
    shape = xla::ShapeUtil::MakeTupleShape(shapes);
  }
  xla::ScopedShapedBuffer shaped_buffer(shape, shape, /*allocator=*/&allocator,
                                        /*device_ordinal=*/0);
  tensorflow::testing::StartTiming();
  for (int i = 0; i < iters; ++i) {
    // Extract a buffer from approximately the middle of the first level of the
    // tree. release() the result so its destructor does not deallocate inside
    // the timed loop.
    (void)shaped_buffer.TakeSubTree(/*index=*/{fan_out / 2}).release();
  }
  tensorflow::testing::StopTiming();
}

// Each ArgPair is {depth, fan_out}.
BENCHMARK(BM_TakeSubTree)
    ->ArgPair(1, 4)
    ->ArgPair(1, 8)
    ->ArgPair(1, 32)
    ->ArgPair(1, 64)
    ->ArgPair(1, 128)
    ->ArgPair(1, 256)
    ->ArgPair(1, 512)
    ->ArgPair(2, 4)
    ->ArgPair(2, 8)
    ->ArgPair(2, 32)
    ->ArgPair(2, 64)
    ->ArgPair(2, 128);

}  // anonymous namespace
}  // namespace xla