/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/shaped_buffer.h"

#include "absl/memory/memory.h"
#include "tensorflow/compiler/xla/service/device_memory_allocator.h"
#include "tensorflow/compiler/xla/service/platform_util.h"
#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/compiler/xla/test.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/util/ptr_util.h"

namespace xla {
namespace {
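
// Regression test for b/71629047: moving a ScopedShapedBuffer into a
// std::unique_ptr<ShapedBuffer> and destroying it through the base-class
// pointer should be safe.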
TEST(ShapedBufferTest, ScopedShapeBufferAsShapedBufferB71629047) {
  TF_ASSERT_OK_AND_ASSIGN(auto platforms,
                          xla::PlatformUtil::GetSupportedPlatforms());
  ASSERT_FALSE(platforms.empty());
  auto* platform = platforms[0];
  TF_ASSERT_OK_AND_ASSIGN(auto executors,
                          xla::PlatformUtil::GetStreamExecutors(platform));
  xla::StreamExecutorMemoryAllocator allocator(platform, executors);
  const xla::Shape shape = xla::ShapeUtil::MakeShape(xla::F32, {});
  const int kDeviceOrdinal = 0;
  auto scoped_buffer = absl::make_unique<xla::ScopedShapedBuffer>(
      shape, shape, &allocator, kDeviceOrdinal);
  std::unique_ptr<xla::ShapedBuffer> buffer = std::move(scoped_buffer);
  buffer = nullptr;
}
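
// A test-only allocator backed by malloc/free. It records every outstanding
// allocation so the tests below can detect leaks and double frees.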
class TestAllocator : public DeviceMemoryAllocator {
 public:
  TestAllocator()
      : DeviceMemoryAllocator(PlatformUtil::GetDefaultPlatform().ValueOrDie()) {
  }

  ~TestAllocator() override {
    if (!allocations_.empty()) {
      ADD_FAILURE() << "Some allocations not freed!";
    }
  }

  // Pull in two-arg overload of Allocate.
  using DeviceMemoryAllocator::Allocate;

  StatusOr<OwningDeviceMemory> Allocate(int device_ordinal, uint64 size,
                                        bool /*retry_on_failure*/) override {
    // By contract, we must return null if size == 0.
    if (size == 0) {
      return OwningDeviceMemory();
    }
    void* buf = malloc(size);
    allocations_.insert({device_ordinal, buf});
    return OwningDeviceMemory(se::DeviceMemoryBase(buf, size), device_ordinal,
                              this);
  }

  Status Deallocate(int device_ordinal, se::DeviceMemoryBase mem) override {
    if (mem.is_null()) {
      return Status::OK();
    }

    auto it = allocations_.find({device_ordinal, mem.opaque()});
    if (it == allocations_.end()) {
      ADD_FAILURE() << "Allocation not found (double free?)";
    } else {
      free(mem.opaque());
      allocations_.erase(it);
    }
    return Status::OK();
  }

  bool AllowsAsynchronousDeallocation() const override { return false; }

 private:
  std::set<std::pair</*device_ordinal*/ int64, void*>> allocations_;
};
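
// Move assignment should free the destination's existing allocation and take
// ownership of the source's allocation, so that no device memory is leaked.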
TEST(ScopedShapedBufferTest, TestMoveAssignmentOperator) {
  Shape s = ShapeUtil::MakeShape(F32, {1});
  TestAllocator allocator;
  ScopedShapedBuffer sb1(s, s, &allocator, /*device_ordinal=*/0);
  sb1.set_buffer(
      allocator.Allocate(/*device_ordinal=*/0, /*size=*/42).ValueOrDie(),
      /*index=*/{});

  ScopedShapedBuffer sb2(s, s, &allocator, /*device_ordinal=*/1);
  sb2.set_buffer(
      allocator.Allocate(/*device_ordinal=*/1, /*size=*/10).ValueOrDie(),
      /*index=*/{});

  sb1 = std::move(sb2);

  // TestAllocator's destructor checks that all memory was freed.
}
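
// TakeSubTree should transfer ownership of the buffers under the given
// ShapeIndex into a new ScopedShapedBuffer, leaving null buffers behind in
// the original tree; buffers outside the subtree must be untouched.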
TEST(ScopedShapedBufferTest, TestTakeSubTree) {
  TestAllocator allocator;

  Shape s = ShapeUtil::MakeShape(F32, {1});
  s = xla::ShapeUtil::MakeTupleShape(std::vector<xla::Shape>(2, s));
  s = xla::ShapeUtil::MakeTupleShape(std::vector<xla::Shape>(3, s));

  ScopedShapedBuffer sb(s, s, &allocator, /*device_ordinal=*/0);
  sb.buffers().ForEachMutableElement(
      [&](const xla::ShapeIndex& index, se::DeviceMemoryBase* buffer) {
        TF_ASSERT_OK_AND_ASSIGN(
            OwningDeviceMemory m,
            allocator.Allocate(/*device_ordinal=*/0, /*size=*/77));
        *buffer = m.Forget();
      });
  ShapeTree<se::DeviceMemoryBase> buffers = sb.buffers();

  // Takes a subtree out of 'sb', and verifies the buffers are as expected.
  xla::ShapeIndex subtree_index = {1};
  ScopedShapedBuffer output = sb.TakeSubTree(subtree_index);

  output.buffers().ForEachElement([&](const xla::ShapeIndex& sub_index,
                                      const se::DeviceMemoryBase& buffer) {
    xla::ShapeIndex orig_index = subtree_index;
    for (int i : sub_index) {
      orig_index.push_back(i);
    }
    EXPECT_TRUE(buffers.find(orig_index)->second.IsSameAs(buffer));
  });
  sb.buffers().ForEachElement(
      [&](const xla::ShapeIndex& index, const se::DeviceMemoryBase& buffer) {
        if (ShapeIndexView(index).StartsWith(subtree_index)) {
          EXPECT_TRUE(buffer.is_null());
        } else {
          EXPECT_TRUE(buffers.find(index)->second.IsSameAs(buffer));
        }
      });
}

// Test TakeSubTree with different depths (depth of ShapeTree) and fan-outs
// (cardinality of each non-leaf node's children).
void BM_TakeSubTree(int iters, int depth, int fan_out) {
  tensorflow::testing::StopTiming();
  TestAllocator allocator;
  xla::Shape shape = xla::ShapeUtil::MakeShape(xla::F32, {32, 64, 128});
  for (int i = 0; i < depth; ++i) {
    std::vector<xla::Shape> shapes(fan_out, shape);
    shape = xla::ShapeUtil::MakeTupleShape(shapes);
  }
  xla::ScopedShapedBuffer shaped_buffer(shape, shape, /*allocator=*/&allocator,
                                        /*device_ordinal=*/0);
  tensorflow::testing::StartTiming();
  for (int i = 0; i < iters; ++i) {
    // Extract a buffer from approximately the middle of the first level of the
    // tree.
    (void)shaped_buffer.TakeSubTree(/*index=*/{fan_out / 2}).release();
  }
  tensorflow::testing::StopTiming();
}
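
// Benchmark arguments are (depth, fan_out).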
BENCHMARK(BM_TakeSubTree)
    ->ArgPair(1, 4)
    ->ArgPair(1, 8)
    ->ArgPair(1, 32)
    ->ArgPair(1, 64)
    ->ArgPair(1, 128)
    ->ArgPair(1, 256)
    ->ArgPair(1, 512)
    ->ArgPair(2, 4)
    ->ArgPair(2, 8)
    ->ArgPair(2, 32)
    ->ArgPair(2, 64)
    ->ArgPair(2, 128);

}  // anonymous namespace
}  // namespace xla