/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/dma_helper.h"
#include "tensorflow/core/common_runtime/scoped_allocator.h"
#include "tensorflow/core/common_runtime/scoped_allocator_mgr.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"

namespace tensorflow {

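// _ScopedAllocator: allocates one backing tensor for the current step and
// registers a ScopedAllocator with the device's ScopedAllocatorMgr, so that
// a set of downstream ops can have their outputs carved out of that single
// buffer at the offsets precomputed in fields_.  Like the concat and split
// ops below, this op is normally inserted by a graph rewrite rather than
// written directly by users.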
class ScopedAllocatorOp : public OpKernel {
 public:
  explicit ScopedAllocatorOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    OP_REQUIRES_OK(context, context->GetAttr("shapes", &shapes_));
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    OP_REQUIRES_OK(context, context->GetAttr("expected_call_count",
                                             &expected_call_count_));
    device_ = context->device();
    // Precalculate the size of the backing tensor and the offsets of
    // the subtensors to be allocated from it, taking into account
    // alignment considerations.
    ScopedAllocatorMgr::PopulateFields(id_, shapes_, dtype_, &fields_);
    size_t num_bytes = fields_.back().offset + fields_.back().bytes_allocated;
    num_elements_ = num_bytes / DataTypeSize(dtype_);
    OP_REQUIRES(context, num_bytes % DataTypeSize(dtype_) == 0,
                errors::InvalidArgument(
                    "Number of bytes ", num_bytes,
                    " must be divisible by size of datatype ", dtype_));
  }

  void Compute(OpKernelContext* context) override {
    ScopedAllocatorMgr* sam = device_->GetScopedAllocatorMgr();
    if (!sam) {
      context->SetStatus(errors::Internal(
          "ScopedAllocatorMgr not supported on device ", device_->name()));
      return;
    }
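    // Allocate the backing tensor as this op's single output; the
    // ScopedAllocator registered below will hand out sub-ranges of its
    // buffer to the ops that consume it.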
    Tensor* backing_tensor = nullptr;
    AllocatorAttributes attr = context->output_alloc_attr(0);
    Status s =
        context->allocate_output(0, {num_elements_}, &backing_tensor, attr);
    if (s.ok()) {
      VLOG(1) << "_ScopedAllocatorOp " << context->op_kernel().name()
              << " new backing tensor size " << backing_tensor->TotalBytes()
              << " num_elements_ " << num_elements_ << " buffer "
              << DMAHelper::buffer(backing_tensor) << " base addr "
              << DMAHelper::base(backing_tensor);
      s = sam->AddScopedAllocator(*backing_tensor, context->step_id(), id_,
                                  name_, fields_, expected_call_count_);
    }
    if (!s.ok()) {
      context->SetStatus(s);
    }
  }

 private:
  std::vector<TensorShape> shapes_;
  DataType dtype_;
  int64 num_elements_;
  std::vector<ScopedAllocator::Field> fields_;
  string name_;
  int32 id_;
  int32 expected_call_count_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_CPU),
                        ScopedAllocatorOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_GPU),
                        ScopedAllocatorOp);

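// _ScopedAllocatorConcat: presents the backing tensor produced by a
// _ScopedAllocator as a single output.  Because the remaining inputs were
// allocated as contiguous sub-ranges of the backing buffer, no data needs to
// be copied; the inputs are only validated to lie within the backing
// tensor's bounds.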
class ScopedAllocatorConcatOp : public OpKernel {
 public:
  explicit ScopedAllocatorConcatOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    OP_REQUIRES_OK(context, context->GetAttr("reshape", &reshape_));
    // These attributes are just for debugging.
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    device_ = context->device();
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& backing_tensor = context->input(0);
    // Check that the type matches.
    OP_REQUIRES(context, backing_tensor.dtype() == dtype_,
                errors::InvalidArgument("Backing tensor type ",
                                        DataTypeString(backing_tensor.dtype()),
                                        " does not match expected type ",
                                        DataTypeString(dtype_)));
    // Check that the backing tensor is at least as large as the shape of the
    // output.
    OP_REQUIRES(context, backing_tensor.NumElements() >= shape_.num_elements(),
                errors::InvalidArgument("Backing tensor num elements ",
                                        backing_tensor.NumElements(),
                                        " is not >= expected ",
                                        shape_.num_elements()));
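    // CopyFrom only shares the backing tensor's underlying buffer (it checks
    // that the element counts match); no data is actually copied, so the
    // output aliases the backing tensor, optionally reinterpreted with the
    // requested shape.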
    Tensor output(dtype_);
    if (reshape_) {
      CHECK(output.CopyFrom(backing_tensor, shape_));
    } else {
      CHECK(output.CopyFrom(backing_tensor, backing_tensor.shape()));
    }
    context->set_output(0, output);
    const TensorBuffer* backing_buf = DMAHelper::buffer(&output);
    const void* backing_tensor_lb = backing_buf->data();
    const void* backing_tensor_ub = static_cast<const void*>(
        static_cast<const char*>(backing_tensor_lb) + backing_buf->size());
    // Check that all inputs lie entirely within the backing tensor.
    for (int i = 1; i < context->num_inputs(); ++i) {
      const TensorBuffer* input_buf = DMAHelper::buffer(&context->input(i));
      const void* input_lb = input_buf->data();
      const void* input_ub = static_cast<const void*>(
          static_cast<const char*>(input_lb) + input_buf->size());
      OP_REQUIRES(
          context, input_lb >= backing_tensor_lb,
          errors::InvalidArgument(
              "Lower bound check fail for input ", i, " from node ",
              context->op_kernel().requested_input(i), " to node ",
              context->op_kernel().name(), " input bounds = [", input_lb, ", ",
              input_ub, "]", " backing_tensor bounds = [", backing_tensor_lb,
              ", ", backing_tensor_ub, "]"));
      OP_REQUIRES(
          context, input_ub <= backing_tensor_ub,
          errors::InvalidArgument(
              "Upper bound check fail for input ", i, " from node ",
              context->op_kernel().requested_input(i), " to node ",
              context->op_kernel().name(), " input bounds = [", input_lb, ", ",
              input_ub, "]", " backing_tensor bounds = [", backing_tensor_lb,
              ", ", backing_tensor_ub, "]"));
    }
    VLOG(1) << "_ScopedAllocatorConcatOp outputting backing tensor at "
            << backing_buf;
  }

 private:
  TensorShape shape_;
  DataType dtype_;
  string name_;
  int32 id_;
  bool reshape_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_CPU),
                        ScopedAllocatorConcatOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_GPU),
                        ScopedAllocatorConcatOp);

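// _ScopedAllocatorSplit: the inverse of _ScopedAllocatorConcat.  Input 0 is
// the backing tensor; each remaining input i is forwarded unchanged to
// output i-1 after verifying that its buffer lies entirely within the
// backing tensor, so no data is copied here either.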
class ScopedAllocatorSplitOp : public OpKernel {
 public:
  explicit ScopedAllocatorSplitOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    // These attributes are just for debugging.
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    device_ = context->device();
  }

  void Compute(OpKernelContext* context) override {
    Tensor backing_copy(context->input(0));
    // Check that the type matches.
    OP_REQUIRES(context, backing_copy.dtype() == dtype_,
                errors::InvalidArgument("Backing tensor type ",
                                        DataTypeString(backing_copy.dtype()),
                                        " does not match expected type ",
                                        DataTypeString(dtype_)));
    const TensorBuffer* backing_buf = DMAHelper::buffer(&backing_copy);
    const void* backing_tensor_lb = backing_buf->data();
    const void* backing_tensor_ub = static_cast<const void*>(
        static_cast<const char*>(backing_tensor_lb) + backing_buf->size());
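    // Forward each input i to output i-1, validating that its buffer falls
    // within [backing_tensor_lb, backing_tensor_ub).  The Tensor copy
    // constructor only shares the refcounted buffer, so no data moves.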
    for (int i = 1; i < context->num_inputs(); ++i) {
      VLOG(1) << "_ScopedAllocatorSplitOp assigning input " << i
              << " to output " << i - 1 << " buf addr "
              << DMAHelper::base(&context->input(i));
      Tensor copy(context->input(i));
      OP_REQUIRES(context, copy.dtype() == dtype_,
                  errors::InvalidArgument("Input ", i, " tensor type ",
                                          DataTypeString(copy.dtype()),
                                          " does not match expected type ",
                                          DataTypeString(dtype_)));
      context->set_output(i - 1, copy);
      const TensorBuffer* input_buf = DMAHelper::buffer(&copy);
      const void* input_lb = input_buf->data();
      OP_REQUIRES(
          context, input_lb >= backing_tensor_lb,
          errors::InvalidArgument("Lower bound check fail for input ", i,
                                  " to node ", context->op_kernel().name()));
      const void* input_ub = static_cast<const void*>(
          static_cast<const char*>(input_lb) + input_buf->size());
      OP_REQUIRES(
          context, input_ub <= backing_tensor_ub,
          errors::InvalidArgument("Upper bound check fail for input ", i,
                                  " to node ", context->op_kernel().name()));
    }
  }

 private:
  DataType dtype_;
  string name_;
  int32 id_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_CPU),
                        ScopedAllocatorSplitOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_GPU),
                        ScopedAllocatorSplitOp);

}  // namespace tensorflow