/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/dma_helper.h"
#include "tensorflow/core/common_runtime/scoped_allocator.h"
#include "tensorflow/core/common_runtime/scoped_allocator_mgr.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"

namespace tensorflow {

class ScopedAllocatorOp : public OpKernel {
 public:
  explicit ScopedAllocatorOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    OP_REQUIRES_OK(context, context->GetAttr("shapes", &shapes_));
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    OP_REQUIRES_OK(context, context->GetAttr("expected_call_count",
                                             &expected_call_count_));
    device_ = context->device();
    // Precalculate the size of the backing tensor and the offsets of
    // the subtensors to be allocated from it, taking into account
    // alignment considerations.
    ScopedAllocatorMgr::PopulateFields(id_, shapes_, dtype_, &fields_);
    size_t num_bytes = fields_.back().offset + fields_.back().bytes_allocated;
    num_elements_ = num_bytes / DataTypeSize(dtype_);
    OP_REQUIRES(context, num_bytes % DataTypeSize(dtype_) == 0,
                errors::InvalidArgument(
                    "Number of bytes ", num_bytes,
                    " must be divisible by size of datatype ", dtype_));
  }

  void Compute(OpKernelContext* context) override {
    ScopedAllocatorMgr* sam = device_->GetScopedAllocatorMgr();
    if (!sam) {
      context->SetStatus(errors::Internal(
          "ScopedAllocatorMgr not supported on device ", device_->name()));
      return;
    }
    Tensor* backing_tensor = nullptr;
    AllocatorAttributes attr = context->output_alloc_attr(0);
    Status s =
        context->allocate_output(0, {num_elements_}, &backing_tensor, attr);
    VLOG(1) << "_ScopedAllocatorOp " << context->op_kernel().name()
            << " new backing tensor size " << backing_tensor->TotalBytes()
            << " num_elements_ " << num_elements_ << " buffer "
            << DMAHelper::buffer(backing_tensor) << " base addr "
            << DMAHelper::base(backing_tensor);
    if (s.ok()) {
      s = sam->AddScopedAllocator(*backing_tensor, context->step_id(), id_,
                                  name_, fields_, expected_call_count_);
    }
    if (!s.ok()) {
      context->SetStatus(s);
    }
  }

 private:
  std::vector<TensorShape> shapes_;
  DataType dtype_;
  int64 num_elements_;
  std::vector<ScopedAllocator::Field> fields_;
  string name_;
  int32 id_;
  int32 expected_call_count_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_CPU),
                        ScopedAllocatorOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_GPU),
                        ScopedAllocatorOp);

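// The concat kernel below does not move any data.  Input 0 is the backing
// tensor produced by _ScopedAllocator above; inputs 1..N are expected to be
// subtensors that were allocated out of that backing tensor (the bounds
// checks in Compute() verify this).  The output is simply an alias of the
// backing tensor's buffer, optionally reshaped to the "shape" attribute, so
// separately produced subtensors can be presented as one contiguous tensor
// without a copy.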
class ScopedAllocatorConcatOp : public OpKernel {
 public:
  explicit ScopedAllocatorConcatOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    OP_REQUIRES_OK(context, context->GetAttr("reshape", &reshape_));
    // These attributes are just for debugging.
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    device_ = context->device();
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& backing_tensor = context->input(0);
    // Check that type matches.
    OP_REQUIRES(context, backing_tensor.dtype() == dtype_,
                errors::InvalidArgument("Backing tensor type ",
                                        DataTypeString(backing_tensor.dtype()),
                                        " does not match expected type ",
                                        DataTypeString(dtype_)));
    // Check that the backing tensor is at least as large as the shape of the
    // output.
    OP_REQUIRES(context, backing_tensor.NumElements() >= shape_.num_elements(),
                errors::InvalidArgument("Backing tensor num elements ",
                                        backing_tensor.NumElements(),
                                        " is not >= expected ",
                                        shape_.num_elements()));
    Tensor output(dtype_);
    if (reshape_) {
      CHECK(output.CopyFrom(backing_tensor, shape_));
    } else {
      CHECK(output.CopyFrom(backing_tensor, backing_tensor.shape()));
    }
    context->set_output(0, output);
    const TensorBuffer* backing_buf = DMAHelper::buffer(&output);
    const void* backing_tensor_lb = backing_buf->data();
    const void* backing_tensor_ub = static_cast<const void*>(
        static_cast<const char*>(backing_tensor_lb) + backing_buf->size());
    // Check that all inputs lie entirely within the backing tensor.
    for (int i = 1; i < context->num_inputs(); ++i) {
      const TensorBuffer* input_buf = DMAHelper::buffer(&context->input(i));
      const void* input_lb = input_buf->data();
      const void* input_ub = static_cast<const void*>(
          static_cast<const char*>(input_lb) + input_buf->size());
      OP_REQUIRES(
          context, input_lb >= backing_tensor_lb,
          errors::InvalidArgument(
              "Lower bound check fail for input ", i, " from node ",
              context->op_kernel().requested_input(i), " to node ",
              context->op_kernel().name(), " input bounds = [", input_lb, ", ",
              input_ub, "]", " backing_tensor bounds = [", backing_tensor_lb,
              ", ", backing_tensor_ub, "]"));
      OP_REQUIRES(
          context, input_ub <= backing_tensor_ub,
          errors::InvalidArgument(
              "Upper bound check fail for input ", i, " from node ",
              context->op_kernel().requested_input(i), " to node ",
              context->op_kernel().name(), " input bounds = [", input_lb, ", ",
              input_ub, "]", " backing_tensor bounds = [", backing_tensor_lb,
              ", ", backing_tensor_ub, "]"));
    }
    VLOG(1) << "_ScopedAllocatorConcatOp outputting backing tensor at "
            << backing_buf;
  }

 private:
  TensorShape shape_;
  DataType dtype_;
  string name_;
  int32 id_;
  bool reshape_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_CPU),
                        ScopedAllocatorConcatOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_GPU),
                        ScopedAllocatorConcatOp);

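// The split kernel below provides the inverse view.  Input 0 is the backing
// (concatenated) tensor and inputs 1..N are the original subtensors; each
// output i-1 simply forwards input i, again without moving data.  The bounds
// checks in Compute() verify that every forwarded buffer still lies entirely
// inside the backing tensor.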
class ScopedAllocatorSplitOp : public OpKernel {
 public:
  explicit ScopedAllocatorSplitOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    // These attributes are just for debugging.
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    device_ = context->device();
  }

  void Compute(OpKernelContext* context) override {
    Tensor backing_copy(context->input(0));
    // Check that type matches.
    OP_REQUIRES(context, backing_copy.dtype() == dtype_,
                errors::InvalidArgument("Backing tensor type ",
                                        DataTypeString(backing_copy.dtype()),
                                        " does not match expected type ",
                                        DataTypeString(dtype_)));
    const TensorBuffer* backing_buf = DMAHelper::buffer(&backing_copy);
    const void* backing_tensor_lb = backing_buf->data();
    const void* backing_tensor_ub = static_cast<const void*>(
        static_cast<const char*>(backing_tensor_lb) + backing_buf->size());
    for (int i = 1; i < context->num_inputs(); ++i) {
      VLOG(1) << "_ScopedAllocatorSplitOp assigning input " << i
              << " to output " << i - 1 << " buf addr "
              << DMAHelper::base(&context->input(i));
      Tensor copy(context->input(i));
      OP_REQUIRES(context, copy.dtype() == dtype_,
                  errors::InvalidArgument("Input ", i, " tensor type ",
                                          DataTypeString(copy.dtype()),
                                          " does not match expected type ",
                                          DataTypeString(dtype_)));
      context->set_output(i - 1, copy);
      const TensorBuffer* input_buf = DMAHelper::buffer(&copy);
      const void* input_lb = input_buf->data();
      OP_REQUIRES(
          context, input_lb >= backing_tensor_lb,
          errors::InvalidArgument("Lower bound check fail for input ", i,
                                  " to node ", context->op_kernel().name()));
      const void* input_ub = static_cast<const void*>(
          static_cast<const char*>(input_lb) + input_buf->size());
      OP_REQUIRES(
          context, input_ub <= backing_tensor_ub,
          errors::InvalidArgument("Upper bound check fail for input ", i,
                                  " to node ", context->op_kernel().name()));
    }
  }

 private:
  DataType dtype_;
  string name_;
  int32 id_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_CPU),
                        ScopedAllocatorSplitOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_GPU),
                        ScopedAllocatorSplitOp);

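// Illustrative wiring only: this sketch is not taken from this file, the node
// names are hypothetical, and in practice these kernels are inserted by a
// graph rewrite rather than written by hand.  A fused pattern might look
// roughly like:
//
//   sa           = _ScopedAllocator(shapes=[s0, s1], sa_name=..., id=1, ...)
//   a            = Op0(...)   // output expected to be allocated from sa
//   b            = Op1(...)   // output expected to be allocated from sa
//   fused        = _ScopedAllocatorConcat(sa, a, b)    // zero-copy view
//   fused_result = SomeInPlaceOp(fused)                // operates on the
//                                                      // whole backing buffer
//   a_out, b_out = _ScopedAllocatorSplit(fused_result, a, b)  // zero-copy
//
// The attribute names above ("shapes", "sa_name", "id") match the GetAttr()
// calls in the kernels in this file.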
}  // namespace tensorflow