1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_
18 
19 #include <cstdint>
20 #include <memory>
21 
22 #include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
23 #include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
24 #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
25 #include "tensorflow/lite/delegates/gpu/cl/cl_memory.h"
26 #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
27 #include "tensorflow/lite/delegates/gpu/cl/util.h"
28 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
29 #include "tensorflow/lite/delegates/gpu/common/shape.h"
30 #include "tensorflow/lite/delegates/gpu/common/status.h"
31 #include "tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h"
32 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
33 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
34 #include "tensorflow/lite/delegates/gpu/common/types.h"
35 
36 namespace tflite {
37 namespace gpu {
38 namespace cl {
39 
40 class Tensor : public GPUObject, public GpuSpatialTensor {
41  public:
Tensor()42   Tensor()
43       : memory_(nullptr), image_buffer_memory_(nullptr), memory_owner_(true) {}
44   Tensor(cl_mem memory, bool memory_owner, const BHWC& shape,
45          const TensorDescriptor& descriptor);
46   Tensor(cl_mem memory, bool memory_owner, const BHWDC& shape,
47          const TensorDescriptor& descriptor);
48   Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
49          const BHWC& shape, const TensorDescriptor& descriptor);
50   Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
51          const BHWDC& shape, const TensorDescriptor& descriptor);
52 
53   // Move only
54   Tensor(Tensor&& tensor);
55   Tensor& operator=(Tensor&& tensor);
56   Tensor(const Tensor&) = delete;
57   Tensor& operator=(const Tensor&) = delete;
58 
~Tensor()59   ~Tensor() override { Release(); }
60 
61   absl::Status GetGPUResources(const GPUObjectDescriptor* obj_ptr,
62                                GPUResourcesWithValue* resources) const override;
63 
Width()64   int Width() const override { return shape_.w; }
Height()65   int Height() const override { return shape_.h; }
Depth()66   int Depth() const override { return shape_.d; }
Channels()67   int Channels() const override { return shape_.c; }
Slices()68   int Slices() const override { return DivideRoundUp(shape_.c, 4); }
Batch()69   int Batch() const override { return shape_.b; }
70 
GetDescriptor()71   TensorDescriptor GetDescriptor() const { return descriptor_; }
GetDataType()72   DataType GetDataType() const { return descriptor_.data_type; }
GetStorageType()73   TensorStorageType GetStorageType() const { return descriptor_.storage_type; }
74 
75   // for profiling and memory statistics
76   uint64_t GetMemorySizeInBytes() const;
77 
78   cl_mem GetMemoryPtr() const;
79 
80   // This function returns buffer memory ptr for IMAGE_BUFFER instead of image
81   // memory ptr.
82   cl_mem GetMemoryPtrForWriting() const;
83 
84   absl::Status WriteData(CLCommandQueue* queue, const TensorFloat32& src);
85   absl::Status WriteData(
86       CLCommandQueue* queue,
87       const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src);
88   absl::Status WriteData(
89       CLCommandQueue* queue,
90       const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src);
91   absl::Status WriteData(CLCommandQueue* queue, const Tensor5DFloat32& src);
92   absl::Status ReadData(CLCommandQueue* queue, TensorFloat32* dst) const;
93   absl::Status ReadData(CLCommandQueue* queue, Tensor5DFloat32* dst) const;
94 
95   absl::Status CreateFromDescriptor(const TensorDescriptor& desc,
96                                     CLContext* context);
97 
98  private:
99   absl::Status IsValid(const BHWC& shape) const;
100   absl::Status IsValid(const BHWDC& shape) const;
101 
102   int GetChannelsAlignment() const;
103   int GetAlignedChannels() const;
104 
105   absl::Status WriteDataBHWDC(const float* in, CLCommandQueue* queue);
106   absl::Status ReadDataBHWDC(float* out, CLCommandQueue* queue) const;
107 
108   int3 GetFullTensorRegion() const;
109   void Release();
110 
111   cl_mem memory_;
112   cl_mem image_buffer_memory_;  // for TensorStorageType::IMAGE_BUFFER only
113   bool memory_owner_;
114   BHWDC shape_;
115   TensorDescriptor descriptor_;
116 };
117 
118 using TensorPtr = std::shared_ptr<Tensor>;
119 
120 absl::Status AllocateTensorMemory(const CLContext& context, const BHWC& shape,
121                                   const TensorDescriptor& descriptor,
122                                   CLMemory* result);
123 
124 absl::Status AllocateTensorMemory(const CLContext& context, const BHWDC& shape,
125                                   const TensorDescriptor& descriptor,
126                                   CLMemory* result);
127 
128 absl::Status CreateTensor(const CLContext& context, const BHWC& shape,
129                           const TensorDescriptor& descriptor, Tensor* result);
130 
131 absl::Status CreateTensor(const CLContext& context, const BHWDC& shape,
132                           const TensorDescriptor& descriptor, Tensor* result);
133 
134 absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
135                                 const BHWC& shape,
136                                 const TensorDescriptor& descriptor,
137                                 Tensor* result);
138 
139 absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
140                                 const BHWDC& shape,
141                                 const TensorDescriptor& descriptor,
142                                 Tensor* result);
143 
144 }  // namespace cl
145 }  // namespace gpu
146 }  // namespace tflite
147 
148 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_
149