/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_API_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_API_H_

#ifdef CL_DELEGATE_NO_GL
#define EGL_NO_PROTOTYPES
#endif

#include <EGL/egl.h>

#include <cstdint>
#include <memory>
#include <vector>

#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/api.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"

// Usage example:
//
//   std::unique_ptr<InferenceEnvironment> env;
//   RETURN_IF_ERROR(NewInferenceEnvironment(option, &env));
//
//   InferenceOptions options;
//
//   std::unique_ptr<InferenceBuilder> builder;
//   RETURN_IF_ERROR(env->NewInferenceBuilder(options, model, &builder));
//   // now builder is ready to prepare inference runner.
//
// -----------------
// Supported formats
// -----------------
//
// OpenCL implementation uses 2D textures as the primary format.
// Tensor in HWDC4 layout is {TEXTURE_2D, RGBA, width := W*D, height := H}.
50 // 51 52 namespace tflite { 53 namespace gpu { 54 namespace cl { 55 56 struct InferenceOptions : public tflite::gpu::InferenceOptions {}; 57 58 // Indicates environment 59 struct InferenceEnvironmentProperties { 60 bool is_opencl_available = false; 61 62 // GL objects (buffers and textures) could be shared with CL context. 63 bool is_gl_sharing_supported = false; 64 65 // Indicates whether fast GL->CL synchronization is supported. 66 bool is_gl_to_cl_fast_sync_supported = false; 67 68 // Indicates whether fast CL->GL synchronization is supported. 69 bool is_cl_to_gl_fast_sync_supported = false; 70 }; 71 72 // Environment manages all resources that need to stay until any inference is 73 // running using OpenCL backend. 74 class InferenceEnvironment { 75 public: ~InferenceEnvironment()76 virtual ~InferenceEnvironment() {} 77 78 // Converts GraphFloat32 into intermediate, device-specific representation. 79 // This serialized_model specific for device and InferenceOptions. 80 // serialized_model cannot be used with another device or InferenceOptions. 81 // Loading serialized_model is much faster than loading GraphFloat32. 82 // serialized_model must be used with appropriate NewInferenceBuilder 83 // method (see below). 
84 virtual absl::Status BuildSerializedModel( 85 const InferenceOptions& options, GraphFloat32 model, 86 std::vector<uint8_t>* serialized_model) = 0; 87 88 // std::unique_ptr<InferenceBuilder>* builder - required parameter 89 // std::vector<int64_t>* in_refs - optional, can be nullptr 90 // std::vector<int64_t>* out_refs - optional, can be nullptr 91 virtual absl::Status NewInferenceBuilder( 92 const absl::Span<const uint8_t> serialized_model, 93 std::unique_ptr<InferenceBuilder>* builder, std::vector<int64_t>* in_refs, 94 std::vector<int64_t>* out_refs) = 0; 95 96 virtual absl::Status NewInferenceBuilder( 97 const InferenceOptions& options, GraphFloat32 model, 98 std::unique_ptr<InferenceBuilder>* builder) = 0; 99 100 // Returns opaque binary blob that contains a collection of already compiled 101 // OpenCL kernels present in a cache. Returned data could be re-used later 102 // to speed up compilation time when new environment is created for the same 103 // set of models. 104 // Returned data is valid only if used on the same device, otherwise it will 105 // not be compatible and will be discarded. 106 virtual std::vector<uint8_t> GetSerializedBinaryCache() const = 0; 107 }; 108 109 struct InferenceEnvironmentOptions { 110 // If any of these objects are set, created environment will use them instead 111 // of creating/choosing own instances. 112 cl_device_id device = nullptr; 113 cl_context context = nullptr; 114 cl_command_queue command_queue = nullptr; 115 116 // Whenever input and/or output is GL object, EGL display and context must be 117 // set to create GL aware OpenCL context. Do not set these variables whenever 118 // GL interoperability is not needed. 119 // It is the error to set egl_display, egl_context AND context at the same 120 // time. If egl_display and egl_context are set, they will be used to create 121 // GL-aware CL context. 
122 EGLDisplay egl_display = EGL_NO_DISPLAY; 123 EGLContext egl_context = EGL_NO_CONTEXT; 124 125 // Should contain data returned from 126 // InferenceEnvironment::GetSerializedBinaryCache method. 127 // Invalid or incompatible data will be discarded. Compiled binary may become 128 // incompatible when GPU driver is updated. 129 absl::Span<const uint8_t> serialized_binary_cache; 130 IsGlAwareInferenceEnvironmentOptions131 bool IsGlAware() const { 132 return egl_context != EGL_NO_CONTEXT && egl_display != EGL_NO_DISPLAY; 133 } 134 }; 135 136 // Creates new OpenCL environment that needs to stay around until all inference 137 // runners are destroyed. 138 absl::Status NewInferenceEnvironment( 139 const InferenceEnvironmentOptions& options, 140 std::unique_ptr<InferenceEnvironment>* environment, 141 InferenceEnvironmentProperties* properties /* optional */); 142 143 class CLInferenceRunner : public ::tflite::gpu::InferenceRunner { 144 public: 145 // The RunWithoutExternalBufferCopy provides a contract where the user of this 146 // interface does not need 147 // a. Inputs to be copied to the internal GPU buffer from the external CPU 148 // input buffer 149 // b. Outputs to be copied from the internal GPU buffer to the 150 // external CPU buffer 151 // 152 // The user of this interface is responsible for copying the inputs prior to 153 // running the GPU kernels and outputs post running with the other interfaces 154 // provided here. 155 virtual absl::Status RunWithoutExternalBufferCopy() = 0; 156 157 // Copies from the external input tensor (normally CPU buffer) to the internal 158 // OpenCL buffer. 159 virtual absl::Status CopyFromExternalInput(int index) = 0; 160 161 // Copies from the internal output OpenCL buffer to the external output tensor 162 virtual absl::Status CopyToExternalOutput(int index) = 0; 163 }; 164 165 } // namespace cl 166 } // namespace gpu 167 } // namespace tflite 168 169 #endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_API_H_ 170