/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/api.h"

#ifndef CL_DELEGATE_NO_GL
#define CL_DELEGATE_ALLOW_GL
#endif

#include <algorithm>
#include <cstring>

#include "absl/memory/memory.h"
#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_event.h"
#include "tensorflow/lite/delegates/gpu/cl/environment.h"
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"

#ifdef CL_DELEGATE_ALLOW_GL
#include <EGL/eglext.h>

#include "tensorflow/lite/delegates/gpu/cl/egl_sync.h"
#include "tensorflow/lite/delegates/gpu/cl/gl_interop.h"
#endif

namespace tflite {
namespace gpu {
namespace cl {
namespace {

// Both internal and external defs are identical, therefore nothing to connect
// here.
class NoopTensorTie : public TensorTie {
 public:
  NoopTensorTie(const TensorTieDef& def, TensorObject obj)
      : TensorTie(def), obj_(obj) {}

  static bool IsSupported(const TensorTieDef& def) {
    return def.external_def == def.internal_def;
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("Tensor object is readonly.");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    obj_ = obj;
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return obj_; }

  absl::Status CopyToExternalObject() final { return absl::OkStatus(); }

  absl::Status CopyFromExternalObject() final { return absl::OkStatus(); }

 private:
  TensorObject obj_;
};

// Does one-step conversion between internal and external objects.
// It may also allocate external objects if requested.
class DefaultTensorTie : public TensorTie {
 public:
  DefaultTensorTie(const TensorTieDef& def, TensorObject internal_obj)
      : TensorTie(def), internal_obj_(internal_obj) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto object_type = def.external_def.object_def.object_type;
#ifdef CL_DELEGATE_ALLOW_GL
    if (def.external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def.external_def.object_def,
                                      def.internal_def.object_def)) {
      return true;
    }
#endif
    return (object_type == ObjectType::OPENCL_BUFFER ||
            object_type == ObjectType::OPENCL_TEXTURE ||
            object_type == ObjectType::CPU_MEMORY) &&
           converter_builder.IsSupported(def.internal_def, def.external_def) &&
           converter_builder.IsSupported(def.external_def, def.internal_def);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          Environment* env, std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = absl::make_unique<DefaultTensorTie>(def, internal_object);
    RETURN_IF_ERROR(tie_impl->Init(converter_builder, env));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    if (!converter_to_) {
      return absl::UnavailableError("Conversion is not available");
    }
    return converter_to_->Convert(internal_obj_, GetExternalObject());
  }

  absl::Status CopyFromExternalObject() final {
    if (!converter_from_) {
      return absl::UnavailableError("Conversion is not available");
    }
    return converter_from_->Convert(GetExternalObject(), internal_obj_);
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("External object is read-only");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    external_obj_ = obj;
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

 private:
  absl::Status Init(TensorObjectConverterBuilder* converter_builder,
                    Environment* env) {
#ifdef CL_DELEGATE_ALLOW_GL
    if (def().external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def().external_def.object_def,
                                      def().internal_def.object_def)) {
      converter_from_ = absl::make_unique<GlClBufferCopier>(
          def().internal_def, def().external_def, env);
    } else {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().external_def, def().internal_def, &converter_from_));
    }
    if (def().external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def().internal_def.object_def,
                                      def().external_def.object_def)) {
      converter_to_ = absl::make_unique<GlClBufferCopier>(
          def().internal_def, def().external_def, env);
    } else {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().internal_def, def().external_def, &converter_to_));
    }
#else
    RETURN_IF_ERROR(converter_builder->MakeConverter(
        def().external_def, def().internal_def, &converter_from_));
    RETURN_IF_ERROR(converter_builder->MakeConverter(
        def().internal_def, def().external_def, &converter_to_));
#endif
    return MaybeAllocateExternalObject(env);
  }

  absl::Status MaybeAllocateExternalObject(Environment* env) {
    const TensorObjectDef& d = def().external_def;
    if (d.object_def.user_provided) {
      return absl::OkStatus();
    }
    switch (d.object_def.object_type) {
      case ObjectType::CPU_MEMORY: {
        size_t bytes_size = NumElements(d) * SizeOf(d.object_def.data_type);
        cpu_memory_.resize(bytes_size);
        external_obj_ = CpuMemory{cpu_memory_.data(), cpu_memory_.size()};
        break;
      }
      case ObjectType::OPENCL_TEXTURE:
      case ObjectType::OPENCL_BUFFER: {
        auto& dims = d.dimensions;
        const BHWC shape(dims.b, dims.h, dims.w, dims.c);
        const TensorDescriptor desc{
            d.object_def.data_type,
            ToTensorStorageType(d.object_def.object_type,
                                d.object_def.data_layout),
            Layout::BHWC};
        RETURN_IF_ERROR(
            AllocateTensorMemory(env->context(), shape, desc, &cl_memory_));
        if (d.object_def.object_type == ObjectType::OPENCL_TEXTURE) {
          external_obj_ = OpenClTexture{cl_memory_.memory()};
        } else {
          external_obj_ = OpenClBuffer{cl_memory_.memory()};
        }
        break;
      }
      default:
        return absl::InternalError("Unexpected object type");
    }
    return absl::OkStatus();
  }

  const TensorObject internal_obj_;
  TensorObject external_obj_;
  CLMemory cl_memory_;
  std::vector<uint8_t> cpu_memory_;
  std::unique_ptr<TensorObjectConverter> converter_to_;
  std::unique_ptr<TensorObjectConverter> converter_from_;
};

// Copies data to an intermediate OpenCL buffer and then does a two-step
// conversion. It handles the following cases where one-step conversion is not
// supported:
// - CPU BHWC -> CL buffer BHWC -> CL texture DHWC4.
class TwoStepTensorTie : public TensorTie {
 public:
  explicit TwoStepTensorTie(const TensorTieDef& def) : TensorTie(def) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto defs = MakeOuterInnerDefs(def);
    return DefaultTensorTie::IsSupported(defs.first, converter_builder) &&
           DefaultTensorTie::IsSupported(defs.second, converter_builder);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          Environment* env, std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = absl::make_unique<TwoStepTensorTie>(def);
    RETURN_IF_ERROR(tie_impl->Init(internal_object, converter_builder, env));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    RETURN_IF_ERROR(inner_tie_->CopyToExternalObject());
    return outer_tie_->CopyToExternalObject();
  }

  absl::Status CopyFromExternalObject() final {
    RETURN_IF_ERROR(outer_tie_->CopyFromExternalObject());
    return inner_tie_->CopyFromExternalObject();
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    return outer_tie_->SetExternalObject(obj);
  }

  TensorObject GetExternalObject() final {
    return outer_tie_->GetExternalObject();
  }

 private:
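  // Splits the tie definition into an outer pair (user-facing external object
  // <-> intermediate CL buffer) and an inner pair (intermediate CL buffer <->
  // internal tensor), each handled by a DefaultTensorTie.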
  static std::pair<TensorTieDef, TensorTieDef> MakeOuterInnerDefs(
      const TensorTieDef& def) {
    TensorTieDef outer_def;
    outer_def.external_def = def.external_def;
    outer_def.internal_def = def.external_def;
    outer_def.internal_def.object_def.object_type = ObjectType::OPENCL_BUFFER;
    outer_def.internal_def.object_def.user_provided = true;

    TensorTieDef inner_def;
    inner_def.external_def = outer_def.internal_def;
    inner_def.external_def.object_def.user_provided = false;
    inner_def.internal_def = def.internal_def;
    return std::make_pair(outer_def, inner_def);
  }

  absl::Status Init(TensorObject internal_object,
                    TensorObjectConverterBuilder* converter_builder,
                    Environment* env) {
    auto defs = MakeOuterInnerDefs(def());
    RETURN_IF_ERROR(DefaultTensorTie::New(defs.second, internal_object,
                                          converter_builder, env, &inner_tie_));
    return DefaultTensorTie::New(defs.first, inner_tie_->GetExternalObject(),
                                 converter_builder, env, &outer_tie_);
  }

  std::unique_ptr<TensorTie> inner_tie_;
  std::unique_ptr<TensorTie> outer_tie_;
};

#ifdef CL_DELEGATE_ALLOW_GL
// Captures GL object into CL context before performing a conversion.
class GlBufferHolder : public TensorTie {
 public:
  GlBufferHolder(const TensorTieDef& def, GlInteropFabric* gl_interop_fabric,
                 Environment* env)
      : TensorTie(def),
        gl_interop_fabric_(gl_interop_fabric),
        environment_(env) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    if (!def.external_def.object_def.user_provided ||
        def.external_def.object_def.object_type != ObjectType::OPENGL_SSBO) {
      return false;
    }
    return DefaultTensorTie::IsSupported(MakeClDef(def), converter_builder);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          GlInteropFabric* gl_interop_fabric, Environment* env,
                          std::unique_ptr<TensorTie>* tie) {
    auto tie_impl =
        absl::make_unique<GlBufferHolder>(def, gl_interop_fabric, env);
    RETURN_IF_ERROR(DefaultTensorTie::New(MakeClDef(def), internal_object,
                                          converter_builder, env,
                                          &tie_impl->tie_));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    auto ssbo = absl::get_if<OpenGlBuffer>(&obj);
    if (!ssbo) {
      return absl::InvalidArgumentError("Missing OpenGL SSBO");
    }
    auto old_ssbo = absl::get_if<OpenGlBuffer>(&external_obj_);
    if (old_ssbo && ssbo->id == old_ssbo->id) {
      return absl::OkStatus();
    }
    if (cl_object_.memory()) {
      gl_interop_fabric_->UnregisterMemory(cl_object_.memory());
    }
    RETURN_IF_ERROR(CreateClMemoryFromGlBuffer(
        ssbo->id, def().access_type, &environment_->context(), &cl_object_));
    external_obj_ = obj;
    RETURN_IF_ERROR(tie_->SetExternalObject(OpenClBuffer{cl_object_.memory()}));
    gl_interop_fabric_->RegisterMemory(cl_object_.memory());
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

  absl::Status CopyFromExternalObject() final {
    return tie_->CopyFromExternalObject();
  }

  absl::Status CopyToExternalObject() final {
    return tie_->CopyToExternalObject();
  }

 private:
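  // Rewrites the external definition from an OpenGL SSBO to a user-provided
  // OpenCL buffer so the wrapped DefaultTensorTie can handle the conversion.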
  static TensorTieDef MakeClDef(const TensorTieDef& def) {
    auto cl_def = def;
    cl_def.external_def.object_def.object_type = ObjectType::OPENCL_BUFFER;
    cl_def.external_def.object_def.user_provided = true;
    return cl_def;
  }

  CLMemory cl_object_;
  GlInteropFabric* gl_interop_fabric_;
  Environment* environment_;
  std::unique_ptr<TensorTie> tie_;
  TensorObject external_obj_;
};
#endif

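// Wraps the memory of an internal tensor into a TensorObject matching its
// storage type (buffer, image buffer, or texture).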
TensorObject TensorToObj(const Tensor& tensor) {
  if (tensor.GetStorageType() == TensorStorageType::BUFFER) {
    return OpenClBuffer{tensor.GetMemoryPtr()};
  }
  if (tensor.GetStorageType() == TensorStorageType::IMAGE_BUFFER) {
    return OpenClBuffer{tensor.GetMemoryPtrForWriting()};
  }
  return OpenClTexture{tensor.GetMemoryPtr()};
}

// Responsible for creating new tensor ties.
class TensorTieFactory {
 public:
  TensorTieFactory(Environment* env, InferenceContext* context
#ifdef CL_DELEGATE_ALLOW_GL
                   ,
                   GlInteropFabric* gl_interop_fabric
#endif
                   )
      : env_(*env),
        context_(*context),
#ifdef CL_DELEGATE_ALLOW_GL
        gl_interop_fabric_(gl_interop_fabric),
#endif
        converter_builder_(NewConverterBuilder(env)) {
  }

  bool IsSupported(const TensorTieDef& def) const {
    return IsValid(def.external_def.object_def) &&
           (NoopTensorTie::IsSupported(def) ||
            DefaultTensorTie::IsSupported(def, *converter_builder_) ||
#ifdef CL_DELEGATE_ALLOW_GL
            (gl_interop_fabric_ &&
             GlBufferHolder::IsSupported(def, *converter_builder_)) ||
#endif
            TwoStepTensorTie::IsSupported(def, *converter_builder_));
  }

  absl::Status NewTensorTie(const TensorTieDef& def,
                            std::unique_ptr<TensorTie>* tie) {
    TensorObject internal_object = TensorToObj(*context_.GetTensor(def.id));
    auto converter = converter_builder_.get();
    if (NoopTensorTie::IsSupported(def)) {
      *tie = absl::make_unique<NoopTensorTie>(def, internal_object);
      return absl::OkStatus();
    }
    if (DefaultTensorTie::IsSupported(def, *converter)) {
      return DefaultTensorTie::New(def, internal_object, converter, &env_, tie);
    }
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_ && GlBufferHolder::IsSupported(def, *converter)) {
      return GlBufferHolder::New(def, internal_object, converter,
                                 gl_interop_fabric_, &env_, tie);
    }
#endif
    if (TwoStepTensorTie::IsSupported(def, *converter)) {
      return TwoStepTensorTie::New(def, internal_object, converter, &env_, tie);
    }
    return absl::UnimplementedError("Unsupported tensor tie definition.");
  }

 private:
  Environment& env_;
  InferenceContext& context_;
#ifdef CL_DELEGATE_ALLOW_GL
  GlInteropFabric* gl_interop_fabric_;
#endif
  std::unique_ptr<TensorObjectConverterBuilder> converter_builder_;
};

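// Runs the compiled InferenceContext and moves data between user-provided
// external objects and internal tensors through the linked tensor ties.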
class InferenceRunnerImpl : public CLInferenceRunner {
 public:
  InferenceRunnerImpl(Environment* environment,
                      std::unique_ptr<InferenceContext> context
#ifdef CL_DELEGATE_ALLOW_GL
                      ,
                      std::unique_ptr<GlInteropFabric> gl_interop_fabric
#endif
                      )
      : queue_(environment->queue()),
        context_(std::move(context))
#ifdef CL_DELEGATE_ALLOW_GL
        ,
        gl_interop_fabric_(std::move(gl_interop_fabric))
#endif
  {
  }

  absl::Status Initialize(const std::vector<TensorTieDef>& inputs,
                          const std::vector<TensorTieDef>& outputs,
                          TensorTieFactory* factory) {
    RETURN_IF_ERROR(LinkTensors(inputs, factory, &inputs_));
    return LinkTensors(outputs, factory, &outputs_);
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status GetInputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = inputs_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status GetOutputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = outputs_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status SetInputObject(int index, TensorObject object) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Input index is out of range");
    }
    return inputs_[index]->SetExternalObject(object);
  }

  absl::Status SetOutputObject(int index, TensorObject object) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Output index is out of range");
    }
    return outputs_[index]->SetExternalObject(object);
  }

  absl::Status CopyFromExternalInput(int index) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::NotFoundError(
          absl::StrCat("Input id ", index, " is an invalid input index."));
    }
    return inputs_[index]->CopyFromExternalObject();
  }

  absl::Status CopyToExternalOutput(int index) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::NotFoundError(
          absl::StrCat("Output id ", index, " is an invalid output index."));
    }
    return outputs_[index]->CopyToExternalObject();
  }

  absl::Status Run() override {
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_) {
      RETURN_IF_ERROR(gl_interop_fabric_->Start());
    }
#endif
    for (int i = 0; i < inputs_.size(); i++) {
      RETURN_IF_ERROR(CopyFromExternalInput(i));
    }

    RETURN_IF_ERROR(RunWithoutExternalBufferCopy());

    for (int i = 0; i < outputs_.size(); i++) {
      RETURN_IF_ERROR(CopyToExternalOutput(i));
    }
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_) {
      RETURN_IF_ERROR(gl_interop_fabric_->Finish());
    }
#endif
    return absl::OkStatus();
  }

  absl::Status RunWithoutExternalBufferCopy() override {
    RETURN_IF_ERROR(context_->AddToQueue(queue_));
    clFlush(queue_->queue());

    return absl::OkStatus();
  }

 private:
  static absl::Status LinkTensors(
      const std::vector<TensorTieDef>& defs, TensorTieFactory* factory,
      std::vector<std::unique_ptr<TensorTie>>* objects) {
    objects->reserve(defs.size());
    for (auto& def : defs) {
      std::unique_ptr<TensorTie> object;
      RETURN_IF_ERROR(factory->NewTensorTie(def, &object));
      objects->push_back(std::move(object));
    }
    return absl::OkStatus();
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<std::unique_ptr<TensorTie>>& objects) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(objects.size());
    for (auto& obj : objects) {
      defs.push_back(obj->def().external_def);
    }
    return defs;
  }

  CLCommandQueue* queue_;
  std::unique_ptr<InferenceContext> context_;
#ifdef CL_DELEGATE_ALLOW_GL
  std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
#endif
  std::vector<std::unique_ptr<TensorTie>> inputs_;
  std::vector<std::unique_ptr<TensorTie>> outputs_;
};

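// Describes an internal tensor as a TensorObjectDef: shape, data layout,
// data type and object type.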
TensorObjectDef TensorToDef(const Tensor& tensor) {
  TensorObjectDef def;
  def.dimensions.b = tensor.Batch();
  def.dimensions.h = tensor.Height();
  def.dimensions.w = tensor.Width();
  def.dimensions.c = tensor.Channels();
  def.object_def.data_layout = ToDataLayout(tensor.GetStorageType());
  def.object_def.data_type = tensor.GetDataType();
  def.object_def.object_type = ToObjectType(tensor.GetStorageType());
  def.object_def.user_provided = false;
  return def;
}

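// Picks the calculations precision from the position of MAX_PRECISION in the
// priority list (1 -> F32, 2 -> F32_F16, otherwise F16) and raises it if the
// device does not support the lower precision.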
CalculationsPrecision GetPrecision(const Environment& env,
                                   const InferenceOptions& options) {
  CalculationsPrecision precision;
  switch (GetPosition(options, InferencePriority::MAX_PRECISION)) {
    case 1:
      precision = CalculationsPrecision::F32;
      break;
    case 2:
      precision = CalculationsPrecision::F32_F16;
      break;
    case 3:
      precision = CalculationsPrecision::F16;
      break;
    default:
      precision = CalculationsPrecision::F16;
      break;
  }
  // Increase precision if lower precision is not supported.
  if (!env.IsSupported(precision)) {
    precision = CalculationsPrecision::F32_F16;
    if (!env.IsSupported(precision)) {
      precision = CalculationsPrecision::F32;
    }
  }
  return precision;
}

TensorStorageType GetStorageTypeFromOptions(const Environment& env,
                                            const InferenceOptions& options) {
  // Fall back to BUFFER, which should be supported by default.
  std::vector<TensorStorageType> preferred_storage_types;
  if (GetRelativeImportance(options, InferencePriority::MIN_LATENCY,
                            InferencePriority::MIN_MEMORY_USAGE) ==
      PriorityImportance::HIGHER) {
    preferred_storage_types = {GetFastestStorageType(env.device().GetInfo()),
                               TensorStorageType::BUFFER};
  } else {
    preferred_storage_types = {
        GetStorageTypeWithMinimalMemoryConsumption(env.device().GetInfo()),
        TensorStorageType::BUFFER};
  }

  for (TensorStorageType storage_type : preferred_storage_types) {
    if (env.IsSupported(storage_type)) {
      return storage_type;
    }
  }
  return TensorStorageType::UNKNOWN;
}

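// Compiles (or restores) an InferenceContext and lets the caller adjust
// input/output object definitions before building an InferenceRunner.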
class InferenceBuilderImpl : public InferenceBuilder {
 public:
  explicit InferenceBuilderImpl(Environment* environment)
      : environment_(environment) {}

  absl::Status Initialize(const InferenceOptions& options,
                          const InferenceEnvironmentOptions& env_options,
                          const GraphFloat32& graph) {
    context_ = absl::make_unique<InferenceContext>();
    InferenceContext::CreateInferenceInfo create_info;
    create_info.precision = GetPrecision(*environment_, options);
    create_info.storage_type =
        GetStorageTypeFromOptions(*environment_, options);
    if (options.usage == InferenceUsage::FAST_SINGLE_ANSWER) {
      create_info.hints.Add(ModelHints::kReduceKernelsCount);
      create_info.hints.Add(ModelHints::kFastTuning);
    } else if (options.usage == InferenceUsage::SUSTAINED_SPEED) {
      create_info.hints.Add(ModelHints::kAllowSpecialKernels);
    }
    RETURN_IF_ERROR(context_->InitFromGraph(create_info, graph, environment_));

#ifdef CL_DELEGATE_ALLOW_GL
    if (env_options.IsGlAware() &&
        IsGlSharingSupported(environment_->device())) {
      gl_interop_fabric_ = absl::make_unique<GlInteropFabric>(
          env_options.egl_display, environment_);
    }
    tie_factory_ = absl::make_unique<TensorTieFactory>(
        environment_, context_.get(), gl_interop_fabric_.get());
#else
    tie_factory_ =
        absl::make_unique<TensorTieFactory>(environment_, context_.get());
#endif

    inputs_ = LinkTensors(context_->GetInputIds(), AccessType::READ);
    outputs_ = LinkTensors(context_->GetOutputIds(), AccessType::WRITE);
    return absl::OkStatus();
  }

  absl::Status Initialize(const InferenceEnvironmentOptions& env_options,
                          const absl::Span<const uint8_t> serialized_model,
                          std::vector<int64_t>* in_refs = nullptr,
                          std::vector<int64_t>* out_refs = nullptr) {
    context_ = absl::make_unique<InferenceContext>();
    RETURN_IF_ERROR(
        context_->RestoreDeserialized(serialized_model, environment_));

#ifdef CL_DELEGATE_ALLOW_GL
    if (env_options.IsGlAware() &&
        IsGlSharingSupported(environment_->device())) {
      gl_interop_fabric_ = absl::make_unique<GlInteropFabric>(
          env_options.egl_display, environment_);
    }
    tie_factory_ = absl::make_unique<TensorTieFactory>(
        environment_, context_.get(), gl_interop_fabric_.get());
#else
    tie_factory_ =
        absl::make_unique<TensorTieFactory>(environment_, context_.get());
#endif

    inputs_ = LinkTensors(context_->GetInputIds(), AccessType::READ);
    outputs_ = LinkTensors(context_->GetOutputIds(), AccessType::WRITE);
    if (in_refs) {
      *in_refs = context_->GetInputRefs();
    }
    if (out_refs) {
      *out_refs = context_->GetOutputRefs();
    }
    return absl::OkStatus();
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status SetInputShape(int index, const Dimensions& dimensions) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    return absl::UnimplementedError("Changing input shapes is not supported");
  }

  absl::Status SetInputObjectDef(int index, ObjectDef new_def) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Input index is out of range");
    }
    auto def = inputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New input object definition is not supported.");
    }
    inputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status SetOutputObjectDef(int index, ObjectDef new_def) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Output index is out of range");
    }
    auto def = outputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New output object definition is not supported.");
    }
    outputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status Build(std::unique_ptr<InferenceRunner>* runner) override {
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_ && !HasGlObjects()) {
      // Destroy the interop layer when there are no GL objects to avoid
      // extra synchronization cost.
      gl_interop_fabric_.reset(nullptr);
    }
    auto runner_impl = absl::make_unique<InferenceRunnerImpl>(
        environment_, std::move(context_), std::move(gl_interop_fabric_));
#else
    auto runner_impl = absl::make_unique<InferenceRunnerImpl>(
        environment_, std::move(context_));
#endif
    RETURN_IF_ERROR(
        runner_impl->Initialize(inputs_, outputs_, tie_factory_.get()));
    *runner = std::move(runner_impl);
    return absl::OkStatus();
  }

 private:
  // Links internal tensors with external user-facing objects.
  std::vector<TensorTieDef> LinkTensors(const std::vector<ValueId>& ids,
                                        AccessType access) {
    std::vector<TensorTieDef> links;
    links.reserve(ids.size());
    for (const auto& id : ids) {
      TensorObjectDef def = TensorToDef(*context_->GetTensor(id));
      links.push_back({id, access, def, def});
    }
    return links;
  }

  bool HasGlObjects() const {
#ifdef CL_DELEGATE_ALLOW_GL
    auto is_gl = [](ObjectType t) {
      return t == ObjectType::OPENGL_SSBO || t == ObjectType::OPENGL_TEXTURE;
    };
    for (const TensorTieDef& def : inputs_) {
      if (is_gl(def.external_def.object_def.object_type)) {
        return true;
      }
    }
    for (const TensorTieDef& def : outputs_) {
      if (is_gl(def.external_def.object_def.object_type)) {
        return true;
      }
    }
#endif
    return false;
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<TensorTieDef>& links) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(links.size());
    for (auto& desc : links) {
      defs.push_back(desc.external_def);
    }
    return defs;
  }

  std::unique_ptr<InferenceContext> context_;
#ifdef CL_DELEGATE_ALLOW_GL
  std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
#endif
  Environment* environment_;

  std::vector<TensorTieDef> inputs_;
  std::vector<TensorTieDef> outputs_;
  std::unique_ptr<TensorTieFactory> tie_factory_;
};

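// Owns the OpenCL device, context and command queues shared by every builder
// and runner created through this environment.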
class InferenceEnvironmentImpl : public InferenceEnvironment {
 public:
  explicit InferenceEnvironmentImpl(const InferenceEnvironmentOptions& options)
      : options_(options) {}

  absl::Status Init() {
    RETURN_IF_ERROR(LoadOpenCL());
    properties_.is_opencl_available = true;

    CLDevice device;
    if (options_.device) {
      cl_platform_id platform;
      RETURN_IF_ERROR(GetDeviceInfo<cl_platform_id>(
          options_.device, CL_DEVICE_PLATFORM, &platform));
      device = CLDevice(options_.device, platform);
    } else {
      RETURN_IF_ERROR(CreateDefaultGPUDevice(&device));
    }

#ifdef CL_DELEGATE_ALLOW_GL
    properties_.is_gl_sharing_supported = IsGlSharingSupported(device);
    properties_.is_gl_to_cl_fast_sync_supported =
        IsClEventFromEglSyncSupported(device);
    properties_.is_cl_to_gl_fast_sync_supported =
        IsEglSyncFromClEventSupported();
#endif

    CLContext context;
    if (options_.context) {
#ifdef CL_DELEGATE_ALLOW_GL
      if (options_.IsGlAware()) {
        return absl::InvalidArgumentError(
            "OpenCL context and EGL parameters are set at the same time.");
      }
#endif
      context = CLContext(options_.context, /* has_ownership = */ false);
    } else {
#ifdef CL_DELEGATE_ALLOW_GL
      if (options_.IsGlAware() && properties_.is_gl_sharing_supported) {
        RETURN_IF_ERROR(CreateCLGLContext(
            device,
            reinterpret_cast<cl_context_properties>(options_.egl_context),
            reinterpret_cast<cl_context_properties>(options_.egl_display),
            &context));
      } else {
        RETURN_IF_ERROR(CreateCLContext(device, &context));
      }
#else
      RETURN_IF_ERROR(CreateCLContext(device, &context));
#endif
    }

    CLCommandQueue queue;
    if (options_.command_queue) {
      queue =
          CLCommandQueue(options_.command_queue, /* has_ownership = */ false);
    } else {
      RETURN_IF_ERROR(CreateCLCommandQueue(device, context, &queue));
    }
    // Profiling queue is used for workgroup size tuning.
    ProfilingCommandQueue profiling_queue;
    RETURN_IF_ERROR(
        CreateProfilingCommandQueue(device, context, &profiling_queue));
    environment_ = Environment(std::move(device), std::move(context),
                               std::move(queue), std::move(profiling_queue));
    return environment_.Init();
  }

  absl::Status BuildSerializedModel(
      const InferenceOptions& options, GraphFloat32 model,
      std::vector<uint8_t>* serialized_model) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    RETURN_IF_ERROR(RunGraphTransforms(&model));
    InferenceContext context;
    InferenceContext::CreateInferenceInfo create_info;
    create_info.precision = GetPrecision(environment_, options);
    create_info.storage_type = GetStorageTypeFromOptions(environment_, options);
    if (options.usage == InferenceUsage::FAST_SINGLE_ANSWER) {
      create_info.hints.Add(ModelHints::kReduceKernelsCount);
      create_info.hints.Add(ModelHints::kFastTuning);
    } else if (options.usage == InferenceUsage::SUSTAINED_SPEED) {
      create_info.hints.Add(ModelHints::kAllowSpecialKernels);
    }
    RETURN_IF_ERROR(context.InitFromGraph(create_info, model, &environment_,
                                          serialized_model));
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      const InferenceOptions& options, GraphFloat32 model,
      std::unique_ptr<InferenceBuilder>* builder) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    RETURN_IF_ERROR(RunGraphTransforms(&model));
    auto builder_impl = absl::make_unique<InferenceBuilderImpl>(&environment_);
    RETURN_IF_ERROR(
        builder_impl->Initialize(resolved_options, options_, model));
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      const absl::Span<const uint8_t> serialized_model,
      std::unique_ptr<InferenceBuilder>* builder, std::vector<int64_t>* in_refs,
      std::vector<int64_t>* out_refs) final {
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    auto builder_impl = absl::make_unique<InferenceBuilderImpl>(&environment_);
    RETURN_IF_ERROR(builder_impl->Initialize(options_, serialized_model,
                                             in_refs, out_refs));
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  std::vector<uint8_t> GetSerializedBinaryCache() const final {
    std::vector<uint8_t> data;
    // If there was a problem, data will be empty.
    environment_.program_cache()
        ->GetSerializedCache(environment_.device(), &data)
        .IgnoreError();
    return data;
  }

  const InferenceEnvironmentProperties& properties() const {
    return properties_;
  }

 private:
  const InferenceEnvironmentOptions options_;
  Environment environment_;
  InferenceEnvironmentProperties properties_;
};

}  // namespace

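// Note: environment properties are reported even when initialization fails,
// so callers can still inspect what the device supports.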
absl::Status NewInferenceEnvironment(
    const InferenceEnvironmentOptions& options,
    std::unique_ptr<InferenceEnvironment>* environment,
    InferenceEnvironmentProperties* properties) {
  auto env_impl = absl::make_unique<InferenceEnvironmentImpl>(options);
  absl::Status status = env_impl->Init();
  if (properties) {
    *properties = env_impl->properties();
  }
  RETURN_IF_ERROR(status);
  *environment = std::move(env_impl);
  return absl::OkStatus();
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite