/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/api.h"

#ifndef CL_DELEGATE_NO_GL
#define CL_DELEGATE_ALLOW_GL
#endif
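// GL interop support is compiled in by default; define CL_DELEGATE_NO_GL to
// build a CL-only delegate and compile out the GL paths below.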

#include <algorithm>
#include <cstring>

#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_event.h"
#include "tensorflow/lite/delegates/gpu/cl/environment.h"
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"

#ifdef CL_DELEGATE_ALLOW_GL
#include <EGL/eglext.h>

#include "tensorflow/lite/delegates/gpu/cl/egl_sync.h"
#include "tensorflow/lite/delegates/gpu/cl/gl_interop.h"
#endif

namespace tflite {
namespace gpu {
namespace cl {
namespace {

// Both internal and external defs are identical, therefore nothing to connect
// here.
class NoopTensorTie : public TensorTie {
 public:
  NoopTensorTie(const TensorTieDef& def, TensorObject obj)
      : TensorTie(def), obj_(obj) {}

  static bool IsSupported(const TensorTieDef& def) {
    return def.external_def == def.internal_def;
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("Tensor object is readonly.");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    obj_ = obj;
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return obj_; }

  absl::Status CopyToExternalObject() final { return absl::OkStatus(); }

  absl::Status CopyFromExternalObject() final { return absl::OkStatus(); }

 private:
  TensorObject obj_;
};

// Does one-step conversion between internal and external objects.
// It may also allocate external objects if requested.
class DefaultTensorTie : public TensorTie {
 public:
  DefaultTensorTie(const TensorTieDef& def, TensorObject internal_obj)
      : TensorTie(def), internal_obj_(internal_obj) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto object_type = def.external_def.object_def.object_type;
#ifdef CL_DELEGATE_ALLOW_GL
    if (def.external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def.external_def.object_def,
                                      def.internal_def.object_def)) {
      return true;
    }
#endif
    return (object_type == ObjectType::OPENCL_BUFFER ||
            object_type == ObjectType::OPENCL_TEXTURE ||
            object_type == ObjectType::CPU_MEMORY) &&
           converter_builder.IsSupported(def.internal_def, def.external_def) &&
           converter_builder.IsSupported(def.external_def, def.internal_def);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          Environment* env, std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = absl::make_unique<DefaultTensorTie>(def, internal_object);
    RETURN_IF_ERROR(tie_impl->Init(converter_builder, env));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    if (!converter_to_) {
      return absl::UnavailableError("Conversion is not available");
    }
    return converter_to_->Convert(internal_obj_, GetExternalObject());
  }

  absl::Status CopyFromExternalObject() final {
    if (!converter_from_) {
      return absl::UnavailableError("Conversion is not available");
    }
    return converter_from_->Convert(GetExternalObject(), internal_obj_);
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("External object is read-only");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    external_obj_ = obj;
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

 private:
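  // When the external object is a user-provided GL buffer that can be copied
  // directly to/from the internal CL buffer, a GlClBufferCopier is used;
  // otherwise generic converters are built for both directions.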
  absl::Status Init(TensorObjectConverterBuilder* converter_builder,
                    Environment* env) {
#ifdef CL_DELEGATE_ALLOW_GL
    if (def().external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def().external_def.object_def,
                                      def().internal_def.object_def)) {
      converter_from_ = absl::make_unique<GlClBufferCopier>(
          def().internal_def, def().external_def, env);
    } else {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().external_def, def().internal_def, &converter_from_));
    }
    if (def().external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def().internal_def.object_def,
                                      def().external_def.object_def)) {
      converter_to_ = absl::make_unique<GlClBufferCopier>(
          def().internal_def, def().external_def, env);
    } else {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().internal_def, def().external_def, &converter_to_));
    }
#else
    RETURN_IF_ERROR(converter_builder->MakeConverter(
        def().external_def, def().internal_def, &converter_from_));
    RETURN_IF_ERROR(converter_builder->MakeConverter(
        def().internal_def, def().external_def, &converter_to_));
#endif
    return MaybeAllocateExternalObject(env);
  }

  absl::Status MaybeAllocateExternalObject(Environment* env) {
    const TensorObjectDef& d = def().external_def;
    if (d.object_def.user_provided) {
      return absl::OkStatus();
    }
    switch (d.object_def.object_type) {
      case ObjectType::CPU_MEMORY: {
        size_t bytes_size = NumElements(d) * SizeOf(d.object_def.data_type);
        cpu_memory_.resize(bytes_size);
        external_obj_ = CpuMemory{cpu_memory_.data(), cpu_memory_.size()};
        break;
      }
      case ObjectType::OPENCL_TEXTURE:
      case ObjectType::OPENCL_BUFFER: {
        auto& dims = d.dimensions;
        const BHWC shape(dims.b, dims.h, dims.w, dims.c);
        const TensorDescriptor desc{
            d.object_def.data_type,
            ToTensorStorageType(d.object_def.object_type,
                                d.object_def.data_layout),
            Layout::BHWC};
        RETURN_IF_ERROR(
            AllocateTensorMemory(env->context(), shape, desc, &cl_memory_));
        if (d.object_def.object_type == ObjectType::OPENCL_TEXTURE) {
          external_obj_ = OpenClTexture{cl_memory_.memory()};
        } else {
          external_obj_ = OpenClBuffer{cl_memory_.memory()};
        }
        break;
      }
      default:
        return absl::InternalError("Unexpected object type");
    }
    return absl::OkStatus();
  }

  const TensorObject internal_obj_;
  TensorObject external_obj_;
  CLMemory cl_memory_;
  std::vector<uint8_t> cpu_memory_;
  std::unique_ptr<TensorObjectConverter> converter_to_;
  std::unique_ptr<TensorObjectConverter> converter_from_;
};

// Copies data to an intermediate OpenCL buffer and then does a two-step
// conversion. It drives the following cases where one-step conversion is not
// supported:
//   - CPU BHWC -> CL buffer BHWC -> CL texture DHWC4.
class TwoStepTensorTie : public TensorTie {
 public:
  explicit TwoStepTensorTie(const TensorTieDef& def) : TensorTie(def) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto defs = MakeOuterInnerDefs(def);
    return DefaultTensorTie::IsSupported(defs.first, converter_builder) &&
           DefaultTensorTie::IsSupported(defs.second, converter_builder);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          Environment* env, std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = absl::make_unique<TwoStepTensorTie>(def);
    RETURN_IF_ERROR(tie_impl->Init(internal_object, converter_builder, env));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    RETURN_IF_ERROR(inner_tie_->CopyToExternalObject());
    return outer_tie_->CopyToExternalObject();
  }

  absl::Status CopyFromExternalObject() final {
    RETURN_IF_ERROR(outer_tie_->CopyFromExternalObject());
    return inner_tie_->CopyFromExternalObject();
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    return outer_tie_->SetExternalObject(obj);
  }

  TensorObject GetExternalObject() final {
    return outer_tie_->GetExternalObject();
  }

 private:
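  // The outer tie converts between the user-facing external object and an
  // intermediate OpenCL buffer; the inner tie converts between that buffer
  // and the internal tensor object.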
  static std::pair<TensorTieDef, TensorTieDef> MakeOuterInnerDefs(
      const TensorTieDef& def) {
    TensorTieDef outer_def;
    outer_def.external_def = def.external_def;
    outer_def.internal_def = def.external_def;
    outer_def.internal_def.object_def.object_type = ObjectType::OPENCL_BUFFER;
    outer_def.internal_def.object_def.user_provided = true;

    TensorTieDef inner_def;
    inner_def.external_def = outer_def.internal_def;
    inner_def.external_def.object_def.user_provided = false;
    inner_def.internal_def = def.internal_def;
    return std::make_pair(outer_def, inner_def);
  }

  absl::Status Init(TensorObject internal_object,
                    TensorObjectConverterBuilder* converter_builder,
                    Environment* env) {
    auto defs = MakeOuterInnerDefs(def());
    RETURN_IF_ERROR(DefaultTensorTie::New(defs.second, internal_object,
                                          converter_builder, env, &inner_tie_));
    return DefaultTensorTie::New(defs.first, inner_tie_->GetExternalObject(),
                                 converter_builder, env, &outer_tie_);
  }

  std::unique_ptr<TensorTie> inner_tie_;
  std::unique_ptr<TensorTie> outer_tie_;
};

#ifdef CL_DELEGATE_ALLOW_GL
// Captures GL object into CL context before performing a conversion.
class GlBufferHolder : public TensorTie {
 public:
  GlBufferHolder(const TensorTieDef& def, GlInteropFabric* gl_interop_fabric,
                 Environment* env)
      : TensorTie(def),
        gl_interop_fabric_(gl_interop_fabric),
        environment_(env) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    if (!def.external_def.object_def.user_provided ||
        def.external_def.object_def.object_type != ObjectType::OPENGL_SSBO) {
      return false;
    }
    return DefaultTensorTie::IsSupported(MakeClDef(def), converter_builder);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          GlInteropFabric* gl_interop_fabric, Environment* env,
                          std::unique_ptr<TensorTie>* tie) {
    auto tie_impl =
        absl::make_unique<GlBufferHolder>(def, gl_interop_fabric, env);
    RETURN_IF_ERROR(DefaultTensorTie::New(MakeClDef(def), internal_object,
                                          converter_builder, env,
                                          &tie_impl->tie_));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

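  // Re-registers the underlying CL memory with the interop fabric whenever a
  // different SSBO id is provided; setting the same SSBO again is a no-op.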
  absl::Status SetExternalObject(TensorObject obj) final {
    auto ssbo = absl::get_if<OpenGlBuffer>(&obj);
    if (!ssbo) {
      return absl::InvalidArgumentError("Missing OpenGL SSBO");
    }
    auto old_ssbo = absl::get_if<OpenGlBuffer>(&external_obj_);
    if (old_ssbo && ssbo->id == old_ssbo->id) {
      return absl::OkStatus();
    }
    if (cl_object_.memory()) {
      gl_interop_fabric_->UnregisterMemory(cl_object_.memory());
    }
    RETURN_IF_ERROR(CreateClMemoryFromGlBuffer(
        ssbo->id, def().access_type, &environment_->context(), &cl_object_));
    external_obj_ = obj;
    RETURN_IF_ERROR(tie_->SetExternalObject(OpenClBuffer{cl_object_.memory()}));
    gl_interop_fabric_->RegisterMemory(cl_object_.memory());
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

  absl::Status CopyFromExternalObject() final {
    return tie_->CopyFromExternalObject();
  }

  absl::Status CopyToExternalObject() final {
    return tie_->CopyToExternalObject();
  }

 private:
  static TensorTieDef MakeClDef(const TensorTieDef& def) {
    auto cl_def = def;
    cl_def.external_def.object_def.object_type = ObjectType::OPENCL_BUFFER;
    cl_def.external_def.object_def.user_provided = true;
    return cl_def;
  }

  CLMemory cl_object_;
  GlInteropFabric* gl_interop_fabric_;
  Environment* environment_;
  std::unique_ptr<TensorTie> tie_;
  TensorObject external_obj_;
};
#endif

TensorObject TensorToObj(const Tensor& tensor) {
  if (tensor.GetStorageType() == TensorStorageType::BUFFER) {
    return OpenClBuffer{tensor.GetMemoryPtr()};
  }
  if (tensor.GetStorageType() == TensorStorageType::IMAGE_BUFFER) {
    return OpenClBuffer{tensor.GetMemoryPtrForWriting()};
  }
  return OpenClTexture{tensor.GetMemoryPtr()};
}

// Responsible for creating new tensor ties that connect the context's
// internal tensors to user-facing external objects.
class TensorTieFactory {
 public:
  TensorTieFactory(Environment* env, InferenceContext* context
#ifdef CL_DELEGATE_ALLOW_GL
                   ,
                   GlInteropFabric* gl_interop_fabric
#endif
                   )
      : env_(*env),
        context_(*context),
#ifdef CL_DELEGATE_ALLOW_GL
        gl_interop_fabric_(gl_interop_fabric),
#endif
        converter_builder_(NewConverterBuilder(env)) {
  }

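  // Tie selection order: no-op when internal and external defs already match,
  // then direct one-step conversion, then GL buffer capture (when GL interop
  // is enabled), and finally two-step conversion through an intermediate CL
  // buffer.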
  bool IsSupported(const TensorTieDef& def) const {
    return IsValid(def.external_def.object_def) &&
           (NoopTensorTie::IsSupported(def) ||
            DefaultTensorTie::IsSupported(def, *converter_builder_) ||
#ifdef CL_DELEGATE_ALLOW_GL
            (gl_interop_fabric_ &&
             GlBufferHolder::IsSupported(def, *converter_builder_)) ||
#endif
            TwoStepTensorTie::IsSupported(def, *converter_builder_));
  }

  absl::Status NewTensorTie(const TensorTieDef& def,
                            std::unique_ptr<TensorTie>* tie) {
    TensorObject internal_object = TensorToObj(*context_.GetTensor(def.id));
    auto converter = converter_builder_.get();
    if (NoopTensorTie::IsSupported(def)) {
      *tie = absl::make_unique<NoopTensorTie>(def, internal_object);
      return absl::OkStatus();
    }
    if (DefaultTensorTie::IsSupported(def, *converter)) {
      return DefaultTensorTie::New(def, internal_object, converter, &env_, tie);
    }
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_ && GlBufferHolder::IsSupported(def, *converter)) {
      return GlBufferHolder::New(def, internal_object, converter,
                                 gl_interop_fabric_, &env_, tie);
    }
#endif
    if (TwoStepTensorTie::IsSupported(def, *converter)) {
      return TwoStepTensorTie::New(def, internal_object, converter, &env_, tie);
    }
    return absl::UnimplementedError("Unsupported tensor tie definition.");
  }

 private:
  Environment& env_;
  InferenceContext& context_;
#ifdef CL_DELEGATE_ALLOW_GL
  GlInteropFabric* gl_interop_fabric_;
#endif
  std::unique_ptr<TensorObjectConverterBuilder> converter_builder_;
};

class InferenceRunnerImpl : public CLInferenceRunner {
 public:
  InferenceRunnerImpl(Environment* environment,
                      std::unique_ptr<InferenceContext> context
#ifdef CL_DELEGATE_ALLOW_GL
                      ,
                      std::unique_ptr<GlInteropFabric> gl_interop_fabric
#endif
                      )
      : queue_(environment->queue()),
        context_(std::move(context))
#ifdef CL_DELEGATE_ALLOW_GL
        ,
        gl_interop_fabric_(std::move(gl_interop_fabric))
#endif
  {
  }

  absl::Status Initialize(const std::vector<TensorTieDef>& inputs,
                          const std::vector<TensorTieDef>& outputs,
                          TensorTieFactory* factory) {
    RETURN_IF_ERROR(LinkTensors(inputs, factory, &inputs_));
    return LinkTensors(outputs, factory, &outputs_);
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status GetInputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = inputs_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status GetOutputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = outputs_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status SetInputObject(int index, TensorObject object) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Input index is out of range");
    }
    return inputs_[index]->SetExternalObject(object);
  }

  absl::Status SetOutputObject(int index, TensorObject object) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Output index is out of range");
    }
    return outputs_[index]->SetExternalObject(object);
  }

  absl::Status CopyFromExternalInput(int index) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::NotFoundError(
          absl::StrCat("Input id ", index, " is an invalid input index."));
    }
    return inputs_[index]->CopyFromExternalObject();
  }

  absl::Status CopyToExternalOutput(int index) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::NotFoundError(
          absl::StrCat("Output id ", index, " is an invalid output index."));
    }
    return outputs_[index]->CopyToExternalObject();
  }

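  // Copies all external inputs in, enqueues the inference, and copies all
  // outputs back out. When GL interop is enabled, the whole sequence is
  // bracketed by the interop fabric's Start()/Finish() synchronization.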
  absl::Status Run() override {
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_) {
      RETURN_IF_ERROR(gl_interop_fabric_->Start());
    }
#endif
    for (int i = 0; i < inputs_.size(); i++) {
      RETURN_IF_ERROR(CopyFromExternalInput(i));
    }

    RETURN_IF_ERROR(RunWithoutExternalBufferCopy());

    for (int i = 0; i < outputs_.size(); i++) {
      RETURN_IF_ERROR(CopyToExternalOutput(i));
    }
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_) {
      RETURN_IF_ERROR(gl_interop_fabric_->Finish());
    }
#endif
    return absl::OkStatus();
  }

  absl::Status RunWithoutExternalBufferCopy() override {
    RETURN_IF_ERROR(context_->AddToQueue(queue_));
    clFlush(queue_->queue());

    return absl::OkStatus();
  }

 private:
  static absl::Status LinkTensors(
      const std::vector<TensorTieDef>& defs, TensorTieFactory* factory,
      std::vector<std::unique_ptr<TensorTie>>* objects) {
    objects->reserve(defs.size());
    for (auto& def : defs) {
      std::unique_ptr<TensorTie> object;
      RETURN_IF_ERROR(factory->NewTensorTie(def, &object));
      objects->push_back(std::move(object));
    }
    return absl::OkStatus();
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<std::unique_ptr<TensorTie>>& objects) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(objects.size());
    for (auto& obj : objects) {
      defs.push_back(obj->def().external_def);
    }
    return defs;
  }

  CLCommandQueue* queue_;
  std::unique_ptr<InferenceContext> context_;
#ifdef CL_DELEGATE_ALLOW_GL
  std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
#endif
  std::vector<std::unique_ptr<TensorTie>> inputs_;
  std::vector<std::unique_ptr<TensorTie>> outputs_;
};

TensorObjectDef TensorToDef(const Tensor& tensor) {
  TensorObjectDef def;
  def.dimensions.b = tensor.Batch();
  def.dimensions.h = tensor.Height();
  def.dimensions.w = tensor.Width();
  def.dimensions.c = tensor.Channels();
  def.object_def.data_layout = ToDataLayout(tensor.GetStorageType());
  def.object_def.data_type = tensor.GetDataType();
  def.object_def.object_type = ToObjectType(tensor.GetStorageType());
  def.object_def.user_provided = false;
  return def;
}

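// Maps the position of MAX_PRECISION among the inference priorities to a
// calculation precision: top priority -> F32, second -> F32_F16, otherwise
// F16. The result is promoted to a higher precision when the device does not
// support the chosen one.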
CalculationsPrecision GetPrecision(const Environment& env,
                                   const InferenceOptions& options) {
  CalculationsPrecision precision;
  switch (GetPosition(options, InferencePriority::MAX_PRECISION)) {
    case 1:
      precision = CalculationsPrecision::F32;
      break;
    case 2:
      precision = CalculationsPrecision::F32_F16;
      break;
    case 3:
      precision = CalculationsPrecision::F16;
      break;
    default:
      precision = CalculationsPrecision::F16;
      break;
  }
  // Increase precision if lower precision is not supported.
  if (!env.IsSupported(precision)) {
    precision = CalculationsPrecision::F32_F16;
    if (!env.IsSupported(precision)) {
      precision = CalculationsPrecision::F32;
    }
  }
  return precision;
}

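// Chooses the tensor storage type: the fastest type when MIN_LATENCY outranks
// MIN_MEMORY_USAGE, the most memory-frugal type otherwise, in both cases
// falling back to BUFFER if the preferred type is unsupported.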
TensorStorageType GetStorageTypeFromOptions(const Environment& env,
                                            const InferenceOptions& options) {
  // Fall back to BUFFER, which should be supported by default.
  std::vector<TensorStorageType> preferred_storage_types;
  if (GetRelativeImportance(options, InferencePriority::MIN_LATENCY,
                            InferencePriority::MIN_MEMORY_USAGE) ==
      PriorityImportance::HIGHER) {
    preferred_storage_types = {GetFastestStorageType(env.device().GetInfo()),
                               TensorStorageType::BUFFER};
  } else {
    preferred_storage_types = {
        GetStorageTypeWithMinimalMemoryConsumption(env.device().GetInfo()),
        TensorStorageType::BUFFER};
  }

  for (TensorStorageType storage_type : preferred_storage_types) {
    if (env.IsSupported(storage_type)) {
      return storage_type;
    }
  }
  return TensorStorageType::UNKNOWN;
}

class InferenceBuilderImpl : public InferenceBuilder {
 public:
  explicit InferenceBuilderImpl(Environment* environment)
      : environment_(environment) {}

  absl::Status Initialize(const InferenceOptions& options,
                          const InferenceEnvironmentOptions& env_options,
                          const GraphFloat32& graph) {
    context_ = absl::make_unique<InferenceContext>();
    InferenceContext::CreateInferenceInfo create_info;
    create_info.precision = GetPrecision(*environment_, options);
    create_info.storage_type =
        GetStorageTypeFromOptions(*environment_, options);
    if (options.usage == InferenceUsage::FAST_SINGLE_ANSWER) {
      create_info.hints.Add(ModelHints::kReduceKernelsCount);
      create_info.hints.Add(ModelHints::kFastTuning);
    } else if (options.usage == InferenceUsage::SUSTAINED_SPEED) {
      create_info.hints.Add(ModelHints::kAllowSpecialKernels);
    }
    RETURN_IF_ERROR(context_->InitFromGraph(create_info, graph, environment_));

#ifdef CL_DELEGATE_ALLOW_GL
    if (env_options.IsGlAware() &&
        IsGlSharingSupported(environment_->device())) {
      gl_interop_fabric_ = absl::make_unique<GlInteropFabric>(
          env_options.egl_display, environment_);
    }
    tie_factory_ = absl::make_unique<TensorTieFactory>(
        environment_, context_.get(), gl_interop_fabric_.get());
#else
    tie_factory_ =
        absl::make_unique<TensorTieFactory>(environment_, context_.get());
#endif

    inputs_ = LinkTensors(context_->GetInputIds(), AccessType::READ);
    outputs_ = LinkTensors(context_->GetOutputIds(), AccessType::WRITE);
    return absl::OkStatus();
  }

  absl::Status Initialize(const InferenceEnvironmentOptions& env_options,
                          const absl::Span<const uint8_t> serialized_model,
                          std::vector<int64_t>* in_refs = nullptr,
                          std::vector<int64_t>* out_refs = nullptr) {
    context_ = absl::make_unique<InferenceContext>();
    RETURN_IF_ERROR(
        context_->RestoreDeserialized(serialized_model, environment_));

#ifdef CL_DELEGATE_ALLOW_GL
    if (env_options.IsGlAware() &&
        IsGlSharingSupported(environment_->device())) {
      gl_interop_fabric_ = absl::make_unique<GlInteropFabric>(
          env_options.egl_display, environment_);
    }
    tie_factory_ = absl::make_unique<TensorTieFactory>(
        environment_, context_.get(), gl_interop_fabric_.get());
#else
    tie_factory_ =
        absl::make_unique<TensorTieFactory>(environment_, context_.get());
#endif

    inputs_ = LinkTensors(context_->GetInputIds(), AccessType::READ);
    outputs_ = LinkTensors(context_->GetOutputIds(), AccessType::WRITE);
    if (in_refs) {
      *in_refs = context_->GetInputRefs();
    }
    if (out_refs) {
      *out_refs = context_->GetOutputRefs();
    }
    return absl::OkStatus();
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status SetInputShape(int index, const Dimensions& dimensions) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    return absl::UnimplementedError("Changing input shapes is not supported");
  }

  absl::Status SetInputObjectDef(int index, ObjectDef new_def) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Input index is out of range");
    }
    auto def = inputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New input object definition is not supported.");
    }
    inputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status SetOutputObjectDef(int index, ObjectDef new_def) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Output index is out of range");
    }
    auto def = outputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New output object definition is not supported.");
    }
    outputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status Build(std::unique_ptr<InferenceRunner>* runner) override {
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_ && !HasGlObjects()) {
      // Destroy the interop layer when there are no GL objects to avoid extra
      // synchronization cost.
      gl_interop_fabric_.reset(nullptr);
    }
    auto runner_impl = absl::make_unique<InferenceRunnerImpl>(
        environment_, std::move(context_), std::move(gl_interop_fabric_));
#else
    auto runner_impl = absl::make_unique<InferenceRunnerImpl>(
        environment_, std::move(context_));
#endif
    RETURN_IF_ERROR(
        runner_impl->Initialize(inputs_, outputs_, tie_factory_.get()));
    *runner = std::move(runner_impl);
    return absl::OkStatus();
  }

 private:
  // Links internal tensors with external user-facing objects.
  std::vector<TensorTieDef> LinkTensors(const std::vector<ValueId>& ids,
                                        AccessType access) {
    std::vector<TensorTieDef> links;
    links.reserve(ids.size());
    for (const auto& id : ids) {
      TensorObjectDef def = TensorToDef(*context_->GetTensor(id));
      links.push_back({id, access, def, def});
    }
    return links;
  }

  bool HasGlObjects() const {
#ifdef CL_DELEGATE_ALLOW_GL
    auto is_gl = [](ObjectType t) {
      return t == ObjectType::OPENGL_SSBO || t == ObjectType::OPENGL_TEXTURE;
    };
    for (const TensorTieDef& def : inputs_) {
      if (is_gl(def.external_def.object_def.object_type)) {
        return true;
      }
    }
    for (const TensorTieDef& def : outputs_) {
      if (is_gl(def.external_def.object_def.object_type)) {
        return true;
      }
    }
#endif
    return false;
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<TensorTieDef>& links) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(links.size());
    for (auto& desc : links) {
      defs.push_back(desc.external_def);
    }
    return defs;
  }

  std::unique_ptr<InferenceContext> context_;
#ifdef CL_DELEGATE_ALLOW_GL
  std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
#endif
  Environment* environment_;

  std::vector<TensorTieDef> inputs_;
  std::vector<TensorTieDef> outputs_;
  std::unique_ptr<TensorTieFactory> tie_factory_;
};

class InferenceEnvironmentImpl : public InferenceEnvironment {
 public:
  explicit InferenceEnvironmentImpl(const InferenceEnvironmentOptions& options)
      : options_(options) {}

  absl::Status Init() {
    RETURN_IF_ERROR(LoadOpenCL());
    properties_.is_opencl_available = true;

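    // Use the externally provided OpenCL device when one is supplied in the
    // options; otherwise pick the default GPU device.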
    CLDevice device;
    if (options_.device) {
      cl_platform_id platform;
      RETURN_IF_ERROR(GetDeviceInfo<cl_platform_id>(
          options_.device, CL_DEVICE_PLATFORM, &platform));
      device = CLDevice(options_.device, platform);
    } else {
      RETURN_IF_ERROR(CreateDefaultGPUDevice(&device));
    }

#ifdef CL_DELEGATE_ALLOW_GL
    properties_.is_gl_sharing_supported = IsGlSharingSupported(device);
    properties_.is_gl_to_cl_fast_sync_supported =
        IsClEventFromEglSyncSupported(device);
    properties_.is_cl_to_gl_fast_sync_supported =
        IsEglSyncFromClEventSupported();
#endif

    CLContext context;
    if (options_.context) {
#ifdef CL_DELEGATE_ALLOW_GL
      if (options_.IsGlAware()) {
        return absl::InvalidArgumentError(
            "OpenCL context and EGL parameters are set at the same time.");
      }
#endif
      context = CLContext(options_.context, /* has_ownership = */ false);
    } else {
#ifdef CL_DELEGATE_ALLOW_GL
      if (options_.IsGlAware() && properties_.is_gl_sharing_supported) {
        RETURN_IF_ERROR(CreateCLGLContext(
            device,
            reinterpret_cast<cl_context_properties>(options_.egl_context),
            reinterpret_cast<cl_context_properties>(options_.egl_display),
            &context));
      } else {
        RETURN_IF_ERROR(CreateCLContext(device, &context));
      }
#else
      RETURN_IF_ERROR(CreateCLContext(device, &context));
#endif
    }

    CLCommandQueue queue;
    if (options_.command_queue) {
      queue =
          CLCommandQueue(options_.command_queue, /* has_ownership = */ false);
    } else {
      RETURN_IF_ERROR(CreateCLCommandQueue(device, context, &queue));
    }
    // Profiling queue is used for workgroup size tuning.
    ProfilingCommandQueue profiling_queue;
    RETURN_IF_ERROR(
        CreateProfilingCommandQueue(device, context, &profiling_queue));
    environment_ = Environment(std::move(device), std::move(context),
                               std::move(queue), std::move(profiling_queue));
    return environment_.Init();
  }

  absl::Status BuildSerializedModel(
      const InferenceOptions& options, GraphFloat32 model,
      std::vector<uint8_t>* serialized_model) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    RETURN_IF_ERROR(RunGraphTransforms(&model));
    InferenceContext context;
    InferenceContext::CreateInferenceInfo create_info;
    create_info.precision = GetPrecision(environment_, resolved_options);
    create_info.storage_type =
        GetStorageTypeFromOptions(environment_, resolved_options);
    if (resolved_options.usage == InferenceUsage::FAST_SINGLE_ANSWER) {
      create_info.hints.Add(ModelHints::kReduceKernelsCount);
      create_info.hints.Add(ModelHints::kFastTuning);
    } else if (resolved_options.usage == InferenceUsage::SUSTAINED_SPEED) {
      create_info.hints.Add(ModelHints::kAllowSpecialKernels);
    }
    RETURN_IF_ERROR(context.InitFromGraph(create_info, model, &environment_,
                                          serialized_model));
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      const InferenceOptions& options, GraphFloat32 model,
      std::unique_ptr<InferenceBuilder>* builder) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    RETURN_IF_ERROR(RunGraphTransforms(&model));
    auto builder_impl = absl::make_unique<InferenceBuilderImpl>(&environment_);
    RETURN_IF_ERROR(
        builder_impl->Initialize(resolved_options, options_, model));
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      const absl::Span<const uint8_t> serialized_model,
      std::unique_ptr<InferenceBuilder>* builder, std::vector<int64_t>* in_refs,
      std::vector<int64_t>* out_refs) final {
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    auto builder_impl = absl::make_unique<InferenceBuilderImpl>(&environment_);
    RETURN_IF_ERROR(builder_impl->Initialize(options_, serialized_model,
                                             in_refs, out_refs));
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  std::vector<uint8_t> GetSerializedBinaryCache() const final {
    std::vector<uint8_t> data;
    // If there was a problem, data will be empty.
    environment_.program_cache()
        ->GetSerializedCache(environment_.device(), &data)
        .IgnoreError();
    return data;
  }

  const InferenceEnvironmentProperties& properties() const {
    return properties_;
  }

 private:
  const InferenceEnvironmentOptions options_;
  Environment environment_;
  InferenceEnvironmentProperties properties_;
};

}  // namespace

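// Rough usage sketch, assuming caller-provided `env_options`, `options`, and
// `graph` (see api.h for the full contract): create an environment once,
// build an InferenceBuilder from a GraphFloat32 or a serialized model, then
// Build() an InferenceRunner and call Run() per frame.
//
//   std::unique_ptr<InferenceEnvironment> env;
//   RETURN_IF_ERROR(NewInferenceEnvironment(env_options, &env, nullptr));
//   std::unique_ptr<InferenceBuilder> builder;
//   RETURN_IF_ERROR(env->NewInferenceBuilder(options, std::move(graph),
//                                            &builder));
//   std::unique_ptr<InferenceRunner> runner;
//   RETURN_IF_ERROR(builder->Build(&runner));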
absl::Status NewInferenceEnvironment(
    const InferenceEnvironmentOptions& options,
    std::unique_ptr<InferenceEnvironment>* environment,
    InferenceEnvironmentProperties* properties) {
  auto env_impl = absl::make_unique<InferenceEnvironmentImpl>(options);
  absl::Status status = env_impl->Init();
  if (properties) {
    *properties = env_impl->properties();
  }
  RETURN_IF_ERROR(status);
  *environment = std::move(env_impl);
  return absl::OkStatus();
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite