1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_H_
18 
19 #include <stdint.h>
20 
21 #include "tensorflow/lite/c/common.h"
22 
23 #ifdef __cplusplus
24 extern "C" {
25 #endif  // __cplusplus
26 
// Encapsulates the tradeoff between compilation (initialization) cost and
// runtime speed. One of these values is stored in the `inference_preference`
// field of TfLiteGpuDelegateOptionsV2.
enum TfLiteGpuInferenceUsage {
  // The delegate is going to be used only once, so the bootstrap/init time
  // has to be taken into account.
  TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER = 0,

  // The same delegate is going to be used repeatedly on multiple inputs, so
  // maximizing the throughput is preferred.
  TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED = 1,
};
37 
// Values accepted by the `inference_priority1`, `inference_priority2` and
// `inference_priority3` fields of TfLiteGpuDelegateOptionsV2.
enum TfLiteGpuInferencePriority {
  // AUTO is what the remaining priorities are set to when a single priority
  // is the most important factor. For example, priority1 = MIN_LATENCY
  // (with the other priorities left as AUTO) results in the configuration
  // that achieves maximum performance.
  TFLITE_GPU_INFERENCE_PRIORITY_AUTO = 0,
  TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION = 1,
  TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY = 2,
  TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE = 3,
};
48 
// Toggles for experimental features of the delegate. The values are OR-ed
// together into a bitmask (see the `experimental_flags` field of
// TfLiteGpuDelegateOptionsV2), so each flag must occupy its own bit:
// 1, 2, 4, 8, and so on.
enum TfLiteGpuExperimentalFlags {
  TFLITE_GPU_EXPERIMENTAL_FLAGS_NONE = 0x0,
  // Enables inference on quantized models with the delegate.
  // NOTE: TfLiteGpuDelegateOptionsV2Default turns this flag on.
  TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_QUANT = 0x1,
  // The two flags below enforce execution with the provided backend.
  TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY = 0x2,
  TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY = 0x4,
};
60 
// Options accepted by TfLiteGpuDelegateV2Create().
//
// IMPORTANT: Always create new instances of this struct with
// TfLiteGpuDelegateOptionsV2Default(); otherwise every newly added option may
// break inference.
typedef struct {
  // [OBSOLETE]: to be removed.
  // Zero means computations are carried out in the maximal possible
  // precision. Any other value allows the GPU to quantify tensors, downcast
  // values, or process in FP16 to increase performance. For most models the
  // precision loss is warranted.
  int32_t is_precision_loss_allowed;

  // One of the TfLiteGpuInferenceUsage values.
  int32_t inference_preference;

  // Ordered priorities, each one a TfLiteGpuInferencePriority value.
  // priority(n) is more important than priority(n+1): whenever the inference
  // engine needs to make a decision, it consults the priorities in this
  // order, which gives better control over the desired semantics.
  // For example:
  //   MAX_PRECISION at priority1 would not allow precision to be decreased,
  //   but moving it to priority2 or priority3 would result in F16
  //   calculation.
  //
  // AUTO can only be used when all higher priorities are fully specified.
  // For example:
  //   VALID:   priority1 = MIN_LATENCY, priority2 = AUTO, priority3 = AUTO
  //   VALID:   priority1 = MIN_LATENCY, priority2 = MAX_PRECISION,
  //            priority3 = AUTO
  //   INVALID: priority1 = AUTO, priority2 = MIN_LATENCY, priority3 = AUTO
  //   INVALID: priority1 = MIN_LATENCY, priority2 = AUTO,
  //            priority3 = MAX_PRECISION
  // Invalid priorities will result in an error.
  int32_t inference_priority1;
  int32_t inference_priority2;
  int32_t inference_priority3;

  // Bitmask of TfLiteGpuExperimentalFlags values; see the comments on that
  // enum.
  int64_t experimental_flags;

  // A graph could have multiple partitions that can be delegated to the GPU;
  // this caps how many of them are delegated.
  // TfLiteGpuDelegateOptionsV2Default() sets it to 1.
  int32_t max_delegated_partitions;
} TfLiteGpuDelegateOptionsV2;
105 
106 // Populates TfLiteGpuDelegateOptionsV2 as follows:
107 //   is_precision_loss_allowed = false
108 //   inference_preference = TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER
109 //   priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION
110 //   priority2 = TFLITE_GPU_INFERENCE_PRIORITY_AUTO
111 //   priority3 = TFLITE_GPU_INFERENCE_PRIORITY_AUTO
112 //   experimental_flags = TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_QUANT
113 //   max_delegated_partitions = 1
114 TFL_CAPI_EXPORT TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default();
115 
116 // Creates a new delegate instance that need to be destroyed with
117 // TfLiteGpuDelegateV2Delete when delegate is no longer used by TFLite.
118 //
119 // This delegate encapsulates multiple GPU-acceleration APIs under the hood to
120 // make use of the fastest available on a device.
121 //
122 // When `options` is set to `nullptr`, then default options are used.
123 TFL_CAPI_EXPORT TfLiteDelegate* TfLiteGpuDelegateV2Create(
124     const TfLiteGpuDelegateOptionsV2* options);
125 
126 // Destroys a delegate created with `TfLiteGpuDelegateV2Create` call.
127 TFL_CAPI_EXPORT void TfLiteGpuDelegateV2Delete(TfLiteDelegate* delegate);
128 
129 #ifdef __cplusplus
130 }
131 #endif  // __cplusplus
132 
133 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_H_
134