1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 package org.tensorflow.lite.gpu;
17 
18 import java.io.Closeable;
19 import org.tensorflow.lite.Delegate;
20 import org.tensorflow.lite.annotations.UsedByReflection;
21 
22 /**
23  * {@link Delegate} for GPU inference.
24  *
25  * <p>Note: When calling {@code Interpreter.modifyGraphWithDelegate()}/ {@code
26  * Interpreter.Options.addDelegate()} and {@code Interpreter.run()}, the caller must have an {@code
27  * EGLContext} in the <b>current thread</b> and {@code Interpreter.run()} must be called from the
28  * same {@code EGLContext}. If an {@code EGLContext} does not exist, the delegate will internally
29  * create one, but then the developer must ensure that {@code Interpreter.run()} is always called
30  * from the same thread in which {@code Interpreter.modifyGraphWithDelegate()} was called.
31  */
32 @UsedByReflection("TFLiteSupport/model/GpuDelegateProxy")
33 public class GpuDelegate implements Delegate, Closeable {
34 
35   private static final long INVALID_DELEGATE_HANDLE = 0;
36   private static final String TFLITE_GPU_LIB = "tensorflowlite_gpu_jni";
37 
38   private long delegateHandle;
39 
40   /** Delegate options. */
41   public static final class Options {
Options()42     public Options() {}
43 
44     /**
45      * Delegate will be used only once, therefore, bootstrap/init time should be taken into account.
46      */
47     public static final int INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER = 0;
48 
49     /**
50      * Prefer maximizing the throughput. Same delegate will be used repeatedly on multiple inputs.
51      */
52     public static final int INFERENCE_PREFERENCE_SUSTAINED_SPEED = 1;
53 
54     /**
55      * Sets whether precision loss is allowed.
56      *
57      * @param precisionLossAllowed When `true` (default), the GPU may quantify tensors, downcast
58      *     values, process in FP16. When `false`, computations are carried out in 32-bit floating
59      *     point.
60      */
setPrecisionLossAllowed(boolean precisionLossAllowed)61     public Options setPrecisionLossAllowed(boolean precisionLossAllowed) {
62       this.precisionLossAllowed = precisionLossAllowed;
63       return this;
64     }
65 
66     /**
67      * Enables running quantized models with the delegate. Defaults to false.
68      *
69      * <p>WARNING: This is an experimental API and subject to change.
70      *
71      * @param quantizedModelsAllowed When {@code true} (default), the GPU may run quantized models.
72      */
setQuantizedModelsAllowed(boolean quantizedModelsAllowed)73     public Options setQuantizedModelsAllowed(boolean quantizedModelsAllowed) {
74       this.quantizedModelsAllowed = quantizedModelsAllowed;
75       return this;
76     }
77 
78     /**
79      * Sets the inference preference for precision/compilation/runtime tradeoffs.
80      *
81      * @param preference One of `INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER` (default),
82      *     `INFERENCE_PREFERENCE_SUSTAINED_SPEED`.
83      */
setInferencePreference(int preference)84     public Options setInferencePreference(int preference) {
85       this.inferencePreference = preference;
86       return this;
87     }
88 
89     boolean precisionLossAllowed = true;
90     boolean quantizedModelsAllowed = true;
91     int inferencePreference = INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER;
92   }
93 
GpuDelegate(Options options)94   public GpuDelegate(Options options) {
95     delegateHandle =
96         createDelegate(
97             options.precisionLossAllowed,
98             options.quantizedModelsAllowed,
99             options.inferencePreference);
100   }
101 
102   @UsedByReflection("TFLiteSupport/model/GpuDelegateProxy")
GpuDelegate()103   public GpuDelegate() {
104     this(new Options());
105   }
106 
107   @Override
getNativeHandle()108   public long getNativeHandle() {
109     return delegateHandle;
110   }
111 
112   /**
113    * Frees TFLite resources in C runtime.
114    *
115    * <p>User is expected to call this method explicitly.
116    */
117   @Override
close()118   public void close() {
119     if (delegateHandle != INVALID_DELEGATE_HANDLE) {
120       deleteDelegate(delegateHandle);
121       delegateHandle = INVALID_DELEGATE_HANDLE;
122     }
123   }
124 
125   static {
126     System.loadLibrary(TFLITE_GPU_LIB);
127   }
128 
createDelegate( boolean precisionLossAllowed, boolean quantizedModelsAllowed, int preference)129   private static native long createDelegate(
130       boolean precisionLossAllowed, boolean quantizedModelsAllowed, int preference);
131 
deleteDelegate(long delegateHandle)132   private static native void deleteDelegate(long delegateHandle);
133 }
134