1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/cl/environment.h"
17 
18 #include <string>
19 #include <vector>
20 
21 #include "tensorflow/lite/delegates/gpu/cl/util.h"
22 #include "tensorflow/lite/delegates/gpu/common/shape.h"
23 
24 namespace tflite {
25 namespace gpu {
26 namespace cl {
27 namespace {
CreateEnvironment(Environment * result,bool shared,cl_context_properties egl_context,cl_context_properties egl_display)28 absl::Status CreateEnvironment(Environment* result, bool shared,
29                                cl_context_properties egl_context,
30                                cl_context_properties egl_display) {
31   CLDevice gpu;
32   RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));
33 
34   CLContext context;
35   if (shared) {
36     RETURN_IF_ERROR(CreateCLGLContext(gpu, egl_context, egl_display, &context));
37   } else {
38     RETURN_IF_ERROR(CreateCLContext(gpu, &context));
39   }
40   CLCommandQueue queue;
41   RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
42   ProfilingCommandQueue profiling_queue;
43   RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));
44 
45   *result = Environment(std::move(gpu), std::move(context), std::move(queue),
46                         std::move(profiling_queue));
47 
48   return result->Init();
49 }
50 
IsGpuSupportsStorageType(const GpuInfo & gpu_info,TensorStorageType storage_type)51 bool IsGpuSupportsStorageType(const GpuInfo& gpu_info,
52                               TensorStorageType storage_type) {
53   switch (storage_type) {
54     case TensorStorageType::TEXTURE_2D:
55       return !gpu_info.IsAMD();
56     case TensorStorageType::BUFFER:
57       return true;
58     case TensorStorageType::TEXTURE_ARRAY:
59       return !gpu_info.IsAMD() && gpu_info.SupportsTextureArray();
60     case TensorStorageType::IMAGE_BUFFER:
61       return (gpu_info.IsAdreno() || gpu_info.IsAMD() || gpu_info.IsNvidia()) &&
62              gpu_info.SupportsImageBuffer();
63     case TensorStorageType::TEXTURE_3D:
64       return !gpu_info.IsAMD() && gpu_info.SupportsImage3D();
65     case TensorStorageType::SINGLE_TEXTURE_2D:
66       return false;
67     case TensorStorageType::UNKNOWN:
68       return false;
69   }
70   return false;
71 }
72 
IsGpuSupportsPrecision(const GpuInfo & gpu_info,CalculationsPrecision precision)73 bool IsGpuSupportsPrecision(const GpuInfo& gpu_info,
74                             CalculationsPrecision precision) {
75   switch (precision) {
76     case CalculationsPrecision::F32_F16:
77     case CalculationsPrecision::F16:
78       return gpu_info.SupportsFP16();
79     case CalculationsPrecision::F32:
80       return true;
81   }
82 }
83 
84 }  // namespace
85 
Environment(CLDevice && device,CLContext && context,CLCommandQueue && queue,ProfilingCommandQueue && profiling_queue)86 Environment::Environment(CLDevice&& device, CLContext&& context,
87                          CLCommandQueue&& queue,
88                          ProfilingCommandQueue&& profiling_queue)
89     : device_(std::move(device)),
90       context_(std::move(context)),
91       queue_(std::move(queue)),
92       profiling_queue_(std::move(profiling_queue)) {}
93 
Environment(Environment && environment)94 Environment::Environment(Environment&& environment)
95     : device_(std::move(environment.device_)),
96       context_(std::move(environment.context_)),
97       queue_(std::move(environment.queue_)),
98       profiling_queue_(std::move(environment.profiling_queue_)),
99       program_cache_(std::move(environment.program_cache_)) {}
100 
operator =(Environment && environment)101 Environment& Environment::operator=(Environment&& environment) {
102   if (this != &environment) {
103     device_ = std::move(environment.device_);
104     context_ = std::move(environment.context_);
105     queue_ = std::move(environment.queue_);
106     profiling_queue_ = std::move(environment.profiling_queue_);
107     program_cache_ = std::move(environment.program_cache_);
108   }
109   return *this;
110 }
111 
Init()112 absl::Status Environment::Init() {
113   if (device().GetInfo().IsAdreno() &&
114       device().GetInfo().SupportsTextureArray()) {
115     const auto& adreno_info = device().info_.adreno_info;
116     // Some Adreno < 600 have bug with one layer texture array. b/131099086
117     // If we have one layer texture array and will write smt from kernel to this
118     // texture, we will get zeroes instead of actual values.
119     // The same kernel will work, if we use texture array with more than one
120     // layer.
121     if (adreno_info.IsAdreno3xx() || adreno_info.IsAdreno4xx() ||
122         adreno_info.IsAdreno5xx()) {
123       GetDevicePtr()->DisableOneLayerTextureArray();
124     }
125   }
126   return absl::OkStatus();
127 }
128 
SetHighPerformance() const129 void Environment::SetHighPerformance() const {
130   // TODO(sorokin) use cl_perf_hint if available
131 }
132 
SetDefaultPerformance() const133 void Environment::SetDefaultPerformance() const {
134   // TODO(sorokin) use cl_perf_hint if available
135 }
136 
SetLowPerformance() const137 void Environment::SetLowPerformance() const {
138   // TODO(sorokin) use cl_perf_hint if available
139 }
140 
GetSupportedPrecisions() const141 std::vector<CalculationsPrecision> Environment::GetSupportedPrecisions() const {
142   std::vector<CalculationsPrecision> precisions;
143   for (CalculationsPrecision precision :
144        {CalculationsPrecision::F32, CalculationsPrecision::F32_F16,
145         CalculationsPrecision::F16}) {
146     if (IsSupported(precision)) {
147       precisions.push_back(precision);
148     }
149   }
150   return precisions;
151 }
152 
IsSupported(CalculationsPrecision precision) const153 bool Environment::IsSupported(CalculationsPrecision precision) const {
154   return IsGpuSupportsPrecision(device_.GetInfo(), precision);
155 }
156 
GetSupportedStorages() const157 std::vector<TensorStorageType> Environment::GetSupportedStorages() const {
158   std::vector<TensorStorageType> storage_types;
159   for (auto storage_type :
160        {TensorStorageType::TEXTURE_2D, TensorStorageType::BUFFER,
161         TensorStorageType::TEXTURE_ARRAY, TensorStorageType::IMAGE_BUFFER,
162         TensorStorageType::TEXTURE_3D}) {
163     if (IsSupported(storage_type)) {
164       storage_types.push_back(storage_type);
165     }
166   }
167   return storage_types;
168 }
169 
170 std::vector<TensorStorageType>
GetSupportedStoragesWithHWZeroClampSupport() const171 Environment::GetSupportedStoragesWithHWZeroClampSupport() const {
172   std::vector<TensorStorageType> storage_types;
173   for (auto storage_type :
174        {TensorStorageType::TEXTURE_2D, TensorStorageType::TEXTURE_ARRAY,
175         TensorStorageType::TEXTURE_3D}) {
176     if (IsSupported(storage_type)) {
177       storage_types.push_back(storage_type);
178     }
179   }
180   return storage_types;
181 }
182 
IsSupported(TensorStorageType storage_type) const183 bool Environment::IsSupported(TensorStorageType storage_type) const {
184   return IsGpuSupportsStorageType(device_.GetInfo(), storage_type);
185 }
186 
GetFastestStorageType(const GpuInfo & gpu_info)187 TensorStorageType GetFastestStorageType(const GpuInfo& gpu_info) {
188   if (gpu_info.IsAdreno()) {
189     if (gpu_info.adreno_info.IsAdreno6xxOrHigher()) {
190       return TensorStorageType::TEXTURE_ARRAY;
191     } else {
192       return TensorStorageType::TEXTURE_2D;
193     }
194   } else if (gpu_info.IsPowerVR()) {
195     return TensorStorageType::TEXTURE_2D;
196   } else if (gpu_info.IsMali()) {
197     const MaliInfo mali_info = gpu_info.mali_info;
198     if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() ||
199         mali_info.IsValhall()) {
200       return TensorStorageType::TEXTURE_2D;
201     } else {
202       return TensorStorageType::BUFFER;
203     }
204   } else if (gpu_info.IsNvidia()) {
205     return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
206                                           : TensorStorageType::BUFFER;
207   } else if (gpu_info.IsAMD()) {
208     return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
209                                           : TensorStorageType::BUFFER;
210   } else if (gpu_info.IsIntel()) {
211     return TensorStorageType::BUFFER;
212   }
213   return TensorStorageType::BUFFER;
214 }
215 
GetStorageTypeWithMinimalMemoryConsumption(const GpuInfo & gpu_info)216 TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(
217     const GpuInfo& gpu_info) {
218   if (gpu_info.IsAdreno()) {
219     if (gpu_info.adreno_info.IsAdreno3xx() ||
220         gpu_info.adreno_info.IsAdreno4xx()) {
221       return TensorStorageType::BUFFER;
222     } else {
223       return TensorStorageType::IMAGE_BUFFER;
224     }
225   } else if (gpu_info.IsPowerVR()) {
226     return TensorStorageType::BUFFER;
227   } else if (gpu_info.IsMali()) {
228     return TensorStorageType::BUFFER;
229   } else if (gpu_info.IsNvidia()) {
230     return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
231                                           : TensorStorageType::BUFFER;
232   } else if (gpu_info.IsAMD()) {
233     return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
234                                           : TensorStorageType::BUFFER;
235   } else if (gpu_info.IsIntel()) {
236     return TensorStorageType::BUFFER;
237   }
238   return TensorStorageType::BUFFER;
239 }
240 
CreateEnvironment(Environment * result)241 absl::Status CreateEnvironment(Environment* result) {
242   CLDevice gpu;
243   RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));
244 
245   CLContext context;
246   RETURN_IF_ERROR(CreateCLContext(gpu, &context));
247   CLCommandQueue queue;
248   RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
249   ProfilingCommandQueue profiling_queue;
250   RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));
251 
252   *result = Environment(std::move(gpu), std::move(context), std::move(queue),
253                         std::move(profiling_queue));
254   return result->Init();
255 }
256 
257 }  // namespace cl
258 }  // namespace gpu
259 }  // namespace tflite
260