/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/environment.h"

#include <string>
#include <vector>

#include "tensorflow/lite/delegates/gpu/cl/util.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"

namespace tflite {
namespace gpu {
namespace cl {
namespace {
absl::Status CreateEnvironment(Environment* result, bool shared,
                               cl_context_properties egl_context,
                               cl_context_properties egl_display) {
  CLDevice gpu;
  RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));

  CLContext context;
  if (shared) {
    RETURN_IF_ERROR(CreateCLGLContext(gpu, egl_context, egl_display, &context));
  } else {
    RETURN_IF_ERROR(CreateCLContext(gpu, &context));
  }
  CLCommandQueue queue;
  RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
  ProfilingCommandQueue profiling_queue;
  RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));

  *result = Environment(std::move(gpu), std::move(context), std::move(queue),
                        std::move(profiling_queue));

  return result->Init();
}
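// Note: the `shared` path above builds the CL context on top of an existing
// EGL context/display so GL textures can be shared with the delegate. A
// hypothetical caller on the GL thread would obtain those handles via
// eglGetCurrentContext() / eglGetCurrentDisplay() and pass them here cast to
// cl_context_properties.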

bool IsGpuSupportsStorageType(const GpuInfo& gpu_info,
                              TensorStorageType storage_type) {
  switch (storage_type) {
    case TensorStorageType::TEXTURE_2D:
      return !gpu_info.IsAMD();
    case TensorStorageType::BUFFER:
      return true;
    case TensorStorageType::TEXTURE_ARRAY:
      return !gpu_info.IsAMD() && gpu_info.SupportsTextureArray();
    case TensorStorageType::IMAGE_BUFFER:
      return (gpu_info.IsAdreno() || gpu_info.IsAMD() || gpu_info.IsNvidia()) &&
             gpu_info.SupportsImageBuffer();
    case TensorStorageType::TEXTURE_3D:
      return !gpu_info.IsAMD() && gpu_info.SupportsImage3D();
    case TensorStorageType::SINGLE_TEXTURE_2D:
      return false;
    case TensorStorageType::UNKNOWN:
      return false;
  }
  return false;
}

bool IsGpuSupportsPrecision(const GpuInfo& gpu_info,
                            CalculationsPrecision precision) {
  switch (precision) {
    case CalculationsPrecision::F32_F16:
    case CalculationsPrecision::F16:
      return gpu_info.SupportsFP16();
    case CalculationsPrecision::F32:
      return true;
  }
  // Unreachable for valid enum values; avoids undefined behavior (and a
  // -Wreturn-type warning) if an out-of-range value is ever passed.
  return false;
}
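// For reference (semantics defined elsewhere in the delegate, not in this
// file): F16 keeps storage and math in half precision, while F32_F16 keeps
// storage in F16 but accumulates some ops in F32; both therefore require
// device FP16 support, and plain F32 is always available.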

}  // namespace

Environment::Environment(CLDevice&& device, CLContext&& context,
                         CLCommandQueue&& queue,
                         ProfilingCommandQueue&& profiling_queue)
    : device_(std::move(device)),
      context_(std::move(context)),
      queue_(std::move(queue)),
      profiling_queue_(std::move(profiling_queue)) {}

Environment::Environment(Environment&& environment)
    : device_(std::move(environment.device_)),
      context_(std::move(environment.context_)),
      queue_(std::move(environment.queue_)),
      profiling_queue_(std::move(environment.profiling_queue_)),
      program_cache_(std::move(environment.program_cache_)) {}

Environment& Environment::operator=(Environment&& environment) {
  if (this != &environment) {
    device_ = std::move(environment.device_);
    context_ = std::move(environment.context_);
    queue_ = std::move(environment.queue_);
    profiling_queue_ = std::move(environment.profiling_queue_);
    program_cache_ = std::move(environment.program_cache_);
  }
  return *this;
}

absl::Status Environment::Init() {
  if (device().GetInfo().IsAdreno() &&
      device().GetInfo().SupportsTextureArray()) {
    const auto& adreno_info = device().info_.adreno_info;
    // Some Adreno GPUs below the 600 series have a bug with one-layer texture
    // arrays (b/131099086): if a kernel writes something to a texture array
    // that has exactly one layer, zeroes are read back instead of the actual
    // values. The same kernel works correctly when the texture array has more
    // than one layer.
    if (adreno_info.IsAdreno3xx() || adreno_info.IsAdreno4xx() ||
        adreno_info.IsAdreno5xx()) {
      GetDevicePtr()->DisableOneLayerTextureArray();
    }
  }
  return absl::OkStatus();
}

void Environment::SetHighPerformance() const {
  // TODO(sorokin) use cl_perf_hint if available
}

void Environment::SetDefaultPerformance() const {
  // TODO(sorokin) use cl_perf_hint if available
}

void Environment::SetLowPerformance() const {
  // TODO(sorokin) use cl_perf_hint if available
}

std::vector<CalculationsPrecision> Environment::GetSupportedPrecisions() const {
  std::vector<CalculationsPrecision> precisions;
  for (CalculationsPrecision precision :
       {CalculationsPrecision::F32, CalculationsPrecision::F32_F16,
        CalculationsPrecision::F16}) {
    if (IsSupported(precision)) {
      precisions.push_back(precision);
    }
  }
  return precisions;
}
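// Usage sketch (hypothetical caller, not part of this file): pick the most
// compact supported precision. The list above is built in the fixed order
// F32 -> F32_F16 -> F16, so the last entry is the cheapest one available.
//   Environment env;
//   RETURN_IF_ERROR(CreateEnvironment(&env));
//   CalculationsPrecision chosen = CalculationsPrecision::F32;
//   for (CalculationsPrecision p : env.GetSupportedPrecisions()) {
//     chosen = p;
//   }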

bool Environment::IsSupported(CalculationsPrecision precision) const {
  return IsGpuSupportsPrecision(device_.GetInfo(), precision);
}

std::vector<TensorStorageType> Environment::GetSupportedStorages() const {
  std::vector<TensorStorageType> storage_types;
  for (auto storage_type :
       {TensorStorageType::TEXTURE_2D, TensorStorageType::BUFFER,
        TensorStorageType::TEXTURE_ARRAY, TensorStorageType::IMAGE_BUFFER,
        TensorStorageType::TEXTURE_3D}) {
    if (IsSupported(storage_type)) {
      storage_types.push_back(storage_type);
    }
  }
  return storage_types;
}

std::vector<TensorStorageType>
Environment::GetSupportedStoragesWithHWZeroClampSupport() const {
  std::vector<TensorStorageType> storage_types;
  for (auto storage_type :
       {TensorStorageType::TEXTURE_2D, TensorStorageType::TEXTURE_ARRAY,
        TensorStorageType::TEXTURE_3D}) {
    if (IsSupported(storage_type)) {
      storage_types.push_back(storage_type);
    }
  }
  return storage_types;
}

bool Environment::IsSupported(TensorStorageType storage_type) const {
  return IsGpuSupportsStorageType(device_.GetInfo(), storage_type);
}

TensorStorageType GetFastestStorageType(const GpuInfo& gpu_info) {
  if (gpu_info.IsAdreno()) {
    if (gpu_info.adreno_info.IsAdreno6xxOrHigher()) {
      return TensorStorageType::TEXTURE_ARRAY;
    } else {
      return TensorStorageType::TEXTURE_2D;
    }
  } else if (gpu_info.IsPowerVR()) {
    return TensorStorageType::TEXTURE_2D;
  } else if (gpu_info.IsMali()) {
    const MaliInfo mali_info = gpu_info.mali_info;
    if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() ||
        mali_info.IsValhall()) {
      return TensorStorageType::TEXTURE_2D;
    } else {
      return TensorStorageType::BUFFER;
    }
  } else if (gpu_info.IsNvidia()) {
    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
                                          : TensorStorageType::BUFFER;
  } else if (gpu_info.IsAMD()) {
    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
                                          : TensorStorageType::BUFFER;
  } else if (gpu_info.IsIntel()) {
    return TensorStorageType::BUFFER;
  }
  return TensorStorageType::BUFFER;
}

TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(
    const GpuInfo& gpu_info) {
  if (gpu_info.IsAdreno()) {
    if (gpu_info.adreno_info.IsAdreno3xx() ||
        gpu_info.adreno_info.IsAdreno4xx()) {
      return TensorStorageType::BUFFER;
    } else {
      return TensorStorageType::IMAGE_BUFFER;
    }
  } else if (gpu_info.IsPowerVR()) {
    return TensorStorageType::BUFFER;
  } else if (gpu_info.IsMali()) {
    return TensorStorageType::BUFFER;
  } else if (gpu_info.IsNvidia()) {
    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
                                          : TensorStorageType::BUFFER;
  } else if (gpu_info.IsAMD()) {
    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
                                          : TensorStorageType::BUFFER;
  } else if (gpu_info.IsIntel()) {
    return TensorStorageType::BUFFER;
  }
  return TensorStorageType::BUFFER;
}
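// Usage sketch (hypothetical; assumes CreateDefaultGPUDevice succeeds): the
// two selectors above trade speed against memory footprint for the same
// device.
//   CLDevice gpu;
//   RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));
//   TensorStorageType fast = GetFastestStorageType(gpu.GetInfo());
//   TensorStorageType compact =
//       GetStorageTypeWithMinimalMemoryConsumption(gpu.GetInfo());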

absl::Status CreateEnvironment(Environment* result) {
  CLDevice gpu;
  RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));

  CLContext context;
  RETURN_IF_ERROR(CreateCLContext(gpu, &context));
  CLCommandQueue queue;
  RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
  ProfilingCommandQueue profiling_queue;
  RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));

  *result = Environment(std::move(gpu), std::move(context), std::move(queue),
                        std::move(profiling_queue));
  return result->Init();
}
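// Typical end-to-end use (hypothetical caller, error handling elided):
//   Environment env;
//   RETURN_IF_ERROR(CreateEnvironment(&env));
//   if (env.IsSupported(TensorStorageType::TEXTURE_2D)) {
//     // Allocate tensors as 2D textures; otherwise fall back to BUFFER,
//     // which IsGpuSupportsStorageType() reports as universally supported.
//   }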

}  // namespace cl
}  // namespace gpu
}  // namespace tflite