1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_WEIGHTS_CONVERSION_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_WEIGHTS_CONVERSION_H_
18 
19 #include <cstdint>
20 #include <string>
21 #include <vector>
22 
23 #include "absl/types/span.h"
24 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
25 #include "tensorflow/lite/delegates/gpu/common/shape.h"
26 #include "tensorflow/lite/delegates/gpu/common/status.h"
27 #include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
28 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
29 #include "tensorflow/lite/delegates/gpu/common/types.h"
30 #include "tensorflow/lite/delegates/gpu/common/util.h"
31 
32 namespace tflite {
33 namespace gpu {
34 
35 template <DataType S, typename T>
RearrangeWeightsToOHWIOGroupI4O4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)36 void RearrangeWeightsToOHWIOGroupI4O4(
37     const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
38     absl::Span<T> dst) {
39   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
40   const int src_slices = DivideRoundUp(weights.shape.i, 4);
41   const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
42 
43   int counter = 0;
44   for (int d = 0; d < dst_groups; ++d) {
45     for (int y = 0; y < weights.shape.h; ++y) {
46       for (int x = 0; x < weights.shape.w; ++x) {
47         for (int s = 0; s < src_slices; ++s) {
48           for (int d_group = 0; d_group < out_group_size; ++d_group) {
49             for (int j = 0; j < 4; ++j) {
50               T filter;
51               for (int i = 0; i < 4; ++i) {
52                 const int s_ch = s * 4 + j;
53                 const int d_ch = (d * out_group_size + d_group) * 4 + i;
54                 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
55                   const int f_index =
56                       weights.shape.LinearIndex({d_ch, y, x, s_ch});
57                   filter[i] = weights.data[f_index];
58                 } else {
59                   filter[i] = 0.0f;
60                 }
61               }
62               dst[counter++] = filter;
63             }
64           }
65         }
66       }
67     }
68   }
69 }
70 
71 template <DataType S, typename T>
RearrangeWeightsToOHWIOGroupO4I4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)72 void RearrangeWeightsToOHWIOGroupO4I4(
73     const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
74     absl::Span<T> dst) {
75   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
76   const int src_slices = DivideRoundUp(weights.shape.i, 4);
77   const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
78 
79   int counter = 0;
80   for (int d = 0; d < dst_groups; ++d) {
81     for (int y = 0; y < weights.shape.h; ++y) {
82       for (int x = 0; x < weights.shape.w; ++x) {
83         for (int s = 0; s < src_slices; ++s) {
84           for (int d_group = 0; d_group < out_group_size; ++d_group) {
85             for (int j = 0; j < 4; ++j) {
86               T filter;
87               for (int i = 0; i < 4; ++i) {
88                 const int s_ch = s * 4 + i;
89                 const int d_ch = (d * out_group_size + d_group) * 4 + j;
90                 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
91                   const int f_index =
92                       weights.shape.LinearIndex({d_ch, y, x, s_ch});
93                   filter[i] = weights.data[f_index];
94                 } else {
95                   filter[i] = 0.0f;
96                 }
97               }
98               dst[counter++] = filter;
99             }
100           }
101         }
102       }
103     }
104   }
105 }
106 
107 template <DataType S, typename T>
RearrangeWeightsToODHWIOGroupI4O4(const tflite::gpu::Tensor<OHWDI,S> & weights,int out_group_size,absl::Span<T> dst)108 void RearrangeWeightsToODHWIOGroupI4O4(
109     const tflite::gpu::Tensor<OHWDI, S>& weights, int out_group_size,
110     absl::Span<T> dst) {
111   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
112   const int src_slices = DivideRoundUp(weights.shape.i, 4);
113   const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
114 
115   int counter = 0;
116   for (int d = 0; d < dst_groups; ++d) {
117     for (int z = 0; z < weights.shape.d; ++z) {
118       for (int y = 0; y < weights.shape.h; ++y) {
119         for (int x = 0; x < weights.shape.w; ++x) {
120           for (int s = 0; s < src_slices; ++s) {
121             for (int d_group = 0; d_group < out_group_size; ++d_group) {
122               for (int j = 0; j < 4; ++j) {
123                 T filter;
124                 for (int i = 0; i < 4; ++i) {
125                   const int s_ch = s * 4 + j;
126                   const int d_ch = (d * out_group_size + d_group) * 4 + i;
127                   if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
128                     const int f_index =
129                         weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
130                     filter[i] = weights.data[f_index];
131                   } else {
132                     filter[i] = 0.0f;
133                   }
134                 }
135                 dst[counter++] = filter;
136               }
137             }
138           }
139         }
140       }
141     }
142   }
143 }
144 
145 template <DataType S, typename T>
RearrangeWeightsToI4HWIOOGroupO4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)146 void RearrangeWeightsToI4HWIOOGroupO4(
147     const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
148     absl::Span<T> dst) {
149   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
150   const int src_slices = DivideRoundUp(weights.shape.i, 4);
151   const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
152 
153   int counter = 0;
154   for (int j = 0; j < 4; ++j) {
155     for (int y = 0; y < weights.shape.h; ++y) {
156       for (int x = 0; x < weights.shape.w; ++x) {
157         for (int s = 0; s < src_slices; ++s) {
158           for (int d = 0; d < dst_groups; ++d) {
159             for (int d_group = 0; d_group < out_group_size; ++d_group) {
160               T filter;
161               for (int i = 0; i < 4; ++i) {
162                 const int s_ch = s * 4 + j;
163                 const int d_ch = (d * out_group_size + d_group) * 4 + i;
164                 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
165                   const int f_index =
166                       weights.shape.LinearIndex({d_ch, y, x, s_ch});
167                   filter[i] = weights.data[f_index];
168                 } else {
169                   filter[i] = 0.0f;
170                 }
171               }
172               dst[counter++] = filter;
173             }
174           }
175         }
176       }
177     }
178   }
179 }
180 
181 template <DataType S, typename T>
RearrangeWeightsToO4HWIOOGroupI4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)182 void RearrangeWeightsToO4HWIOOGroupI4(
183     const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
184     absl::Span<T> dst) {
185   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
186   const int src_slices = DivideRoundUp(weights.shape.i, 4);
187   const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
188 
189   int counter = 0;
190   for (int j = 0; j < 4; ++j) {
191     for (int y = 0; y < weights.shape.h; ++y) {
192       for (int x = 0; x < weights.shape.w; ++x) {
193         for (int s = 0; s < src_slices; ++s) {
194           for (int d = 0; d < dst_groups; ++d) {
195             for (int d_group = 0; d_group < out_group_size; ++d_group) {
196               T filter;
197               for (int i = 0; i < 4; ++i) {
198                 const int s_ch = s * 4 + i;
199                 const int d_ch = (d * out_group_size + d_group) * 4 + j;
200                 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
201                   const int f_index =
202                       weights.shape.LinearIndex({d_ch, y, x, s_ch});
203                   filter[i] = weights.data[f_index];
204                 } else {
205                   filter[i] = 0.0f;
206                 }
207               }
208               dst[counter++] = filter;
209             }
210           }
211         }
212       }
213     }
214   }
215 }
216 
217 template <DataType S, typename T>
RearrangeWeightsToI4DHWIOOGroupO4(const tflite::gpu::Tensor<OHWDI,S> & weights,int out_group_size,absl::Span<T> dst)218 void RearrangeWeightsToI4DHWIOOGroupO4(
219     const tflite::gpu::Tensor<OHWDI, S>& weights, int out_group_size,
220     absl::Span<T> dst) {
221   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
222   const int src_slices = DivideRoundUp(weights.shape.i, 4);
223   const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
224 
225   int counter = 0;
226   for (int j = 0; j < 4; ++j) {
227     for (int z = 0; z < weights.shape.d; ++z) {
228       for (int y = 0; y < weights.shape.h; ++y) {
229         for (int x = 0; x < weights.shape.w; ++x) {
230           for (int s = 0; s < src_slices; ++s) {
231             for (int d = 0; d < dst_groups; ++d) {
232               for (int d_group = 0; d_group < out_group_size; ++d_group) {
233                 T filter;
234                 for (int i = 0; i < 4; ++i) {
235                   const int s_ch = s * 4 + j;
236                   const int d_ch = (d * out_group_size + d_group) * 4 + i;
237                   if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
238                     const int f_index =
239                         weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
240                     filter[i] = weights.data[f_index];
241                   } else {
242                     filter[i] = 0.0f;
243                   }
244                 }
245                 dst[counter++] = filter;
246               }
247             }
248           }
249         }
250       }
251     }
252   }
253 }
254 
255 template <DataType S, typename T>
RearrangeWeightsToOICustomSpatialI4O4(const tflite::gpu::Tensor<OHWI,S> & weights,const std::vector<int> & spatial_remap,absl::Span<T> dst)256 void RearrangeWeightsToOICustomSpatialI4O4(
257     const tflite::gpu::Tensor<OHWI, S>& weights,
258     const std::vector<int>& spatial_remap, absl::Span<T> dst) {
259   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
260   const int src_slices = DivideRoundUp(weights.shape.i, 4);
261 
262   int counter = 0;
263   for (int d = 0; d < dst_slices; ++d) {
264     for (int s = 0; s < src_slices; ++s) {
265       for (int y = 0; y < weights.shape.h; ++y) {
266         for (int x = 0; x < weights.shape.w; ++x) {
267           const int kernel_index = spatial_remap[y * weights.shape.w + x];
268           const int kernel_index_x = kernel_index % weights.shape.w;
269           const int kernel_index_y = kernel_index / weights.shape.w;
270           for (int i = 0; i < 4; ++i) {
271             T filter;
272             for (int j = 0; j < 4; ++j) {
273               const int s_ch = s * 4 + i;
274               const int d_ch = d * 4 + j;
275               if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
276                 const int f_index = weights.shape.LinearIndex(
277                     {d_ch, kernel_index_y, kernel_index_x, s_ch});
278                 filter[j] = weights.data[f_index];
279               } else {
280                 filter[j] = 0.0f;
281               }
282             }
283             dst[counter++] = filter;
284           }
285         }
286       }
287     }
288   }
289 }
290 
291 template <DataType S, typename T>
RearrangeWeightsToOICustomSpatialO4I4(const tflite::gpu::Tensor<OHWI,S> & weights,const std::vector<int> & spatial_remap,absl::Span<T> dst)292 void RearrangeWeightsToOICustomSpatialO4I4(
293     const tflite::gpu::Tensor<OHWI, S>& weights,
294     const std::vector<int>& spatial_remap, absl::Span<T> dst) {
295   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
296   const int src_slices = DivideRoundUp(weights.shape.i, 4);
297 
298   int counter = 0;
299   for (int d = 0; d < dst_slices; ++d) {
300     for (int s = 0; s < src_slices; ++s) {
301       for (int y = 0; y < weights.shape.h; ++y) {
302         for (int x = 0; x < weights.shape.w; ++x) {
303           const int kernel_index = spatial_remap[y * weights.shape.w + x];
304           const int kernel_index_x = kernel_index % weights.shape.w;
305           const int kernel_index_y = kernel_index / weights.shape.w;
306           for (int i = 0; i < 4; ++i) {
307             T filter;
308             for (int j = 0; j < 4; ++j) {
309               const int s_ch = s * 4 + j;
310               const int d_ch = d * 4 + i;
311               if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
312                 const int f_index = weights.shape.LinearIndex(
313                     {d_ch, kernel_index_y, kernel_index_x, s_ch});
314                 filter[j] = weights.data[f_index];
315               } else {
316                 filter[j] = 0.0f;
317               }
318             }
319             dst[counter++] = filter;
320           }
321         }
322       }
323     }
324   }
325 }
326 
// Declaration only; the definition is not visible in this header.
// NOTE(review): judging by the name, this presumably returns how many
// elements weights of `shape` occupy once stored in the layout described by
// `weight_desc` -- confirm against the definition.
327 uint GetTotalElementsCountForLayout(const WeightsDescription& weight_desc,
328                                     const OHWI& shape);
329 
// Declaration only; the definition is not visible in this header.
// NOTE(review): presumably converts the FLOAT32 OHWI `weights` into the
// layout given by `dst_weight_desc`, encodes them as `dst_type`, and writes
// the resulting bytes into `dst` -- confirm against the definition.
330 void RearrangeWeights(
331     const tflite::gpu::Tensor<OHWI, DataType::FLOAT32>& weights,
332     const WeightsDescription& dst_weight_desc, DataType dst_type,
333     absl::Span<uint8_t> dst);
334 
335 }  // namespace gpu
336 }  // namespace tflite
337 
338 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_WEIGHTS_CONVERSION_H_
339