1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/common/convert.h"
17 
18 #include <stdint.h>
19 #include <string.h>
20 
21 #include <string>
22 #include <vector>
23 
24 #include <fp16.h>
25 #include "absl/strings/str_cat.h"
26 #include "absl/types/span.h"
27 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
28 #include "tensorflow/lite/delegates/gpu/common/shape.h"
29 #include "tensorflow/lite/delegates/gpu/common/status.h"
30 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
31 #include "tensorflow/lite/delegates/gpu/common/types.h"
32 #include "tensorflow/lite/delegates/gpu/common/util.h"
33 
34 namespace tflite {
35 namespace gpu {
36 namespace {
37 
38 constexpr int kPhwc4ChannelsInPlane = 4;
39 constexpr int kPhwo4i4ChannelsInPlane = 4;
40 constexpr int kPiohw4ChannelsInPlane = 4;
41 
42 // Layout is Po,H,W,OI4x4.
ConvertToPHWO4I4(absl::Span<const float> in,const OHWI & shape,absl::Span<float> out,bool reverse_space)43 absl::Status ConvertToPHWO4I4(absl::Span<const float> in, const OHWI& shape,
44                               absl::Span<float> out, bool reverse_space) {
45   if (in.size() != shape.DimensionsProduct()) {
46     return absl::InvalidArgumentError(absl::StrCat(
47         "ConvertToPHWO4I4: Input data size does not match expected size: ",
48         in.size(), " != ", shape.DimensionsProduct()));
49   }
50   if (out.size() != GetElementsSizeForPHWO4I4(shape)) {
51     return absl::InvalidArgumentError(absl::StrCat(
52         "ConvertToPHWO4I4: Output data size does not match expected size: ",
53         out.size(), " != ", GetElementsSizeForPHWO4I4(shape)));
54   }
55 
56   float* output = out.data();
57   for (int p = 0; p < DivideRoundUp(shape.o, kPhwo4i4ChannelsInPlane); ++p) {
58     for (int h = 0; h < shape.h; ++h) {
59       for (int w = 0; w < shape.w; ++w) {
60         for (int c = 0; c < DivideRoundUp(shape.i, kPhwo4i4ChannelsInPlane);
61              ++c) {
62           for (int co = 0; co < kPhwo4i4ChannelsInPlane; ++co) {
63             for (int ci = 0; ci < kPhwo4i4ChannelsInPlane; ++ci) {
64               float value = 0;
65               if (c * kPhwo4i4ChannelsInPlane + ci < shape.i &&
66                   p * kPhwo4i4ChannelsInPlane + co < shape.o) {
67                 // tensor is in OHWI
68                 int tensor_o = p * kPhwo4i4ChannelsInPlane + co;
69                 int tensor_i = c * kPhwo4i4ChannelsInPlane + ci;
70                 const int in_h = reverse_space ? shape.h - 1 - h : h;
71                 const int in_w = reverse_space ? shape.w - 1 - w : w;
72                 value = in[shape.LinearIndex({tensor_o, in_h, in_w, tensor_i})];
73               }
74               (*output++) = value;
75             }
76           }
77         }
78       }
79     }
80   }
81   return absl::OkStatus();
82 }
83 
84 }  // namespace
85 
GetElementsSizeForPHWO4I4(const OHWI & shape)86 uint32_t GetElementsSizeForPHWO4I4(const OHWI& shape) {
87   return AlignByN(shape.i, kPhwo4i4ChannelsInPlane) *
88          AlignByN(shape.o, kPhwo4i4ChannelsInPlane) * shape.h * shape.w;
89 }
90 
GetElementsSizeForPHWO4I4(const IHWO & shape)91 uint32_t GetElementsSizeForPHWO4I4(const IHWO& shape) {
92   return AlignByN(shape.i, kPhwo4i4ChannelsInPlane) *
93          AlignByN(shape.o, kPhwo4i4ChannelsInPlane) * shape.h * shape.w;
94 }
95 
ConvertToPHWO4I4(const Tensor<OHWI,DataType::FLOAT32> & tensor)96 std::vector<float> ConvertToPHWO4I4(
97     const Tensor<OHWI, DataType::FLOAT32>& tensor) {
98   std::vector<float> transposed(GetElementsSizeForPHWO4I4(tensor.shape));
99   ConvertToPHWO4I4(tensor.data, tensor.shape,
100                    absl::MakeSpan(transposed.data(), transposed.size()),
101                    /*reverse_space=*/false)
102       .IgnoreError();
103   return transposed;
104 }
105 
ConvertToPHWO4I4Transposed(const Tensor<OHWI,DataType::FLOAT32> & tensor)106 std::vector<float> ConvertToPHWO4I4Transposed(
107     const Tensor<OHWI, DataType::FLOAT32>& tensor) {
108   std::vector<float> transposed(GetElementsSizeForPHWO4I4(tensor.shape));
109   ConvertToPHWO4I4(tensor.data, tensor.shape,
110                    absl::MakeSpan(transposed.data(), transposed.size()),
111                    /*reverse_space=*/true)
112       .IgnoreError();
113   return transposed;
114 }
115 
Get3DSizeForPHWO4I4(const OHWI & shape)116 uint3 Get3DSizeForPHWO4I4(const OHWI& shape) {
117   return uint3(AlignByN(shape.i, 4), shape.h * shape.w,
118                DivideRoundUp(shape.o, 4));
119 }
120 
121 // Layout is Po,H,W,OI4x4.
ConvertToPHWO4I4(absl::Span<const float> in,const IHWO & shape,absl::Span<float> out)122 absl::Status ConvertToPHWO4I4(absl::Span<const float> in, const IHWO& shape,
123                               absl::Span<float> out) {
124   if (in.size() != shape.DimensionsProduct()) {
125     return absl::InvalidArgumentError(absl::StrCat(
126         "ConvertToPHWO4I4: Input data size does not match expected size: ",
127         in.size(), " != ", shape.DimensionsProduct()));
128   }
129   if (out.size() != GetElementsSizeForPHWO4I4(shape)) {
130     return absl::InvalidArgumentError(absl::StrCat(
131         "ConvertToPHWO4I4: Output data size does not match expected size: ",
132         out.size(), " != ", GetElementsSizeForPHWO4I4(shape)));
133   }
134 
135   const int dst_depth = DivideRoundUp(shape.o, 4);
136   const int src_depth = DivideRoundUp(shape.i, 4);
137 
138   float* output = out.data();
139   for (int f = 0; f < dst_depth; ++f) {
140     for (int y = 0; y < shape.h; ++y) {
141       for (int x = 0; x < shape.w; ++x) {
142         for (int ch = 0; ch < src_depth; ++ch) {
143           for (int co = 0; co < 4; ++co) {
144             for (int ci = 0; ci < 4; ++ci) {
145               const int src_channel = ch * 4 + ci;
146               const int dst_channel = f * 4 + co;
147               float value = 0;
148               if (src_channel < shape.i && dst_channel < shape.o) {
149                 // tensor is in IHWO
150                 value = in[shape.LinearIndex({src_channel, y, x, dst_channel})];
151               }
152               (*output++) = value;
153             }
154           }
155         }
156       }
157     }
158   }
159   return absl::OkStatus();
160 }
161 
ConvertToPHWO4I4(const Tensor<IHWO,DataType::FLOAT32> & tensor)162 std::vector<float> ConvertToPHWO4I4(
163     const Tensor<IHWO, DataType::FLOAT32>& tensor) {
164   std::vector<float> transposed(GetElementsSizeForPHWO4I4(tensor.shape));
165   ConvertToPHWO4I4(tensor.data, tensor.shape,
166                    absl::MakeSpan(transposed.data(), transposed.size()))
167       .IgnoreError();
168   return transposed;
169 }
170 
GetElementsSizeForPIOHW4(const OHWI & shape)171 uint32_t GetElementsSizeForPIOHW4(const OHWI& shape) {
172   return AlignByN(shape.o * shape.i, kPiohw4ChannelsInPlane) * shape.h *
173          shape.w;
174 }
175 
ConvertToPIOHW4(absl::Span<const float> in,const OHWI & shape,absl::Span<float> out)176 absl::Status ConvertToPIOHW4(absl::Span<const float> in, const OHWI& shape,
177                              absl::Span<float> out) {
178   if (in.size() != shape.DimensionsProduct()) {
179     return absl::InvalidArgumentError(absl::StrCat(
180         "ConvertToPIOHW4: Input data size does not match expected size: ",
181         in.size(), " != ", shape.DimensionsProduct()));
182   }
183   if (out.size() != GetElementsSizeForPIOHW4(shape)) {
184     return absl::InvalidArgumentError(absl::StrCat(
185         "ConvertToPIOHW4: Output data size does not match expected size: ",
186         out.size(), " != ", GetElementsSizeForPIOHW4(shape)));
187   }
188 
189   int32_t output_channels = shape.o * shape.i;
190   int32_t num_planes = DivideRoundUp(output_channels, kPiohw4ChannelsInPlane);
191   float* output = out.data();
192   for (int p = 0; p < num_planes; ++p) {
193     for (int h = 0; h < shape.h; ++h) {
194       for (int w = 0; w < shape.w; ++w) {
195         for (int c = 0; c < kPiohw4ChannelsInPlane; ++c) {
196           int output_c = p * kPiohw4ChannelsInPlane + c;
197           (*output++) = output_c >= output_channels
198                             ? 0
199                             : in[shape.LinearIndex({output_c % shape.o, h, w,
200                                                     output_c / shape.o})];
201         }
202       }
203     }
204   }
205   return absl::OkStatus();
206 }
207 
ConvertToPIOHW4(const Tensor<OHWI,DataType::FLOAT32> & tensor)208 std::vector<float> ConvertToPIOHW4(
209     const Tensor<OHWI, DataType::FLOAT32>& tensor) {
210   std::vector<float> transposed(GetElementsSizeForPIOHW4(tensor.shape));
211   ConvertToPIOHW4(tensor.data, tensor.shape,
212                   absl::MakeSpan(transposed.data(), transposed.size()))
213       .IgnoreError();
214   return transposed;
215 }
216 
217 template <typename T>
ValidateConvertToPHWC4(absl::Span<const float> in,const BHWC & shape,absl::Span<T> out)218 absl::Status ValidateConvertToPHWC4(absl::Span<const float> in,
219                                     const BHWC& shape, absl::Span<T> out) {
220   if (in.size() != shape.DimensionsProduct()) {
221     return absl::InvalidArgumentError(absl::StrCat(
222         "ConvertToPHWC4: Input data size does not match expected size: ",
223         in.size(), " != ", shape.DimensionsProduct()));
224   }
225   if (out.size() != GetElementsSizeForPHWC4(shape)) {
226     return absl::InvalidArgumentError(absl::StrCat(
227         "ConvertToPHWC4: Output data size does not match expected size: ",
228         out.size(), " != ", GetElementsSizeForPHWC4(shape)));
229   }
230   return absl::OkStatus();
231 }
232 
233 // Layout is Pc,H,W,C4 where P - is a plane based on channels.
ConvertToPHWC4(absl::Span<const float> in,const BHWC & shape,absl::Span<float> out)234 absl::Status ConvertToPHWC4(absl::Span<const float> in, const BHWC& shape,
235                             absl::Span<float> out) {
236   RETURN_IF_ERROR(ValidateConvertToPHWC4(in, shape, out));
237   if (shape.c == 4) {
238     std::memcpy(out.data(), in.data(),
239                 shape.DimensionsProduct() * sizeof(float));
240     return absl::OkStatus();
241   }
242   // Layout is Pc,H,W,C4 where P - is a plane based on channels.
243   int num_planes = DivideRoundUp(shape.c, kPhwc4ChannelsInPlane);
244   const int num_pixels = shape.h * shape.w;
245   // A layer is a set of kPhwc4ChannelsInPlane channels images.
246   const int num_full_planes = shape.c / kPhwc4ChannelsInPlane;
247   for (int b = 0; b < shape.b; b++) {
248     float* dest =
249         out.data() + b * num_pixels * num_planes * kPhwc4ChannelsInPlane;
250     for (int p = 0; p < num_full_planes; p++) {
251       const float* src =
252           in.data() + shape.LinearIndex({b, 0, 0, p * kPhwc4ChannelsInPlane});
253       for (int i = 0; i < num_pixels; i++) {
254         std::memcpy(dest, src, kPhwc4ChannelsInPlane * sizeof(float));
255         src += shape.c;
256         dest += kPhwc4ChannelsInPlane;
257       }
258     }
259   }
260 
261   // Padding last kPhwc4ChannelsInPlane-channel layer to multiple of
262   // kPhwc4ChannelsInPlane.
263   const int padded_size = num_pixels * num_planes * kPhwc4ChannelsInPlane;
264   const int remaining_channels =
265       shape.c - num_full_planes * kPhwc4ChannelsInPlane;
266   if (remaining_channels == 0) {
267     return absl::OkStatus();
268   }
269   for (int b = 0; b < shape.b; b++) {
270     const float* src =
271         in.data() +
272         shape.LinearIndex({b, 0, 0, num_full_planes * kPhwc4ChannelsInPlane});
273     float* dest = out.data() + b * padded_size +
274                   num_pixels * num_full_planes * kPhwc4ChannelsInPlane;
275     for (int p = 0; p < num_pixels; p++) {
276       std::memcpy(dest, src, remaining_channels * sizeof(float));
277       std::memset(dest + remaining_channels, 0,
278                   (4 - remaining_channels) * sizeof(float));
279       src += shape.c;
280       dest += kPhwc4ChannelsInPlane;
281     }
282   }
283   return absl::OkStatus();
284 }
285 
286 // Layout is Pc,H,W,C4 where P - is a plane based on channels.
ConvertToPHWC4Half(absl::Span<const float> in,const BHWC & shape,absl::Span<HalfBits> out)287 absl::Status ConvertToPHWC4Half(absl::Span<const float> in, const BHWC& shape,
288                                 absl::Span<HalfBits> out) {
289   RETURN_IF_ERROR(ValidateConvertToPHWC4(in, shape, out));
290 
291   // Layout is Pc,H,W,C4 where P - is a plane based on channels.
292   int num_planes = DivideRoundUp(shape.c, kPhwc4ChannelsInPlane);
293   const int num_pixels = shape.h * shape.w;
294   // A layer is a set of kPhwc4ChannelsInPlane channels images.
295   const int num_full_planes = shape.c / kPhwc4ChannelsInPlane;
296   for (int b = 0; b < shape.b; b++) {
297     HalfBits* dest =
298         out.data() + b * num_pixels * num_planes * kPhwc4ChannelsInPlane;
299     for (int p = 0; p < num_full_planes; p++) {
300       const float* src =
301           in.data() + shape.LinearIndex({b, 0, 0, p * kPhwc4ChannelsInPlane});
302       for (int i = 0; i < num_pixels; i++) {
303         dest[0] = fp16_ieee_from_fp32_value(src[0]);
304         dest[1] = fp16_ieee_from_fp32_value(src[1]);
305         dest[2] = fp16_ieee_from_fp32_value(src[2]);
306         dest[3] = fp16_ieee_from_fp32_value(src[3]);
307         src += shape.c;
308         dest += kPhwc4ChannelsInPlane;
309       }
310     }
311   }
312 
313   // Padding last kPhwc4ChannelsInPlane-channel layer to multiple of
314   // kPhwc4ChannelsInPlane.
315   const int padded_size = num_pixels * num_planes * kPhwc4ChannelsInPlane;
316   const int remaining_channels =
317       shape.c - num_full_planes * kPhwc4ChannelsInPlane;
318   if (remaining_channels == 0) {
319     return absl::OkStatus();
320   }
321 
322   for (int b = 0; b < shape.b; b++) {
323     const float* src =
324         in.data() +
325         shape.LinearIndex({b, 0, 0, num_full_planes * kPhwc4ChannelsInPlane});
326     HalfBits* dest = out.data() + b * padded_size +
327                      num_pixels * num_full_planes * kPhwc4ChannelsInPlane;
328     switch (remaining_channels) {
329       case 1:
330         for (int p = 0; p < num_pixels; p++) {
331           dest[0] = fp16_ieee_from_fp32_value(src[0]);
332           dest[1] = 0;
333           dest[2] = 0;
334           dest[3] = 0;
335           src += shape.c;
336           dest += kPhwc4ChannelsInPlane;
337         }
338         break;
339       case 2:
340         for (int p = 0; p < num_pixels; p++) {
341           dest[0] = fp16_ieee_from_fp32_value(src[0]);
342           dest[1] = fp16_ieee_from_fp32_value(src[1]);
343           dest[2] = 0;
344           dest[3] = 0;
345           src += shape.c;
346           dest += kPhwc4ChannelsInPlane;
347         }
348         break;
349       case 3:
350         for (int p = 0; p < num_pixels; p++) {
351           dest[0] = fp16_ieee_from_fp32_value(src[0]);
352           dest[1] = fp16_ieee_from_fp32_value(src[1]);
353           dest[2] = fp16_ieee_from_fp32_value(src[2]);
354           dest[3] = 0;
355           src += shape.c;
356           dest += kPhwc4ChannelsInPlane;
357         }
358         break;
359       default:
360         return absl::UnimplementedError(
361             "ConvertToPHWC4Half: Unsupported channels per planes count.");
362     }
363   }
364   return absl::OkStatus();
365 }
366 
ConvertToPHWC4(const Tensor<BHWC,DataType::FLOAT32> & tensor)367 std::vector<float> ConvertToPHWC4(
368     const Tensor<BHWC, DataType::FLOAT32>& tensor) {
369   std::vector<float> transposed(GetElementsSizeForPHWC4(tensor.shape));
370   ConvertToPHWC4(tensor.data, tensor.shape,
371                  absl::MakeSpan(transposed.data(), transposed.size()))
372       .IgnoreError();
373   // TODO(akulik): Maybe safer to return Status.
374   return transposed;
375 }
376 
ConvertToPHWC4(const Tensor<HWC,DataType::FLOAT32> & tensor)377 std::vector<float> ConvertToPHWC4(
378     const Tensor<HWC, DataType::FLOAT32>& tensor) {
379   const BHWC batched_shape =
380       BHWC(1, tensor.shape.h, tensor.shape.w, tensor.shape.c);
381   std::vector<float> transposed(GetElementsSizeForPHWC4(batched_shape));
382   ConvertToPHWC4(tensor.data, batched_shape,
383                  absl::MakeSpan(transposed.data(), transposed.size()))
384       .IgnoreError();
385   // TODO(akulik): Maybe safer to return Status.
386   return transposed;
387 }
388 
GetElementsSizeForPHWC4(const BHWC & shape)389 uint32_t GetElementsSizeForPHWC4(const BHWC& shape) {
390   return shape.b * shape.h * shape.w * AlignByN(shape.c, kPhwc4ChannelsInPlane);
391 }
392 
393 template <typename T>
ValidateConvertFromPHWC4(absl::Span<const T> in,const BHWC & shape,absl::Span<float> out)394 absl::Status ValidateConvertFromPHWC4(absl::Span<const T> in, const BHWC& shape,
395                                       absl::Span<float> out) {
396   if (in.size() != GetElementsSizeForPHWC4(shape)) {
397     return absl::InvalidArgumentError(absl::StrCat(
398         "ConvertFromPHWC4: Input data size does not match expected size: ",
399         in.size(), " != ", GetElementsSizeForPHWC4(shape)));
400   }
401   if (out.size() != shape.DimensionsProduct()) {
402     return absl::InvalidArgumentError(absl::StrCat(
403         "ConvertFromPHWC4: Output data size does not match expected size: ",
404         out.size(), " != ", shape.DimensionsProduct()));
405   }
406   return absl::OkStatus();
407 }
408 
ConvertFromPHWC4(absl::Span<const float> in,const BHWC & shape,absl::Span<float> out)409 absl::Status ConvertFromPHWC4(absl::Span<const float> in, const BHWC& shape,
410                               absl::Span<float> out) {
411   RETURN_IF_ERROR(ValidateConvertFromPHWC4(in, shape, out));
412   if (shape.c == 4) {
413     std::memcpy(out.data(), in.data(),
414                 shape.DimensionsProduct() * sizeof(float));
415     return absl::OkStatus();
416   }
417 
418   int num_planes = DivideRoundUp(shape.c, kPhwc4ChannelsInPlane);
419   const int num_pixels = shape.h * shape.w;
420   const int padded_size = num_pixels * num_planes * kPhwc4ChannelsInPlane;
421   // A layer is a set of kPhwc4ChannelsInPlane channels images.
422   const int num_full_planes = shape.c / kPhwc4ChannelsInPlane;
423   for (int b = 0; b < shape.b; b++) {
424     const float* src = in.data() + b * padded_size;
425     for (int p = 0; p < num_full_planes; p++) {
426       float* dest =
427           out.data() + shape.LinearIndex({b, 0, 0, p * kPhwc4ChannelsInPlane});
428       for (int i = 0; i < num_pixels; i++) {
429         std::memcpy(dest, src, kPhwc4ChannelsInPlane * sizeof(float));
430         src += kPhwc4ChannelsInPlane;
431         dest += shape.c;
432       }
433     }
434   }
435 
436   // Unpadding last kPhwc4ChannelsInPlane-channel plane
437   const int remaining_channels =
438       shape.c - num_full_planes * kPhwc4ChannelsInPlane;
439   if (remaining_channels == 0) {
440     return absl::OkStatus();
441   }
442   for (int b = 0; b < shape.b; b++) {
443     const float* src = in.data() + b * padded_size +
444                        num_pixels * num_full_planes * kPhwc4ChannelsInPlane;
445     float* dest =
446         out.data() +
447         shape.LinearIndex({b, 0, 0, num_full_planes * kPhwc4ChannelsInPlane});
448     for (int p = 0; p < num_pixels; p++) {
449       std::memcpy(dest, src, remaining_channels * sizeof(float));
450       src += kPhwc4ChannelsInPlane;
451       dest += shape.c;
452     }
453   }
454   return absl::OkStatus();
455 }
456 
ConvertFromPHWC4Half(absl::Span<const HalfBits> in,const BHWC & shape,absl::Span<float> out)457 absl::Status ConvertFromPHWC4Half(absl::Span<const HalfBits> in,
458                                   const BHWC& shape, absl::Span<float> out) {
459   RETURN_IF_ERROR(ValidateConvertFromPHWC4(in, shape, out));
460   int num_planes = DivideRoundUp(shape.c, kPhwc4ChannelsInPlane);
461   const int num_pixels = shape.h * shape.w;
462   const int padded_size = num_pixels * num_planes * kPhwc4ChannelsInPlane;
463   // A layer is a set of kPhwc4ChannelsInPlane channels images.
464   const int num_full_planes = shape.c / kPhwc4ChannelsInPlane;
465   for (int b = 0; b < shape.b; b++) {
466     const HalfBits* src = in.data() + b * padded_size;
467     for (int p = 0; p < num_full_planes; p++) {
468       float* dest =
469           out.data() + shape.LinearIndex({b, 0, 0, p * kPhwc4ChannelsInPlane});
470       for (int i = 0; i < num_pixels; i++) {
471         dest[0] = fp16_ieee_to_fp32_value(src[0]);
472         dest[1] = fp16_ieee_to_fp32_value(src[1]);
473         dest[2] = fp16_ieee_to_fp32_value(src[2]);
474         dest[3] = fp16_ieee_to_fp32_value(src[3]);
475         src += kPhwc4ChannelsInPlane;
476         dest += shape.c;
477       }
478     }
479   }
480 
481   // Unpadding last kPhwc4ChannelsInPlane-channel plane
482   const int remaining_channels =
483       shape.c - num_full_planes * kPhwc4ChannelsInPlane;
484   if (remaining_channels == 0) {
485     return absl::OkStatus();
486   }
487   for (int b = 0; b < shape.b; b++) {
488     const HalfBits* src = in.data() + b * padded_size +
489                           num_pixels * num_full_planes * kPhwc4ChannelsInPlane;
490     float* dest =
491         out.data() +
492         shape.LinearIndex({b, 0, 0, num_full_planes * kPhwc4ChannelsInPlane});
493     switch (remaining_channels) {
494       case 1:
495         for (int p = 0; p < num_pixels; p++) {
496           dest[0] = fp16_ieee_to_fp32_value(src[0]);
497           src += kPhwc4ChannelsInPlane;
498           dest += shape.c;
499         }
500         break;
501       case 2:
502         for (int p = 0; p < num_pixels; p++) {
503           dest[0] = fp16_ieee_to_fp32_value(src[0]);
504           dest[1] = fp16_ieee_to_fp32_value(src[1]);
505           src += kPhwc4ChannelsInPlane;
506           dest += shape.c;
507         }
508         break;
509       case 3:
510         for (int p = 0; p < num_pixels; p++) {
511           dest[0] = fp16_ieee_to_fp32_value(src[0]);
512           dest[1] = fp16_ieee_to_fp32_value(src[1]);
513           dest[2] = fp16_ieee_to_fp32_value(src[2]);
514           src += kPhwc4ChannelsInPlane;
515           dest += shape.c;
516         }
517         break;
518       default:
519         return absl::UnimplementedError(
520             "ConvertToPHWC4Half: Unsupported channels per planes count.");
521     }
522   }
523   return absl::OkStatus();
524 }
525 
526 }  // namespace gpu
527 }  // namespace tflite
528