1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/common/winograd_util.h"
17 
18 #include <cmath>
19 #include <vector>
20 
21 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
22 #include "tensorflow/lite/delegates/gpu/common/shape.h"
23 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
24 
25 namespace tflite {
26 namespace gpu {
27 namespace {
// Matrices for Winograd transformations were computed with the method
// described here: https://openreview.net/pdf?id=H1ZaRZVKg
// Builds a `height` x `width` matrix (row-major, `width` values per row) from
// `width` sample points.
//
// Each sample point is stored as a pair (px, py):
//   * point 0 is (0, 1);
//   * the following points are (+kDelta, 1), (-kDelta, 1), (+2 * kDelta, 1),
//     (-2 * kDelta, 1), ... with kDelta = sqrt(2) / 2;
//   * the last point is always (1, 0).
// Element (row, col) of the result is
//   px[col]^row * py[col]^(height - 1 - row).
//
// Returns an empty vector when `width` or `height` is not positive.
std::vector<float> GetTransposedMatrixForWinograd(int width, int height) {
  // The code below writes px[0] and px[width - 1] unconditionally, which is
  // out of bounds for an empty vector, and a negative size would be converted
  // to a huge unsigned allocation size. Reject degenerate dimensions up front.
  if (width <= 0 || height <= 0) {
    return {};
  }

  const float kDelta = std::sqrt(2.0f) / 2.0f;
  std::vector<float> px(width);

  px[0] = 0.0f;
  // Symmetric +/- pairs placed after point 0.
  const int points_count = (width - 1) / 2;
  for (int i = 0; i < points_count; ++i) {
    px[i * 2 + 1] = kDelta * (i + 1.0f);
    px[i * 2 + 2] = -kDelta * (i + 1.0f);
  }
  // The last point is overwritten with px = 1 and gets py = 0 below.
  px[width - 1] = 1.0f;

  std::vector<float> py(width, 1.0f);
  py[width - 1] = 0.0f;

  std::vector<float> result(height * width);
  for (int y = 0; y < width; ++y) {
    for (int x = 0; x < height; ++x) {
      // std::pow(0, 0) evaluates to 1, so the first column is non-zero only
      // in row 0 and the last column is non-zero only in row height - 1.
      result[x * width + y] =
          std::pow(px[y], 1.0f * x) * std::pow(py[y], (height - 1.0f) - x);
    }
  }
  return result;
}
54 
GetInversedMatrixForWinograd(int rank)55 std::vector<float> GetInversedMatrixForWinograd(int rank) {
56   auto matrix = GetTransposedMatrixForWinograd(rank, rank);
57   std::vector<float> inverted(rank * rank, 0.0f);
58   for (int i = 0; i < rank; ++i) {
59     inverted[i * rank + i] = 1.0f;
60   }
61 
62   for (int i = 1; i < rank - 1; ++i) {
63     float inv_t = 1.0f / matrix[i * rank + i];
64     for (int x = i; x < rank; ++x) {
65       matrix[i * rank + x] *= inv_t;
66     }
67     for (int x = 0; x < rank; ++x) {
68       inverted[i * rank + x] *= inv_t;
69     }
70 
71     for (int y = 0; y < rank; ++y) {
72       if (y == i) continue;
73       float t = matrix[y * rank + i];
74       for (int x = i; x < rank; ++x) {
75         matrix[y * rank + x] -= t * matrix[i * rank + x];
76       }
77       for (int x = 0; x < rank; ++x) {
78         inverted[y * rank + x] -= t * inverted[i * rank + x];
79       }
80     }
81   }
82 
83   return inverted;
84 }
85 
Multiply(const std::vector<float> & a_mat,const std::vector<float> & b_mat,int m,int n,int k)86 std::vector<float> Multiply(const std::vector<float>& a_mat,
87                             const std::vector<float>& b_mat, int m, int n,
88                             int k) {
89   std::vector<float> result(m * k);
90   for (int y = 0; y < m; ++y) {
91     for (int x = 0; x < k; ++x) {
92       float sum = 0.0f;
93       for (int i = 0; i < n; ++i) {
94         sum += a_mat[y * n + i] * b_mat[i * k + x];
95       }
96       result[y * k + x] = sum;
97     }
98   }
99   return result;
100 }
101 }  // namespace
102 
AtMatrixForWinograd4x4To6x6()103 std::vector<float> AtMatrixForWinograd4x4To6x6() {
104   return GetTransposedMatrixForWinograd(6, 4);
105 }
106 
BtMatrixForWinograd4x4To6x6()107 std::vector<float> BtMatrixForWinograd4x4To6x6() {
108   return GetInversedMatrixForWinograd(6);
109 }
110 
RearrangeWeightsToWinograd4x4To6x6Weights(const Tensor<OHWI,DataType::FLOAT32> & src_weights,Tensor<OHWI,DataType::FLOAT32> * dst_weights)111 void RearrangeWeightsToWinograd4x4To6x6Weights(
112     const Tensor<OHWI, DataType::FLOAT32>& src_weights,
113     Tensor<OHWI, DataType::FLOAT32>* dst_weights) {
114   OHWI dst_shape;
115   dst_shape.o = src_weights.shape.o;
116   dst_shape.h = 6;
117   dst_shape.w = 6;
118   dst_shape.i = src_weights.shape.i;
119   dst_weights->shape = dst_shape;
120   dst_weights->data.resize(dst_shape.DimensionsProduct());
121 
122   auto gt_mat = GetTransposedMatrixForWinograd(6, 3);
123   std::vector<float> g_mat(gt_mat.size());
124   for (int y = 0; y < 3; ++y) {
125     for (int x = 0; x < 6; ++x) {
126       g_mat[x * 3 + y] = gt_mat[y * 6 + x];
127     }
128   }
129 
130   for (int d = 0; d < src_weights.shape.o; ++d) {
131     for (int s = 0; s < src_weights.shape.i; ++s) {
132       std::vector<float> in_vals(9);
133       for (int y = 0; y < 3; ++y) {
134         for (int x = 0; x < 3; ++x) {
135           const int f_index = src_weights.shape.LinearIndex({d, y, x, s});
136           in_vals[y * 3 + x] = src_weights.data[f_index];
137         }
138       }
139 
140       auto temp_vals = Multiply(g_mat, in_vals, 6, 3, 3);
141       auto out_vals = Multiply(temp_vals, gt_mat, 6, 3, 6);
142       for (int y = 0; y < 6; ++y) {
143         for (int x = 0; x < 6; ++x) {
144           const int f_index = dst_shape.LinearIndex({d, y, x, s});
145           dst_weights->data[f_index] = out_vals[y * 6 + x];
146         }
147       }
148     }
149   }
150 }
151 
IsSuitableForWinograd4x4To6x6(const Convolution2DAttributes & attr)152 bool IsSuitableForWinograd4x4To6x6(const Convolution2DAttributes& attr) {
153   return attr.weights.shape.w == 3 && attr.weights.shape.h == 3 &&
154          attr.dilations == HW(1, 1) && attr.strides == HW(1, 1);
155 }
156 
157 }  // namespace gpu
158 }  // namespace tflite
159