1 // Copyright 2016 The Gemmlowp Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef GEMMLOWP_META_STREAMS_H_
16 #define GEMMLOWP_META_STREAMS_H_
17 
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <string>
#include <typeinfo>

#include "base.h"
21 
22 namespace gemmlowp {
23 namespace meta {
24 
// Parameter bundle selecting the RowMajor stream layout. Plain aggregate; the
// members are public because this is a struct.
struct RowMajor {
  // Only echoed by Stream<..., RowMajor>::Debug in this header.
  // Presumably the number of elements in one row -- TODO confirm with callers.
  int count;

  // Multiplied by offset_stride and added to a byte pointer in
  // StreamUtil<InType, RowMajor>::Offset, i.e. used there as a byte distance.
  int stride;
};
30 
// Parameter bundle selecting the RowMajorWithSum stream layout. Plain
// aggregate; the members are public because this is a struct.
struct RowMajorWithSum {
  // Feeds the packed-size computation in the RowMajorWithSum Scratch and
  // PackedStride helpers. Presumably elements per row -- TODO confirm.
  int count;

  // Multiplied by offset_stride and added to a byte pointer in
  // StreamUtil<InType, RowMajorWithSum>::Offset.
  int stride;

  // The two sum offsets are only printed by Debug in this header.
  // NOTE(review): presumably consumed by the architecture-specific Pack
  // implementations when producing the per-lane sums -- confirm there.
  int multiplicative_sum_offset;
  int additive_sum_offset;
};
38 
// Parameter bundle selecting the ColumnMajorWithSum stream layout. Plain
// aggregate; the members are public because this is a struct.
struct ColumnMajorWithSum {
  // Feeds the packed-size computation in the ColumnMajorWithSum Scratch and
  // PackedStride helpers. Presumably elements per lane -- TODO confirm.
  int count;

  // Multiplied by offset_advance and added to a byte pointer in
  // StreamUtil<InType, ColumnMajorWithSum>::Offset; also scaled by
  // sizeof(InType) * pack_size in Stream<...>::UnpackedAdvance.
  int stride;

  // The two sum offsets are only printed by Debug in this header.
  // NOTE(review): presumably consumed by the architecture-specific Pack
  // implementations when producing the per-lane sums -- confirm there.
  int multiplicative_sum_offset;
  int additive_sum_offset;
};
46 
47 template <typename InType>
48 class StreamUtil<InType, RowMajor> {
49  public:
Offset(const RowMajor & params,const InType * source,int offset_stride,int offset_advance)50   static const InType* Offset(const RowMajor& params, const InType* source,
51                               int offset_stride, int offset_advance) {
52     return reinterpret_cast<const InType*>(
53         reinterpret_cast<const std::uint8_t*>(source) +
54         offset_stride * params.stride + offset_advance * sizeof(InType));
55   }
56 
Offset(const RowMajor & params,InType * source,int offset_stride,int offset_advance)57   static InType* Offset(const RowMajor& params, InType* source,
58                         int offset_stride, int offset_advance) {
59     return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) +
60                                      offset_stride * params.stride +
61                                      offset_advance * sizeof(InType));
62   }
63 
Scratch(const RowMajor & params,int lanes_count,int pack_size)64   static int Scratch(const RowMajor& params, int lanes_count, int pack_size) {
65     return AlignTo<64>(lanes_count * AlignTo(pack_size, params.stride));
66   }
67 };
68 
69 template <typename InType>
70 class StreamUtil<InType, RowMajorWithSum> {
71  public:
Offset(const RowMajorWithSum & params,const InType * source,int offset_stride,int offset_advance)72   static const InType* Offset(const RowMajorWithSum& params,
73                               const InType* source, int offset_stride,
74                               int offset_advance) {
75     return reinterpret_cast<const InType*>(
76         reinterpret_cast<const std::uint8_t*>(source) +
77         offset_stride * params.stride + offset_advance * sizeof(InType));
78   }
79 
Offset(const RowMajorWithSum & params,InType * source,int offset_stride,int offset_advance)80   static InType* Offset(const RowMajorWithSum& params, InType* source,
81                         int offset_stride, int offset_advance) {
82     return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) +
83                                      offset_stride * params.stride +
84                                      offset_advance * sizeof(InType));
85   }
86 
Scratch(const RowMajorWithSum & params,int lanes_count,int pack_size)87   static int Scratch(const RowMajorWithSum& params, int lanes_count,
88                      int pack_size) {
89     return 32 + AlignTo<32>(sizeof(InType) * lanes_count *
90                             AlignTo(pack_size, params.count));
91   }
92 };
93 
94 template <typename InType>
95 class StreamUtil<InType, ColumnMajorWithSum> {
96  public:
Offset(const ColumnMajorWithSum & params,const InType * source,int offset_stride,int offset_advance)97   static const InType* Offset(const ColumnMajorWithSum& params,
98                               const InType* source, int offset_stride,
99                               int offset_advance) {
100     return reinterpret_cast<const InType*>(
101         reinterpret_cast<const std::uint8_t*>(source) +
102         params.stride * offset_advance + offset_stride * sizeof(InType));
103   }
104 
Offset(const ColumnMajorWithSum & params,InType * source,int offset_stride,int offset_advance)105   static const InType* Offset(const ColumnMajorWithSum& params, InType* source,
106                               int offset_stride, int offset_advance) {
107     return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) +
108                                      params.stride * offset_advance +
109                                      offset_stride * sizeof(InType));
110   }
111 
Scratch(const ColumnMajorWithSum & params,int lanes_count,int pack_size)112   static int Scratch(const ColumnMajorWithSum& params, int lanes_count,
113                      int pack_size) {
114     return 32 + AlignTo<32>(sizeof(InType) * lanes_count *
115                             AlignTo(pack_size, params.count));
116   }
117 };
118 
119 template <typename InType, int lanes_count, int pack_size, int leftovers>
120 class Stream<InType, lanes_count, pack_size, leftovers, RowMajor> {
121  public:
Pack(const InType * in,const RowMajor & params,InType * out)122   static void Pack(const InType* in, const RowMajor& params, InType* out) {
123 #ifdef DEBUG
124 #ifdef DEBUG_METAGEMM_VERBOSE
125     std::cout << "RowMajor(" << std::string(typeid(InType).name())
126               << ")::Pack() -- " << lanes_count << "x" << pack_size << " + "
127               << leftovers << std::endl;
128 #endif
129 #else
130     if (lanes_count != 0) {
131       std::cerr << "FATAL: RowMajorWithSum::Pack not implemented." << std::endl;
132       std::exit(1);
133     }
134 #endif
135   }
136 
UnpackedAdvance(const RowMajor & params)137   static int UnpackedAdvance(const RowMajor& params) {
138     return sizeof(InType) * pack_size;
139   }
140 
PackedAdvance(const RowMajor & params)141   static int PackedAdvance(const RowMajor& params) {
142     return sizeof(InType) * pack_size * lanes_count;
143   }
144 
UnpackedStride(const RowMajor & params)145   static int UnpackedStride(const RowMajor& params) {
146     return lanes_count * params.stride;
147   }
148 
PackedStride(const RowMajor & params)149   static int PackedStride(const RowMajor& params) {
150     return AlignTo<32>(lanes_count * AlignTo<pack_size>(params.stride));
151   }
152 
Scratch(const RowMajor & params)153   static int Scratch(const RowMajor& params) { return PackedStride(params); }
154 
155 #ifdef DEBUG
156 #ifdef DEBUG_METAGEMM_VERBOSE
Debug(const RowMajor & params)157   static void Debug(const RowMajor& params) {
158     std::cout << "RowMajor(" << typeid(InType).name() << ")" << std::endl;
159     std::cout << "  dims: " << lanes_count << "x" << pack_size << " + "
160               << leftovers << std::endl;
161     std::cout << "  scratch: " << Scratch(params) << std::endl;
162     std::cout << "  unpacked advance: " << UnpackedAdvance(params) << std::endl;
163     std::cout << "  packed advance: " << PackedAdvance(params) << std::endl;
164     std::cout << "  unpacked stride: " << UnpackedStride(params) << std::endl;
165     std::cout << "  packed stride: " << PackedStride(params) << std::endl;
166     std::cout << "  params:" << std::endl;
167     std::cout << "    count: " << params.count << std::endl;
168     std::cout << "    stride: " << params.stride << std::endl;
169   }
170 #endif
171 #endif
172 };
173 
174 template <typename InType, int lanes_count, int pack_size, int leftovers>
175 class Stream<InType, lanes_count, pack_size, leftovers, RowMajorWithSum> {
176  public:
Pack(const InType * in,const RowMajorWithSum & params,InType * out)177   static void Pack(const InType* in, const RowMajorWithSum& params,
178                    InType* out) {
179 #ifdef DEBUG
180 #ifdef DEBUG_METAGEMM_VERBOSE
181     std::cout << "RowMajorWithSum(" << typeid(InType).name() << ")::Pack() -- "
182               << lanes_count << "x" << pack_size << " + " << leftovers
183               << std::endl;
184 #endif
185 #else
186     if (lanes_count != 0) {
187       std::cerr << "FATAL: RowMajorWithSum::Pack not implemented." << std::endl;
188       std::exit(1);
189     }
190 #endif
191   }
192 
UnpackedAdvance(const RowMajorWithSum & params)193   static int UnpackedAdvance(const RowMajorWithSum& params) {
194     return sizeof(InType) * pack_size;
195   }
196 
PackedAdvance(const RowMajorWithSum & params)197   static int PackedAdvance(const RowMajorWithSum& params) {
198     return sizeof(InType) * pack_size * lanes_count;
199   }
200 
UnpackedStride(const RowMajorWithSum & params)201   static int UnpackedStride(const RowMajorWithSum& params) {
202     return sizeof(InType) * lanes_count * params.stride;
203   }
204 
PackedStride(const RowMajorWithSum & params)205   static int PackedStride(const RowMajorWithSum& params) {
206     return 32 + AlignTo<32>(sizeof(InType) * lanes_count *
207                             AlignTo<pack_size>(params.count));
208   }
209 
Scratch(const RowMajorWithSum & params)210   static int Scratch(const RowMajorWithSum& params) {
211     return PackedStride(params);
212   }
213 
214 #ifdef DEBUG
215 #ifdef DEBUG_METAGEMM_VERBOSE
Debug(const RowMajorWithSum & params)216   static void Debug(const RowMajorWithSum& params) {
217     std::cout << "RowMajorWithSum(" << typeid(InType).name() << ")"
218               << std::endl;
219     std::cout << "  dims: " << lanes_count << "x" << pack_size << " + "
220               << leftovers << std::endl;
221     std::cout << "  scratch: " << Scratch(params) << std::endl;
222     std::cout << "  unpacked advance: " << UnpackedAdvance(params) << std::endl;
223     std::cout << "  packed advance: " << PackedAdvance(params) << std::endl;
224     std::cout << "  unpacked stride: " << UnpackedStride(params) << std::endl;
225     std::cout << "  packed stride: " << PackedStride(params) << std::endl;
226     std::cout << "  params:" << std::endl;
227     std::cout << "    count: " << params.count << std::endl;
228     std::cout << "    stride: " << params.stride << std::endl;
229     std::cout << "    multiplicative_sum_offset: "
230               << params.multiplicative_sum_offset << std::endl;
231     std::cout << "    additive_sum_offset: " << params.additive_sum_offset
232               << std::endl;
233   }
234 #endif
235 #endif
236 };
237 
238 template <typename InType, int lanes_count, int pack_size, int leftovers>
239 class Stream<InType, lanes_count, pack_size, leftovers, ColumnMajorWithSum> {
240  public:
Pack(const InType * in,const ColumnMajorWithSum & params,InType * out)241   static void Pack(const InType* in, const ColumnMajorWithSum& params,
242                    InType* out) {
243 #ifdef DEBUG
244 #ifdef DEBUG_METAGEMM_VERBOSE
245     std::cout << "ColumnMajorWithSum(" << typeid(InType).name()
246               << ")::Pack() -- " << lanes_count << "x" << pack_size << " + "
247               << leftovers << std::endl;
248 #endif
249 #else
250     if (lanes_count != 0) {
251       std::cerr << "FATAL: ColumnMajorWithSum::Pack not implemented."
252                 << std::endl;
253       std::exit(1);
254     }
255 #endif
256   }
257 
UnpackedAdvance(const ColumnMajorWithSum & params)258   static int UnpackedAdvance(const ColumnMajorWithSum& params) {
259     return sizeof(InType) * pack_size * params.stride;
260   }
261 
PackedAdvance(const ColumnMajorWithSum & params)262   static int PackedAdvance(const ColumnMajorWithSum& params) {
263     return sizeof(InType) * pack_size * lanes_count;
264   }
265 
UnpackedStride(const ColumnMajorWithSum & params)266   static int UnpackedStride(const ColumnMajorWithSum& params) {
267     return sizeof(InType) * lanes_count;
268   }
269 
PackedStride(const ColumnMajorWithSum & params)270   static int PackedStride(const ColumnMajorWithSum& params) {
271     return 32 + AlignTo<32>(sizeof(InType) * lanes_count *
272                             AlignTo<pack_size>(params.count));
273   }
274 
Scratch(const ColumnMajorWithSum & params)275   static int Scratch(const ColumnMajorWithSum& params) {
276     return PackedStride(params);
277   }
278 
279 #ifdef DEBUG
280 #ifdef DEBUG_METAGEMM_VERBOSE
Debug(const ColumnMajorWithSum & params)281   static void Debug(const ColumnMajorWithSum& params) {
282     std::cout << "ColumnMajorWithSum(" << typeid(InType).name() << ")"
283               << std::endl;
284     std::cout << "  dims: " << lanes_count << "x" << pack_size << " + "
285               << leftovers << std::endl;
286     std::cout << "  scratch: " << Scratch(params) << std::endl;
287     std::cout << "  unpacked advance: " << UnpackedAdvance(params) << std::endl;
288     std::cout << "  packed advance: " << PackedAdvance(params) << std::endl;
289     std::cout << "  unpacked stride: " << UnpackedStride(params) << std::endl;
290     std::cout << "  packed stride: " << PackedStride(params) << std::endl;
291     std::cout << "  params:" << std::endl;
292     std::cout << "    count: " << params.count << std::endl;
293     std::cout << "    stride: " << params.stride << std::endl;
294     std::cout << "    multiplicative_sum_offset: "
295               << params.multiplicative_sum_offset << std::endl;
296     std::cout << "    additive_sum_offset: " << params.additive_sum_offset
297               << std::endl;
298   }
299 #endif
300 #endif
301 };
302 
303 }  // namespace meta
304 }  // namespace gemmlowp
305 
306 #ifdef GEMMLOWP_NEON_32
307 #include "streams_arm_32.h"
308 #elif defined(GEMMLOWP_NEON_64)
309 #include "streams_arm_64.h"
310 #endif
311 
312 #endif  // GEMMLOWP_META_STREAMS_H_
313