1 // Copyright 2016 The Gemmlowp Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef GEMMLOWP_META_STREAMS_H_ 16 #define GEMMLOWP_META_STREAMS_H_ 17 18 #include <iostream> 19 #include <typeinfo> 20 #include "base.h" 21 22 namespace gemmlowp { 23 namespace meta { 24 25 struct RowMajor { 26 public: 27 int count; 28 int stride; 29 }; 30 31 struct RowMajorWithSum { 32 public: 33 int count; 34 int stride; 35 int multiplicative_sum_offset; 36 int additive_sum_offset; 37 }; 38 39 struct ColumnMajorWithSum { 40 public: 41 int count; 42 int stride; 43 int multiplicative_sum_offset; 44 int additive_sum_offset; 45 }; 46 47 template <typename InType> 48 class StreamUtil<InType, RowMajor> { 49 public: Offset(const RowMajor & params,const InType * source,int offset_stride,int offset_advance)50 static const InType* Offset(const RowMajor& params, const InType* source, 51 int offset_stride, int offset_advance) { 52 return reinterpret_cast<const InType*>( 53 reinterpret_cast<const std::uint8_t*>(source) + 54 offset_stride * params.stride + offset_advance * sizeof(InType)); 55 } 56 Offset(const RowMajor & params,InType * source,int offset_stride,int offset_advance)57 static InType* Offset(const RowMajor& params, InType* source, 58 int offset_stride, int offset_advance) { 59 return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) + 60 offset_stride * params.stride + 61 offset_advance * sizeof(InType)); 62 } 63 Scratch(const RowMajor & params,int lanes_count,int pack_size)64 static int Scratch(const RowMajor& params, int lanes_count, int pack_size) { 65 return AlignTo<64>(lanes_count * AlignTo(pack_size, params.stride)); 66 } 67 }; 68 69 template <typename InType> 70 class StreamUtil<InType, RowMajorWithSum> { 71 public: Offset(const RowMajorWithSum & params,const InType * source,int offset_stride,int offset_advance)72 static const InType* Offset(const RowMajorWithSum& params, 73 const InType* source, int offset_stride, 74 int offset_advance) { 75 return reinterpret_cast<const InType*>( 76 reinterpret_cast<const std::uint8_t*>(source) + 77 offset_stride * params.stride + offset_advance * sizeof(InType)); 78 } 79 Offset(const RowMajorWithSum & params,InType * source,int offset_stride,int offset_advance)80 static InType* Offset(const RowMajorWithSum& params, InType* source, 81 int offset_stride, int offset_advance) { 82 return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) + 83 offset_stride * params.stride + 84 offset_advance * sizeof(InType)); 85 } 86 Scratch(const RowMajorWithSum & params,int lanes_count,int pack_size)87 static int Scratch(const RowMajorWithSum& params, int lanes_count, 88 int pack_size) { 89 return 32 + AlignTo<32>(sizeof(InType) * lanes_count * 90 AlignTo(pack_size, params.count)); 91 } 92 }; 93 94 template <typename InType> 95 class StreamUtil<InType, ColumnMajorWithSum> { 96 public: Offset(const ColumnMajorWithSum & params,const InType * source,int offset_stride,int offset_advance)97 static const InType* Offset(const ColumnMajorWithSum& params, 98 const InType* source, int offset_stride, 99 int offset_advance) { 100 return reinterpret_cast<const InType*>( 101 reinterpret_cast<const std::uint8_t*>(source) + 102 params.stride * offset_advance + offset_stride * sizeof(InType)); 103 } 104 Offset(const ColumnMajorWithSum & params,InType * source,int offset_stride,int offset_advance)105 static const InType* Offset(const ColumnMajorWithSum& params, InType* source, 106 int offset_stride, int offset_advance) { 107 return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) + 108 params.stride * offset_advance + 109 offset_stride * sizeof(InType)); 110 } 111 Scratch(const ColumnMajorWithSum & params,int lanes_count,int pack_size)112 static int Scratch(const ColumnMajorWithSum& params, int lanes_count, 113 int pack_size) { 114 return 32 + AlignTo<32>(sizeof(InType) * lanes_count * 115 AlignTo(pack_size, params.count)); 116 } 117 }; 118 119 template <typename InType, int lanes_count, int pack_size, int leftovers> 120 class Stream<InType, lanes_count, pack_size, leftovers, RowMajor> { 121 public: Pack(const InType * in,const RowMajor & params,InType * out)122 static void Pack(const InType* in, const RowMajor& params, InType* out) { 123 #ifdef DEBUG 124 #ifdef DEBUG_METAGEMM_VERBOSE 125 std::cout << "RowMajor(" << std::string(typeid(InType).name()) 126 << ")::Pack() -- " << lanes_count << "x" << pack_size << " + " 127 << leftovers << std::endl; 128 #endif 129 #else 130 if (lanes_count != 0) { 131 std::cerr << "FATAL: RowMajorWithSum::Pack not implemented." << std::endl; 132 std::exit(1); 133 } 134 #endif 135 } 136 UnpackedAdvance(const RowMajor & params)137 static int UnpackedAdvance(const RowMajor& params) { 138 return sizeof(InType) * pack_size; 139 } 140 PackedAdvance(const RowMajor & params)141 static int PackedAdvance(const RowMajor& params) { 142 return sizeof(InType) * pack_size * lanes_count; 143 } 144 UnpackedStride(const RowMajor & params)145 static int UnpackedStride(const RowMajor& params) { 146 return lanes_count * params.stride; 147 } 148 PackedStride(const RowMajor & params)149 static int PackedStride(const RowMajor& params) { 150 return AlignTo<32>(lanes_count * AlignTo<pack_size>(params.stride)); 151 } 152 Scratch(const RowMajor & params)153 static int Scratch(const RowMajor& params) { return PackedStride(params); } 154 155 #ifdef DEBUG 156 #ifdef DEBUG_METAGEMM_VERBOSE Debug(const RowMajor & params)157 static void Debug(const RowMajor& params) { 158 std::cout << "RowMajor(" << typeid(InType).name() << ")" << std::endl; 159 std::cout << " dims: " << lanes_count << "x" << pack_size << " + " 160 << leftovers << std::endl; 161 std::cout << " scratch: " << Scratch(params) << std::endl; 162 std::cout << " unpacked advance: " << UnpackedAdvance(params) << std::endl; 163 std::cout << " packed advance: " << PackedAdvance(params) << std::endl; 164 std::cout << " unpacked stride: " << UnpackedStride(params) << std::endl; 165 std::cout << " packed stride: " << PackedStride(params) << std::endl; 166 std::cout << " params:" << std::endl; 167 std::cout << " count: " << params.count << std::endl; 168 std::cout << " stride: " << params.stride << std::endl; 169 } 170 #endif 171 #endif 172 }; 173 174 template <typename InType, int lanes_count, int pack_size, int leftovers> 175 class Stream<InType, lanes_count, pack_size, leftovers, RowMajorWithSum> { 176 public: Pack(const InType * in,const RowMajorWithSum & params,InType * out)177 static void Pack(const InType* in, const RowMajorWithSum& params, 178 InType* out) { 179 #ifdef DEBUG 180 #ifdef DEBUG_METAGEMM_VERBOSE 181 std::cout << "RowMajorWithSum(" << typeid(InType).name() << ")::Pack() -- " 182 << lanes_count << "x" << pack_size << " + " << leftovers 183 << std::endl; 184 #endif 185 #else 186 if (lanes_count != 0) { 187 std::cerr << "FATAL: RowMajorWithSum::Pack not implemented." << std::endl; 188 std::exit(1); 189 } 190 #endif 191 } 192 UnpackedAdvance(const RowMajorWithSum & params)193 static int UnpackedAdvance(const RowMajorWithSum& params) { 194 return sizeof(InType) * pack_size; 195 } 196 PackedAdvance(const RowMajorWithSum & params)197 static int PackedAdvance(const RowMajorWithSum& params) { 198 return sizeof(InType) * pack_size * lanes_count; 199 } 200 UnpackedStride(const RowMajorWithSum & params)201 static int UnpackedStride(const RowMajorWithSum& params) { 202 return sizeof(InType) * lanes_count * params.stride; 203 } 204 PackedStride(const RowMajorWithSum & params)205 static int PackedStride(const RowMajorWithSum& params) { 206 return 32 + AlignTo<32>(sizeof(InType) * lanes_count * 207 AlignTo<pack_size>(params.count)); 208 } 209 Scratch(const RowMajorWithSum & params)210 static int Scratch(const RowMajorWithSum& params) { 211 return PackedStride(params); 212 } 213 214 #ifdef DEBUG 215 #ifdef DEBUG_METAGEMM_VERBOSE Debug(const RowMajorWithSum & params)216 static void Debug(const RowMajorWithSum& params) { 217 std::cout << "RowMajorWithSum(" << typeid(InType).name() << ")" 218 << std::endl; 219 std::cout << " dims: " << lanes_count << "x" << pack_size << " + " 220 << leftovers << std::endl; 221 std::cout << " scratch: " << Scratch(params) << std::endl; 222 std::cout << " unpacked advance: " << UnpackedAdvance(params) << std::endl; 223 std::cout << " packed advance: " << PackedAdvance(params) << std::endl; 224 std::cout << " unpacked stride: " << UnpackedStride(params) << std::endl; 225 std::cout << " packed stride: " << PackedStride(params) << std::endl; 226 std::cout << " params:" << std::endl; 227 std::cout << " count: " << params.count << std::endl; 228 std::cout << " stride: " << params.stride << std::endl; 229 std::cout << " multiplicative_sum_offset: " 230 << params.multiplicative_sum_offset << std::endl; 231 std::cout << " additive_sum_offset: " << params.additive_sum_offset 232 << std::endl; 233 } 234 #endif 235 #endif 236 }; 237 238 template <typename InType, int lanes_count, int pack_size, int leftovers> 239 class Stream<InType, lanes_count, pack_size, leftovers, ColumnMajorWithSum> { 240 public: Pack(const InType * in,const ColumnMajorWithSum & params,InType * out)241 static void Pack(const InType* in, const ColumnMajorWithSum& params, 242 InType* out) { 243 #ifdef DEBUG 244 #ifdef DEBUG_METAGEMM_VERBOSE 245 std::cout << "ColumnMajorWithSum(" << typeid(InType).name() 246 << ")::Pack() -- " << lanes_count << "x" << pack_size << " + " 247 << leftovers << std::endl; 248 #endif 249 #else 250 if (lanes_count != 0) { 251 std::cerr << "FATAL: ColumnMajorWithSum::Pack not implemented." 252 << std::endl; 253 std::exit(1); 254 } 255 #endif 256 } 257 UnpackedAdvance(const ColumnMajorWithSum & params)258 static int UnpackedAdvance(const ColumnMajorWithSum& params) { 259 return sizeof(InType) * pack_size * params.stride; 260 } 261 PackedAdvance(const ColumnMajorWithSum & params)262 static int PackedAdvance(const ColumnMajorWithSum& params) { 263 return sizeof(InType) * pack_size * lanes_count; 264 } 265 UnpackedStride(const ColumnMajorWithSum & params)266 static int UnpackedStride(const ColumnMajorWithSum& params) { 267 return sizeof(InType) * lanes_count; 268 } 269 PackedStride(const ColumnMajorWithSum & params)270 static int PackedStride(const ColumnMajorWithSum& params) { 271 return 32 + AlignTo<32>(sizeof(InType) * lanes_count * 272 AlignTo<pack_size>(params.count)); 273 } 274 Scratch(const ColumnMajorWithSum & params)275 static int Scratch(const ColumnMajorWithSum& params) { 276 return PackedStride(params); 277 } 278 279 #ifdef DEBUG 280 #ifdef DEBUG_METAGEMM_VERBOSE Debug(const ColumnMajorWithSum & params)281 static void Debug(const ColumnMajorWithSum& params) { 282 std::cout << "ColumnMajorWithSum(" << typeid(InType).name() << ")" 283 << std::endl; 284 std::cout << " dims: " << lanes_count << "x" << pack_size << " + " 285 << leftovers << std::endl; 286 std::cout << " scratch: " << Scratch(params) << std::endl; 287 std::cout << " unpacked advance: " << UnpackedAdvance(params) << std::endl; 288 std::cout << " packed advance: " << PackedAdvance(params) << std::endl; 289 std::cout << " unpacked stride: " << UnpackedStride(params) << std::endl; 290 std::cout << " packed stride: " << PackedStride(params) << std::endl; 291 std::cout << " params:" << std::endl; 292 std::cout << " count: " << params.count << std::endl; 293 std::cout << " stride: " << params.stride << std::endl; 294 std::cout << " multiplicative_sum_offset: " 295 << params.multiplicative_sum_offset << std::endl; 296 std::cout << " additive_sum_offset: " << params.additive_sum_offset 297 << std::endl; 298 } 299 #endif 300 #endif 301 }; 302 303 } // namespace meta 304 } // namespace gemmlowp 305 306 #ifdef GEMMLOWP_NEON_32 307 #include "streams_arm_32.h" 308 #elif defined(GEMMLOWP_NEON_64) 309 #include "streams_arm_64.h" 310 #endif 311 312 #endif // GEMMLOWP_META_STREAMS_H_ 313