1 // Copyright 2016 The Gemmlowp Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include <unistd.h> 16 #ifdef __APPLE__ 17 #include <sys/time.h> 18 #endif 19 20 #include <cstdint> 21 #include <cstdlib> 22 #include <ctime> 23 #include <iomanip> 24 #include <iostream> 25 #include <map> 26 #include <memory> 27 #include <vector> 28 29 #include "streams.h" 30 31 #define MUL_OFFSET (3) 32 #define ADD_OFFSET (100) 33 34 using namespace gemmlowp::meta; 35 36 void prepare_row_major_data(int rows, int elements, int stride, std::uint8_t* data) { 37 for (int i = 0; i < rows * stride; ++i) { 38 data[i] = 255; 39 } 40 for (int i = 0; i < rows; ++i) { 41 for (int j = 0; j < elements; ++j) { 42 data[i * stride + j] = j % 256; 43 } 44 } 45 } 46 47 void prepare_column_major_data(int columns, int elements, int stride, 48 std::uint8_t* data) { 49 for (int i = 0; i < elements * stride; ++i) { 50 data[i] = 255; 51 } 52 for (int i = 0; i < elements; ++i) { 53 for (int j = 0; j < columns; ++j) { 54 data[i * stride + j] = i % 256; 55 } 56 } 57 } 58 59 void print_out(std::uint8_t* result, int rows, int elements) { 60 int size = rows * ((elements + 7) / 8) * 8; 61 for (int i = 0; i < size; ++i) { 62 std::cout << static_cast<int>(result[i]) << " "; 63 } 64 std::cout << std::endl << std::flush; 65 } 66 67 bool check(std::uint8_t* result, int rows, int elements) { 68 int chunks = elements / 8; 69 int leftover = elements % 8; 70 for (int i = 0; i < chunks; ++i) { 71 int chunk_index = i * rows * 8; 72 int chunk_start_value = i * 8; 73 for (int j = 0; j < rows; ++j) { 74 for (int k = 0; k < 8; ++k) { 75 if (result[chunk_index + j * 8 + k] != chunk_start_value + k) { 76 return false; 77 } 78 } 79 } 80 } 81 82 int leftover_index = chunks * rows * 8; 83 int leftover_start_value = chunks * 8; 84 for (int i = 0; i < rows; ++i) { 85 for (int j = 0; j < leftover; ++j) { 86 if (result[leftover_index + i * 8 + j] != leftover_start_value + j) { 87 return false; 88 } 89 } 90 } 91 92 int expected_sum = 93 ((elements * (elements - 1)) / 2) * MUL_OFFSET + ADD_OFFSET; 94 int sums_offset = rows * ((elements + 7) / 8) * 8; 95 std::int32_t* sums = reinterpret_cast<std::int32_t*>(result + sums_offset); 96 for (int i = 0; i < rows; ++i) { 97 if (sums[i] != expected_sum) { 98 return false; 99 } 100 } 101 102 return true; 103 } 104 105 template <int lanes, int leftover> 106 void test_2(std::uint8_t* in, std::uint8_t* out) { 107 for (int elements = 8; elements < 64; elements += 8) { 108 int all_elements = elements + leftover; 109 for (int stride = all_elements; stride < all_elements + 4; ++stride) { 110 RowMajorWithSum params; 111 params.count = all_elements; 112 params.stride = stride; 113 params.multiplicative_sum_offset = MUL_OFFSET; 114 params.additive_sum_offset = ADD_OFFSET; 115 116 prepare_row_major_data(lanes, all_elements, stride, in); 117 Stream<std::uint8_t, lanes, 8, leftover, RowMajorWithSum>::Pack(in, params, 118 out); 119 if (check(out, lanes, all_elements)) { 120 // std::cout << "Row: " << lanes << "x8x" << leftover << " : " 121 // << all_elements << "@" << stride << " -- OK" << 122 // std::endl; 123 } else { 124 std::cout << "Row: " << lanes << "x8x" << leftover << " : " 125 << all_elements << "@" << stride << " -- ERROR" << std::endl; 126 std::cout << "Exiting." << std::endl; 127 std::exit(1); 128 } 129 } 130 131 for (int stride = lanes; stride < lanes + 4; ++stride) { 132 ColumnMajorWithSum params; 133 params.count = all_elements; 134 params.stride = stride; 135 params.multiplicative_sum_offset = MUL_OFFSET; 136 params.additive_sum_offset = ADD_OFFSET; 137 138 prepare_column_major_data(lanes, all_elements, stride, in); 139 Stream<std::uint8_t, lanes, 8, leftover, ColumnMajorWithSum>::Pack(in, params, 140 out); 141 if (check(out, lanes, all_elements)) { 142 // std::cout << "Column: " << lanes << "x8x" << leftover << " : " 143 // << all_elements << "@" << stride << " -- OK" << 144 // std::endl; 145 } else { 146 std::cout << "Column: " << lanes << "x8x" << leftover << " : " 147 << all_elements << "@" << stride << " -- ERROR" << std::endl; 148 std::cout << "Exiting." << std::endl; 149 std::exit(1); 150 } 151 } 152 } 153 } 154 155 template <int lanes> 156 void test(std::uint8_t* in, std::uint8_t* out) { 157 test_2<lanes, 0>(in, out); 158 test_2<lanes, 1>(in, out); 159 test_2<lanes, 2>(in, out); 160 test_2<lanes, 3>(in, out); 161 test_2<lanes, 4>(in, out); 162 test_2<lanes, 5>(in, out); 163 test_2<lanes, 6>(in, out); 164 test_2<lanes, 7>(in, out); 165 } 166 167 int main() { 168 std::unique_ptr<std::uint8_t> in(new std::uint8_t[128 * 1024]); 169 std::unique_ptr<std::uint8_t> out(new std::uint8_t[128 * 1024]); 170 171 test<1>(in.get(), out.get()); 172 test<2>(in.get(), out.get()); 173 test<3>(in.get(), out.get()); 174 test<4>(in.get(), out.get()); 175 test<5>(in.get(), out.get()); 176 test<6>(in.get(), out.get()); 177 test<7>(in.get(), out.get()); 178 test<8>(in.get(), out.get()); 179 180 std::cout << "Ok." << std::endl; 181 return 0; 182 } 183