/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D | 5x5p2-minmax-scalar-2x1.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1():
     95  float vi4x1 = 0.0f;   (local)
    137  vi4x0 = vi4x1;
    147  vo1p0 += vi4x1 * vk31;
    148  vo0p0 += vi4x1 * vk41;
    155  vi4x1 = vi4x2;
    232  vi4x0 = vi4x1;
    242  vo1p0 += vi4x1 * vk31;
    243  vo0p0 += vi4x1 * vk41;
    250  vi4x1 = vi4x2;
    314  vo1p0 += vi4x1 * vk31;
    [all …]
|
D | 5x5p2-minmax-scalar-3x1.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1():
    102  float vi4x1 = 0.0f;   (local)
    153  vi4x0 = vi4x1;
    165  vo2p0 += vi4x1 * vk21;
    167  vo1p0 += vi4x1 * vk31;
    169  vo0p0 += vi4x1 * vk41;
    177  vi4x1 = vi4x2;
    280  vi4x0 = vi4x1;
    292  vo2p0 += vi4x1 * vk21;
    294  vo1p0 += vi4x1 * vk31;
    296  vo0p0 += vi4x1 * vk41;
    [all …]
|
D | 5x5p2-minmax-scalar-1x1-acc4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4():
     88  float vi4x1 = 0.0f;   (local)
    121  vi4x0 = vi4x1;
    127  vo0p1 += vi4x1 * vk41;
    133  vi4x1 = vi4x2;
    187  vi4x0 = vi4x1;
    193  vo0p1 += vi4x1 * vk41;
    199  vi4x1 = vi4x2;
    243  vo0p1 += vi4x1 * vk41;
|
D | 5x5p2-minmax-scalar-1x1-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2():
     88  float vi4x1 = 0.0f;   (local)
    121  vi4x0 = vi4x1;
    127  vo0p1 += vi4x1 * vk41;
    133  vi4x1 = vi4x2;
    185  vi4x0 = vi4x1;
    191  vo0p1 += vi4x1 * vk41;
    197  vi4x1 = vi4x2;
    239  vo0p1 += vi4x1 * vk41;
|
D | 5x5p2-minmax-scalar-1x1-acc3.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3():
     88  float vi4x1 = 0.0f;   (local)
    121  vi4x0 = vi4x1;
    127  vo0p0 += vi4x1 * vk41;
    133  vi4x1 = vi4x2;
    186  vi4x0 = vi4x1;
    192  vo0p0 += vi4x1 * vk41;
    198  vi4x1 = vi4x2;
    241  vo0p0 += vi4x1 * vk41;
|
D | 5x5p2-minmax-scalar-1x1.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1():
     88  float vi4x1 = 0.0f;   (local)
    121  vi4x0 = vi4x1;
    127  vo0p0 += vi4x1 * vk41;
    133  vi4x1 = vi4x2;
    184  vi4x0 = vi4x1;
    190  vo0p0 += vi4x1 * vk41;
    196  vi4x1 = vi4x2;
    237  vo0p0 += vi4x1 * vk41;
|
D | 3x3p1-minmax-scalar-5x1.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1():
     93  float vi4x1 = *i4++;   (local)
    127  vi4x0 = vi4x1;
    135  vo4p0 += vi4x1 * vk01;
    139  vo3p0 += vi4x1 * vk11;
    143  vo2p0 += vi4x1 * vk21;
    151  vi4x1 = vi4x2;
    213  vo4p0 += vi4x1 * vk01;
    217  vo3p0 += vi4x1 * vk11;
    221  vo2p0 += vi4x1 * vk21;
|
D | 5x5p2-minmax-scalar-3x1-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2():
    102  float vi4x1 = 0.0f;   (local)
    153  vi4x0 = vi4x1;
    165  vo2p1 += vi4x1 * vk21;
    167  vo1p0 += vi4x1 * vk31;
    169  vo0p1 += vi4x1 * vk41;
    177  vi4x1 = vi4x2;
    283  vi4x0 = vi4x1;
    295  vo2p1 += vi4x1 * vk21;
    297  vo1p0 += vi4x1 * vk31;
    299  vo0p1 += vi4x1 * vk41;
    [all …]
|
D | 5x5p2-minmax-scalar-2x1-acc3.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3():
     95  float vi4x1 = 0.0f;   (local)
    137  vi4x0 = vi4x1;
    147  vo1p2 += vi4x1 * vk31;
    148  vo0p0 += vi4x1 * vk41;
    155  vi4x1 = vi4x2;
    236  vi4x0 = vi4x1;
    246  vo1p2 += vi4x1 * vk31;
    247  vo0p0 += vi4x1 * vk41;
    254  vi4x1 = vi4x2;
    322  vo1p2 += vi4x1 * vk31;
    [all …]
|
D | 5x5p2-minmax-scalar-2x1-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2():
     95  float vi4x1 = 0.0f;   (local)
    137  vi4x0 = vi4x1;
    147  vo1p0 += vi4x1 * vk31;
    148  vo0p1 += vi4x1 * vk41;
    155  vi4x1 = vi4x2;
    234  vi4x0 = vi4x1;
    244  vo1p0 += vi4x1 * vk31;
    245  vo0p1 += vi4x1 * vk41;
    252  vi4x1 = vi4x2;
    318  vo1p0 += vi4x1 * vk31;
    [all …]
|
D | 5x5p2-minmax-scalar-1x1-acc5.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5():
     88  float vi4x1 = 0.0f;   (local)
    121  vi4x0 = vi4x1;
    127  vo0p4 += vi4x1 * vk41;
    133  vi4x1 = vi4x2;
    188  vi4x0 = vi4x1;
    194  vo0p4 += vi4x1 * vk41;
    200  vi4x1 = vi4x2;
    245  vo0p4 += vi4x1 * vk41;
|
D | 3x3p1-minmax-scalar-6x1.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1():
    100  float vi4x1 = *i4++;   (local)
    139  vi4x0 = vi4x1;
    148  vo4p0 += vi4x1 * vk01;
    153  vo3p0 += vi4x1 * vk11;
    158  vo2p0 += vi4x1 * vk21;
    167  vi4x1 = vi4x2;
    239  vo4p0 += vi4x1 * vk01;
    244  vo3p0 += vi4x1 * vk11;
    249  vo2p0 += vi4x1 * vk21;
|
D | 3x3p1-minmax-scalar-4x1.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1():
     86  float vi4x1 = *i4++;   (local)
    115  vi4x0 = vi4x1;
    125  vo3p0 += vi4x1 * vk11;
    128  vo2p0 += vi4x1 * vk21;
    135  vi4x1 = vi4x2;
    190  vo3p0 += vi4x1 * vk11;
    193  vo2p0 += vi4x1 * vk21;
|
D | 5x5s2p2-minmax-scalar-3x1.c | in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1():
    121  float vi4x1 = 0.0f;   (local)
    196  vo2p0 += vi4x1 * vk01;
    201  vo1p0 += vi4x1 * vk21;
    206  vo0p0 += vi4x1 * vk41;
    214  vi4x1 = vi4x3;
    320  vo2p0 += vi4x1 * vk01;
    325  vo1p0 += vi4x1 * vk21;
    330  vo0p0 += vi4x1 * vk41;
    397  vo2p0 += vi4x1 * vk01;
    402  vo1p0 += vi4x1 * vk21;
    [all …]
|
D | 5x5s2p2-minmax-scalar-3x1-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2():
    121  float vi4x1 = 0.0f;   (local)
    196  vo2p1 += vi4x1 * vk01;
    201  vo1p1 += vi4x1 * vk21;
    206  vo0p1 += vi4x1 * vk41;
    214  vi4x1 = vi4x3;
    323  vo2p1 += vi4x1 * vk01;
    328  vo1p1 += vi4x1 * vk21;
    333  vo0p1 += vi4x1 * vk41;
    403  vo2p1 += vi4x1 * vk01;
    408  vo1p1 += vi4x1 * vk21;
    [all …]
|
D | 3x3s2p1-minmax-scalar-3x1.c | in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1():
     98  const float vi4x1 = i4[0];   (local)
    129  vo2p0 += vi4x1 * vk01;
    134  vo1p0 += vi4x1 * vk21;
    175  const float vi4x1 = *i4++;   (local)
    191  vo2p0 += vi4x1 * vk01;
    196  vo1p0 += vi4x1 * vk21;
|
D | 5x5s2p2-minmax-scalar-2x1.c | in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1():
    109  float vi4x1 = 0.0f;   (local)
    170  vo1p0 += vi4x1 * vk21;
    173  vo0p0 += vi4x1 * vk41;
    180  vi4x1 = vi4x3;
    260  vo1p0 += vi4x1 * vk21;
    263  vo0p0 += vi4x1 * vk41;
    314  vo1p0 += vi4x1 * vk21;
    317  vo0p0 += vi4x1 * vk41;
|
D | 5x5s2p2-minmax-scalar-2x1-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2():
    109  float vi4x1 = 0.0f;   (local)
    170  vo1p1 += vi4x1 * vk21;
    173  vo0p1 += vi4x1 * vk41;
    180  vi4x1 = vi4x3;
    262  vo1p1 += vi4x1 * vk21;
    265  vo0p1 += vi4x1 * vk41;
    318  vo1p1 += vi4x1 * vk21;
    321  vo0p1 += vi4x1 * vk41;
|
D | 5x5s2p2-minmax-scalar-2x1-acc3.c | in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3():
    109  float vi4x1 = 0.0f;   (local)
    170  vo1p1 += vi4x1 * vk21;
    173  vo0p0 += vi4x1 * vk41;
    180  vi4x1 = vi4x3;
    264  vo1p1 += vi4x1 * vk21;
    267  vo0p0 += vi4x1 * vk41;
    322  vo1p1 += vi4x1 * vk21;
    325  vo0p0 += vi4x1 * vk41;
|
D | 3x3p1-minmax-scalar-3x1.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1():
     79  float vi4x1 = *i4++;   (local)
    103  vi4x0 = vi4x1;
    113  vo2p0 += vi4x1 * vk21;
    119  vi4x1 = vi4x2;
    165  vo2p0 += vi4x1 * vk21;
|
D | 3x3s2p1-minmax-scalar-4x1.c | in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1():
    110  const float vi4x1 = i4[0];   (local)
    150  vo2p0 += vi4x1 * vk01;
    157  vo1p0 += vi4x1 * vk21;
    207  const float vi4x1 = *i4++;   (local)
    228  vo2p0 += vi4x1 * vk01;
    235  vo1p0 += vi4x1 * vk21;
|
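The scalar dwconv2d-chw references above all show the same sliding-window idiom: a few per-row locals (vi4x0, vi4x1, vi4x2, ...) are multiplied against the kernel taps (vk01, vk21, vk41, ...) and then shifted by one column (vi4x0 = vi4x1; vi4x1 = vi4x2;) before the next output is computed. Below is a minimal, illustrative sketch of that idiom for a single row and a 3-tap filter; the function and parameter names are invented for the example, and the real kernels above additionally handle several output rows at once, strides, and min/max clamping.

    #include <stddef.h>

    /* Illustrative sketch (not XNNPACK code) of the column-sliding pattern:
     * keep a small window of input values in locals, accumulate products
     * with the kernel taps, then shift the window by one column. */
    static void conv1d_3tap_row(const float* input, const float* kernel,
                                float* output, size_t width) {
      float vi_x0 = 0.0f;                        /* implicit left padding */
      float vi_x1 = width > 0 ? input[0] : 0.0f;
      for (size_t x = 0; x < width; x++) {
        const float vi_x2 = (x + 1 < width) ? input[x + 1] : 0.0f;  /* right pad */
        float vo = vi_x0 * kernel[0];
        vo += vi_x1 * kernel[1];                 /* analogous to vo0p0 += vi4x1 * vk41 */
        vo += vi_x2 * kernel[2];
        output[x] = vo;
        vi_x0 = vi_x1;                           /* analogous to vi4x0 = vi4x1 */
        vi_x1 = vi_x2;                           /* analogous to vi4x1 = vi4x2 */
      }
    }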
/external/XNNPACK/src/f32-conv-hwc2chw/
D | 3x3s2p1c3x4-wasmsimd-2x2.c | in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2():
    117  const v128_t vi4x1 = wasm_v128_load(i4); i4 += 4;   (local)
    136  …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk20c0, wasm_v32x4_shuffle(vi4x1, vi4x1, 3, 3, 3, 3))…
    204  …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk21c0, wasm_v32x4_shuffle(vi4x1, vi4x1, 0, 0, 0, 0))…
    225  …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk21c1, wasm_v32x4_shuffle(vi4x1, vi4x1, 1, 1, 1, 1))…
    253  …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk21c2, wasm_v32x4_shuffle(vi4x1, vi4x1, 2, 2, 2, 2))…
    274  …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk22c0, wasm_v32x4_shuffle(vi4x1, vi4x1, 3, 3, 3, 3))…
    365  v128_t vi4x1 = wasm_v128_load(i4);   (local)
    389  …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk20c0, wasm_v32x4_shuffle(vi4x1, vi4x1, 3, 3, 3, 3))…
    469  …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk21c0, wasm_v32x4_shuffle(vi4x1, vi4x1, 0, 0, 0, 0))…
    496  …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk21c1, wasm_v32x4_shuffle(vi4x1, vi4x1, 1, 1, 1, 1))…
    [all …]
|
D | 3x3s2p1c3x4-sse-2x2.c | in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2():
    116  const __m128 vi4x1 = _mm_loadu_ps(i4); i4 += 4;   (local)
    135  …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk20c0, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(3, 3, 3, 3))…
    203  …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk21c0, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(0, 0, 0, 0))…
    224  …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk21c1, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(1, 1, 1, 1))…
    252  …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk21c2, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(2, 2, 2, 2))…
    273  …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk22c0, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(3, 3, 3, 3))…
    365  __m128 vi4x1 = _mm_loadu_ps(i4);   (local)
    389  …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk20c0, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(3, 3, 3, 3))…
    469  …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk21c0, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(0, 0, 0, 0))…
    496  …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk21c1, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(1, 1, 1, 1))…
    [all …]
|
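In the conv-hwc2chw references above, vi4x1 holds four consecutive input values, and each use broadcasts one lane with a shuffle before multiplying it by a vector of four kernel weights and adding into the accumulator. A hedged sketch of that single step with SSE intrinsics follows; the helper name and its parameters are invented, and the loads of i4, the weight vectors, and the surrounding loops are omitted. The wasmsimd variant expresses the same step with wasm_v32x4_shuffle, wasm_f32x4_mul, and wasm_f32x4_add.

    #include <xmmintrin.h>

    /* Sketch (not XNNPACK code) of the broadcast-multiply-accumulate step:
     * replicate lane 3 of the input vector into all four lanes, multiply by
     * a vector of kernel weights, and add the result to the accumulator. */
    static inline __m128 mul_add_broadcast_lane3(__m128 acc, __m128 vk, __m128 vi) {
      /* _MM_SHUFFLE(3, 3, 3, 3) selects element 3 for every output lane. */
      const __m128 vbroadcast = _mm_shuffle_ps(vi, vi, _MM_SHUFFLE(3, 3, 3, 3));
      return _mm_add_ps(acc, _mm_mul_ps(vk, vbroadcast));
    }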
/external/XNNPACK/src/f32-conv-hwc/gen/
D | 3x3s2p1c3x8-neon-2x1.c | in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1():
    173  const float32x4_t vi4x1 = vld1q_f32(i4); i4 += 4;   (local)
    195  vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c0x0123, vget_low_f32(vi4x1), 0);
    197  vo1c4567 = vmlaq_lane_f32(vo1c4567, vk21c0x4567, vget_low_f32(vi4x1), 0);
    219  vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c1x0123, vget_low_f32(vi4x1), 1);
    221  vo1c4567 = vmlaq_lane_f32(vo1c4567, vk21c1x4567, vget_low_f32(vi4x1), 1);
    243  vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0);
    245  vo1c4567 = vmlaq_lane_f32(vo1c4567, vk21c2x4567, vget_high_f32(vi4x1), 0);
    267  vo1c0123 = vmlaq_lane_f32(vo1c0123, vk22c0x0123, vget_high_f32(vi4x1), 1);
    269  vo1c4567 = vmlaq_lane_f32(vo1c4567, vk22c0x4567, vget_high_f32(vi4x1), 1);
    330  vi4x0 = vcombine_f32(vget_high_f32(vi4x1), vi4x2);
    [all …]
|
D | 3x3s2p1c3x8-neonfma-2x1.c | in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1():
    175  const float32x4_t vi4x1 = vld1q_f32(i4); i4 += 4;   (local)
    197  vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c0x0123, vget_low_f32(vi4x1), 0);
    199  vo1c4567 = vfmaq_lane_f32(vo1c4567, vk21c0x4567, vget_low_f32(vi4x1), 0);
    221  vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c1x0123, vget_low_f32(vi4x1), 1);
    223  vo1c4567 = vfmaq_lane_f32(vo1c4567, vk21c1x4567, vget_low_f32(vi4x1), 1);
    245  vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0);
    247  vo1c4567 = vfmaq_lane_f32(vo1c4567, vk21c2x4567, vget_high_f32(vi4x1), 0);
    269  vo1c0123 = vfmaq_lane_f32(vo1c0123, vk22c0x0123, vget_high_f32(vi4x1), 1);
    271  vo1c4567 = vfmaq_lane_f32(vo1c4567, vk22c0x4567, vget_high_f32(vi4x1), 1);
    332  vi4x0 = vcombine_f32(vget_high_f32(vi4x1), vi4x2);
    [all …]
|
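The NEON conv-hwc references express the equivalent step with lane-indexed multiply-accumulates: the loaded vector is split with vget_low_f32 / vget_high_f32 and vmlaq_lane_f32 (plain NEON) or vfmaq_lane_f32 (NEON with FMA) folds one lane times a weight vector into the accumulator. A minimal sketch of one such accumulation, with invented names and assuming an AArch64 or FMA-capable ARMv7 target, is shown below.

    #include <arm_neon.h>

    /* Sketch (not XNNPACK code) of the lane-wise accumulation: multiply a
     * vector of 4 kernel weights by lane 0 of the low half of the input
     * vector and add it to the accumulator. */
    static inline float32x4_t accumulate_lane0(float32x4_t acc,
                                               float32x4_t vk,
                                               float32x4_t vi) {
      const float32x2_t vi_lo = vget_low_f32(vi);   /* lanes 0 and 1 */
    #if defined(__aarch64__) || defined(__ARM_FEATURE_FMA)
      return vfmaq_lane_f32(acc, vk, vi_lo, 0);     /* fused multiply-add */
    #else
      return vmlaq_lane_f32(acc, vk, vi_lo, 0);     /* separate mul + add */
    #endif
    }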