/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-scalar-3x1.c | 51 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local 166 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 167 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 168 vo2p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 293 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 294 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 295 vo2p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 390 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 391 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 392 vo2p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
|
D | 5x5p2-minmax-scalar-3x1-acc2.c | 51 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local 166 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 167 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 168 vo2p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 296 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 297 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 298 vo2p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 396 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 397 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 398 vo2p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
|
D | 5x5s2p2-minmax-scalar-3x1.c | 52 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local 203 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 204 vo1p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 205 vo2p0 += vi7x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 327 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 328 vo1p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 329 vo2p0 += vi7x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 404 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 405 vo1p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 406 vo2p0 += vi7x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
|
D | 5x5s2p2-minmax-scalar-3x1-acc2.c | 52 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local 203 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 204 vo1p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 205 vo2p0 += vi7x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 330 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 331 vo1p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 332 vo2p0 += vi7x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 410 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 411 vo1p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 412 vo2p0 += vi7x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
|
D | 5x5p2-minmax-scalar-2x1.c | 51 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local 146 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 147 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 241 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 242 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 313 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 314 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
|
D | 5x5s2p2-minmax-scalar-2x1.c | 52 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local 171 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 172 vo1p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 261 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 262 vo1p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 315 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 316 vo1p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
|
D | 5x5p2-minmax-scalar-2x1-acc3.c | 51 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local 146 vo0p2 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 147 vo1p2 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 245 vo0p2 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 246 vo1p2 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 321 vo0p2 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 322 vo1p2 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
|
D | 5x5p2-minmax-scalar-2x1-acc2.c | 51 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local 146 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 147 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 243 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 244 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 317 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 318 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
|
D | 5x5s2p2-minmax-scalar-2x1-acc2.c | 52 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local 171 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 172 vo1p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 263 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 264 vo1p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 319 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 320 vo1p0 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
|
D | 5x5s2p2-minmax-scalar-2x1-acc3.c | 52 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local 171 vo0p2 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 172 vo1p2 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 265 vo0p2 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 266 vo1p2 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 323 vo0p2 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 324 vo1p2 += vi5x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
|
D | 5x5s2p2-minmax-scalar-1x1-acc4.c | 52 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local 138 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() 197 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() 231 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
|
D | 5x5s2p2-minmax-scalar-1x1.c | 52 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local 138 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() 194 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() 225 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
|
D | 5x5p2-minmax-scalar-1x1-acc4.c | 51 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local 126 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 192 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 242 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
|
D | 5x5p2-minmax-scalar-1x1-acc2.c | 51 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local 126 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 190 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 238 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
|
D | 5x5p2-minmax-scalar-1x1-acc3.c | 51 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local 126 vo0p2 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 191 vo0p2 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 240 vo0p2 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
|
D | 5x5s2p2-minmax-scalar-1x1-acc2.c | 52 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local 138 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() 195 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() 227 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
|
D | 5x5s2p2-minmax-scalar-1x1-acc3.c | 52 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() local 138 vo0p2 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() 196 vo0p2 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() 229 vo0p2 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
|
D | 5x5p2-minmax-scalar-1x1.c | 51 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local 126 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 189 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 236 vo0p0 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
|
D | 5x5p2-minmax-scalar-1x1-acc5.c | 51 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local 126 vo0p3 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 193 vo0p3 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 244 vo0p3 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
|
D | 5x5s2p2-minmax-scalar-1x1-acc5.c | 52 const float vk31 = weights[17]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local 138 vo0p3 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() 198 vo0p3 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() 233 vo0p3 += vi3x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
|
D | 5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c | 62 const v128_t vk31 = wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local 213 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 214 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 215 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 216 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 217 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 467 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 468 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 469 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 470 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() [all …]
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-5x4.c | 62 const v128_t vk31 = wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local 213 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 214 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 215 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 216 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 217 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 467 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 468 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 469 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 470 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() [all …]
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-3x4.c | 62 const v128_t vk31 = wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() local 175 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() 176 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() 177 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() 355 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() 356 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() 357 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() 530 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() 531 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() 532 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-3x4-acc2.c | 62 const v128_t vk31 = wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() local 175 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() 176 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() 177 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() 358 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() 359 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() 360 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() 536 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() 537 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() 538 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
|
D | 5x5p2-minmax-wasmsimd-x86-loadsplat-3x4.c | 62 const v128_t vk31 = wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() local 175 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() 176 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() 177 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() 355 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() 356 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() 357 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() 530 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() 531 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() 532 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
|