/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-scalar-5x1.c | 85 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local 111 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 115 vo3p0 += vi4x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 119 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 127 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 197 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 201 vo3p0 += vi4x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 205 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
|
D | 5x5p2-minmax-scalar-3x1.c | 94 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local 141 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 143 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 145 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 153 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 268 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 270 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 272 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 280 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 373 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() [all …]
|
D | 5x5p2-minmax-scalar-2x1.c | 88 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local 129 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 130 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 137 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 224 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 225 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 232 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 303 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 304 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
|
D | 5x5p2-minmax-scalar-3x1-acc2.c | 94 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local 141 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 143 vo1p1 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 145 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 153 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 271 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 273 vo1p1 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 275 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 283 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 379 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() [all …]
|
D | 5x5p2-minmax-scalar-2x1-acc3.c | 88 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local 129 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 130 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 137 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 228 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 229 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 236 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 311 vo1p0 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 312 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
|
D | 5x5p2-minmax-scalar-2x1-acc2.c | 88 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local 129 vo1p1 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 130 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 137 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 226 vo1p1 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 227 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 234 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 307 vo1p1 += vi4x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 308 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
|
D | 5x5s2p2-minmax-scalar-3x1.c | 111 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local 170 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 175 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 180 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 188 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 304 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 309 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 314 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 381 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 386 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() [all …]
|
D | 5x5s2p2-minmax-scalar-3x1-acc2.c | 111 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local 170 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 175 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 180 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 188 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 307 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 312 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 317 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 387 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 392 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() [all …]
|
D | 3x3s2p1-minmax-scalar-3x1.c | 88 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() local 104 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() 109 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() 141 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() 181 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() 186 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
|
D | 5x5p2-minmax-scalar-1x1-acc4.c | 82 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local 115 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 121 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 181 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 187 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 237 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
|
D | 5x5p2-minmax-scalar-1x1-acc2.c | 82 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local 115 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 121 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 179 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 185 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 233 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
|
D | 5x5p2-minmax-scalar-1x1-acc3.c | 82 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local 115 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 121 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 180 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 186 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 235 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
|
D | 5x5p2-minmax-scalar-1x1.c | 82 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local 115 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 121 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 178 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 184 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 231 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
|
D | 3x3p1-minmax-scalar-6x1.c | 91 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local 120 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 125 vo3p0 += vi4x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 130 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 139 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 220 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 225 vo3p0 += vi4x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 230 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
|
D | 5x5p2-minmax-scalar-1x1-acc5.c | 82 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local 115 float vo0p4 = vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 121 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 182 float vo0p4 = vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 188 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 239 float vo0p4 = vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
|
D | 3x3p1-minmax-scalar-4x1.c | 79 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local 105 vo3p0 += vi4x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 108 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 115 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 177 vo3p0 += vi4x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 180 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
|
D | 5x5s2p2-minmax-scalar-2x1.c | 101 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local 151 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 154 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 161 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 249 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 252 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 303 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 306 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
|
D | 5x5s2p2-minmax-scalar-2x1-acc2.c | 101 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local 151 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 154 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 161 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 251 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 254 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 307 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 310 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
|
D | 5x5s2p2-minmax-scalar-2x1-acc3.c | 101 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local 151 float vo1p2 = vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 154 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 161 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 253 float vo1p2 = vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 256 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 311 float vo1p2 = vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 314 vo0p1 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
|
D | 3x3s2p1-minmax-scalar-4x1.c | 98 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local 118 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 125 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 165 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 215 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 222 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
|
D | 5x5s2p2-minmax-scalar-1x1-acc4.c | 90 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local 127 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() 133 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() 192 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() 226 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
|
D | 5x5s2p2-minmax-scalar-1x1.c | 90 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local 127 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() 133 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() 189 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() 220 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
|
D | 5x5s2p2-minmax-scalar-1x1-acc2.c | 90 float vi4x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local 127 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() 133 vi4x0 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() 190 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() 222 vo0p0 += vi4x0 * vk40; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
|
/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p0p1c3x8-neonfma-2x1.c | 89 float32x4_t vi4x0 = vld1q_f32(i4); i4 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 118 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 120 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk20c0x4567, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 142 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 144 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk20c1x4567, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 166 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 168 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk20c2x4567, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 190 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c0x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 192 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk21c0x4567, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 332 vi4x0 = vcombine_f32(vget_high_f32(vi4x1), vi4x2); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() [all …]
|
D | 3x3s2p0p1c3x8-neon-2x1.c | 87 float32x4_t vi4x0 = vld1q_f32(i4); i4 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 116 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c0x0123, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 118 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk20c0x4567, vget_low_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 140 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c1x0123, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 142 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk20c1x4567, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 164 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk20c2x0123, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 166 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk20c2x4567, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 188 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c0x0123, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 190 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk21c0x4567, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 330 vi4x0 = vcombine_f32(vget_high_f32(vi4x1), vi4x2); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() [all …]
|