/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7x-minmax-neon-c32-acc2.c |
    62   const int64x2_t vleft_shift = vld1q_dup_s64(&params->neon.left_shift); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    143  const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    144  const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    145  const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    146  const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    147  const int64x2_t vprod89 = vmull_s32(vget_low_s32(vacc89AB), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    148  const int64x2_t vprodAB = vmull_high_s32(vacc89AB, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    149  const int64x2_t vprodCD = vmull_s32(vget_low_s32(vaccCDEF), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    150  const int64x2_t vprodEF = vmull_high_s32(vaccCDEF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    151  const int64x2_t vprodGH = vmull_s32(vget_low_s32(vaccGHIJ), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    [all …]
|
D | 7x-minmax-neon-c24-acc2.c |
    62   const int64x2_t vleft_shift = vld1q_dup_s64(&params->neon.left_shift); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
    126  const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
    127  const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
    128  const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
    129  const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
    130  const int64x2_t vprod89 = vmull_s32(vget_low_s32(vacc89AB), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
    131  const int64x2_t vprodAB = vmull_high_s32(vacc89AB, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
    132  const int64x2_t vprodCD = vmull_s32(vget_low_s32(vaccCDEF), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
    133  const int64x2_t vprodEF = vmull_high_s32(vaccCDEF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
    134  const int64x2_t vprodGH = vmull_s32(vget_low_s32(vaccGHIJ), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
    [all …]
|
D | 7x-minmax-neon-c16-acc2.c |
    62   const int64x2_t vleft_shift = vld1q_dup_s64(&params->neon.left_shift); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
    109  const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
    110  const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
    111  const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
    112  const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
    113  const int64x2_t vprod89 = vmull_s32(vget_low_s32(vacc89AB), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
    114  const int64x2_t vprodAB = vmull_high_s32(vacc89AB, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
    115  const int64x2_t vprodCD = vmull_s32(vget_low_s32(vaccCDEF), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
    116  const int64x2_t vprodEF = vmull_high_s32(vaccCDEF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
    118  const int64x2_t vadjprod01 = vaddw_s32(vprod01, vget_low_s32(vsgnacc0123)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
    [all …]
|
D | 7x-minmax-neon-c8-acc2.c |
    62   const int64x2_t vleft_shift = vld1q_dup_s64(&params->neon.left_shift); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
    92   const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
    93   const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
    94   const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
    95   const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
    97   const int64x2_t vadjprod01 = vaddw_s32(vprod01, vget_low_s32(vsgnacc0123)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
    98   const int64x2_t vadjprod23 = vaddw_high_s32(vprod23, vsgnacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
    99   const int64x2_t vadjprod45 = vaddw_s32(vprod45, vget_low_s32(vsgnacc4567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
    100  const int64x2_t vadjprod67 = vaddw_high_s32(vprod67, vsgnacc4567); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
    102  const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
    [all …]
|
D | 7p7x-minmax-neon-c16-acc2.c |
    177  const int64x2_t vleft_shift = vld1q_dup_s64(&params->neon.left_shift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
    229  const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
    230  const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
    231  const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
    232  const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
    233  const int64x2_t vprod89 = vmull_s32(vget_low_s32(vacc89AB), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
    234  const int64x2_t vprodAB = vmull_high_s32(vacc89AB, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
    235  const int64x2_t vprodCD = vmull_s32(vget_low_s32(vaccCDEF), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
    236  const int64x2_t vprodEF = vmull_high_s32(vaccCDEF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
    238  const int64x2_t vadjprod01 = vaddw_s32(vprod01, vget_low_s32(vsgnacc0123)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
    [all …]
|
D | 7p7x-minmax-neon-c32-acc2.c |
    310  const int64x2_t vleft_shift = vld1q_dup_s64(&params->neon.left_shift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    400  const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    401  const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    402  const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    403  const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    404  const int64x2_t vprod89 = vmull_s32(vget_low_s32(vacc89AB), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    405  const int64x2_t vprodAB = vmull_high_s32(vacc89AB, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    406  const int64x2_t vprodCD = vmull_s32(vget_low_s32(vaccCDEF), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    407  const int64x2_t vprodEF = vmull_high_s32(vaccCDEF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    408  const int64x2_t vprodGH = vmull_s32(vget_low_s32(vaccGHIJ), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    [all …]
|
D | 7p7x-minmax-neon-c24-acc2.c |
    274  const int64x2_t vleft_shift = vld1q_dup_s64(&params->neon.left_shift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
    345  const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
    346  const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
    347  const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
    348  const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
    349  const int64x2_t vprod89 = vmull_s32(vget_low_s32(vacc89AB), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
    350  const int64x2_t vprodAB = vmull_high_s32(vacc89AB, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
    351  const int64x2_t vprodCD = vmull_s32(vget_low_s32(vaccCDEF), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
    352  const int64x2_t vprodEF = vmull_high_s32(vaccCDEF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
    353  const int64x2_t vprodGH = vmull_s32(vget_low_s32(vaccGHIJ), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
    [all …]
|
D | 7p7x-minmax-neon-c8-acc2.c |
    141  const int64x2_t vleft_shift = vld1q_dup_s64(&params->neon.left_shift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
    174  const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
    175  const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
    176  const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
    177  const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
    179  const int64x2_t vadjprod01 = vaddw_s32(vprod01, vget_low_s32(vsgnacc0123)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
    180  const int64x2_t vadjprod23 = vaddw_high_s32(vprod23, vsgnacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
    181  const int64x2_t vadjprod45 = vaddw_s32(vprod45, vget_low_s32(vsgnacc4567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
    182  const int64x2_t vadjprod67 = vaddw_high_s32(vprod67, vsgnacc4567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
    184  const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
    [all …]
|
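The qs8-gavgpool kernels above are generated from unipass-neon.c.in (matched further down) and share one "precise" requantization tail with the qs8/qu8 requantization and qu8 pooling kernels that follow: widen each 32-bit accumulator lane to 64 bits against the fixed-point multiplier, bias products of negative accumulators by -1 so the rounding shift rounds halfway cases away from zero rather than toward +infinity, scale down with a rounding shift, and narrow. Below is a minimal four-lane sketch of that sequence, assuming AArch64 (the vmull_high_s32/vaddw_high_s32 forms are A64-only); the helper name and framing are mine, not XNNPACK's.

    #include <arm_neon.h>

    // Sketch of the shared "precise" requantization step. vacc holds four
    // 32-bit accumulators; vmultiplier and vleft_shift (each lane = -shift)
    // stand in for the kernel's params->neon fields.
    static inline int32x4_t requantize_precise(int32x4_t vacc,
                                               int32x4_t vmultiplier,
                                               int64x2_t vleft_shift) {
      // -1 in lanes where the accumulator is negative, 0 elsewhere.
      const int32x4_t vsgnacc = vshrq_n_s32(vacc, 31);

      // Widen to 64 bits: acc * multiplier, two lanes at a time.
      const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc), vget_low_s32(vmultiplier));
      const int64x2_t vprod23 = vmull_high_s32(vacc, vmultiplier);

      // Add -1 to products of negative accumulators so the rounding right
      // shift below rounds ties away from zero instead of toward +inf.
      const int64x2_t vadjprod01 = vaddw_s32(vprod01, vget_low_s32(vsgnacc));
      const int64x2_t vadjprod23 = vaddw_high_s32(vprod23, vsgnacc);

      // vrshlq_s64 with a negative per-lane count is a rounding right shift.
      const int64x2_t vscaled01 = vrshlq_s64(vadjprod01, vleft_shift);
      const int64x2_t vscaled23 = vrshlq_s64(vadjprod23, vleft_shift);

      // Narrow back to 32 bits; the real kernels then add the output zero
      // point and saturate to 8 bits.
      return vcombine_s32(vmovn_s64(vscaled01), vmovn_s64(vscaled23));
    }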
/external/XNNPACK/src/qs8-requantization/ |
D | precise-neon.c |
    45  const int64x2_t vshift = vdupq_n_s64(-shift); in xnn_qs8_requantize_precise__neon()
    61  const int64x2_t x01_product = vmull_s32(vget_low_s32(x), vget_low_s32(vmultiplier)); in xnn_qs8_requantize_precise__neon()
    62  const int64x2_t x23_product = vmull_high_s32(x, vmultiplier); in xnn_qs8_requantize_precise__neon()
    63  const int64x2_t y01_product = vmull_s32(vget_low_s32(y), vget_low_s32(vmultiplier)); in xnn_qs8_requantize_precise__neon()
    64  const int64x2_t y23_product = vmull_high_s32(y, vmultiplier); in xnn_qs8_requantize_precise__neon()
    65  const int64x2_t z01_product = vmull_s32(vget_low_s32(z), vget_low_s32(vmultiplier)); in xnn_qs8_requantize_precise__neon()
    66  const int64x2_t z23_product = vmull_high_s32(z, vmultiplier); in xnn_qs8_requantize_precise__neon()
    67  const int64x2_t w01_product = vmull_s32(vget_low_s32(w), vget_low_s32(vmultiplier)); in xnn_qs8_requantize_precise__neon()
    68  const int64x2_t w23_product = vmull_high_s32(w, vmultiplier); in xnn_qs8_requantize_precise__neon()
    70  const int64x2_t x01_product = vmull_s32(vget_low_s32(x), vmultiplier); in xnn_qs8_requantize_precise__neon()
    [all …]
|
/external/XNNPACK/src/qu8-requantization/ |
D | precise-neon.c |
    45  const int64x2_t vshift = vdupq_n_s64(-shift); in xnn_qu8_requantize_precise__neon()
    61  const int64x2_t x01_product = vmull_s32(vget_low_s32(x), vget_low_s32(vmultiplier)); in xnn_qu8_requantize_precise__neon()
    62  const int64x2_t x23_product = vmull_high_s32(x, vmultiplier); in xnn_qu8_requantize_precise__neon()
    63  const int64x2_t y01_product = vmull_s32(vget_low_s32(y), vget_low_s32(vmultiplier)); in xnn_qu8_requantize_precise__neon()
    64  const int64x2_t y23_product = vmull_high_s32(y, vmultiplier); in xnn_qu8_requantize_precise__neon()
    65  const int64x2_t z01_product = vmull_s32(vget_low_s32(z), vget_low_s32(vmultiplier)); in xnn_qu8_requantize_precise__neon()
    66  const int64x2_t z23_product = vmull_high_s32(z, vmultiplier); in xnn_qu8_requantize_precise__neon()
    67  const int64x2_t w01_product = vmull_s32(vget_low_s32(w), vget_low_s32(vmultiplier)); in xnn_qu8_requantize_precise__neon()
    68  const int64x2_t w23_product = vmull_high_s32(w, vmultiplier); in xnn_qu8_requantize_precise__neon()
    70  const int64x2_t x01_product = vmull_s32(vget_low_s32(x), vmultiplier); in xnn_qu8_requantize_precise__neon()
    [all …]
|
/external/XNNPACK/src/qu8-gavgpool/ |
D | 7x-minmax-neon-c8.c |
    62   const int64x2_t vleft_shift = vld1q_dup_s64(&params->neon.left_shift); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
    90   const int64x2_t vproduct01 = vmull_s32(vget_low_s32(vacc_lo), vget_low_s32(vmultiplier)); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
    91   const int64x2_t vproduct23 = vmull_high_s32(vacc_lo, vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
    92   const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vget_low_s32(vmultiplier)); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
    93   const int64x2_t vproduct67 = vmull_high_s32(vacc_hi, vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
    95   const int64x2_t vadjusted_product01 = vaddw_s32(vproduct01, vget_low_s32(vneg_mask_lo)); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
    96   const int64x2_t vadjusted_product23 = vaddw_high_s32(vproduct23, vneg_mask_lo); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
    97   const int64x2_t vadjusted_product45 = vaddw_s32(vproduct45, vget_low_s32(vneg_mask_hi)); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
    98   const int64x2_t vadjusted_product67 = vaddw_high_s32(vproduct67, vneg_mask_hi); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
    100  const int64x2_t vproduct01 = vmull_s32(vget_low_s32(vacc_lo), vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
    [all …]
|
D | 7p7x-minmax-neon-c8.c |
    108  const int64x2_t vleft_shift = vld1q_dup_s64(&params->neon.left_shift); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    166  const int64x2_t vproduct01 = vmull_s32(vget_low_s32(vacc_lo), vget_low_s32(vmultiplier)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    167  const int64x2_t vproduct23 = vmull_high_s32(vacc_lo, vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    168  const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vget_low_s32(vmultiplier)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    169  const int64x2_t vproduct67 = vmull_high_s32(vacc_hi, vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    171  const int64x2_t vadjusted_product01 = vaddw_s32(vproduct01, vget_low_s32(vneg_mask_lo)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    172  const int64x2_t vadjusted_product23 = vaddw_high_s32(vproduct23, vneg_mask_lo); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    173  const int64x2_t vadjusted_product45 = vaddw_s32(vproduct45, vget_low_s32(vneg_mask_hi)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    174  const int64x2_t vadjusted_product67 = vaddw_high_s32(vproduct67, vneg_mask_hi); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    176  const int64x2_t vproduct01 = vmull_s32(vget_low_s32(vacc_lo), vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    [all …]
|
/external/XNNPACK/src/qu8-avgpool/ |
D | 9x-minmax-neon-c8.c |
    40   const int64x2_t vleft_shift = vld1q_dup_s64(&params->neon.left_shift); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
    145  const int64x2_t vproduct01 = vmull_s32(vget_low_s32(vacc_lo), vget_low_s32(vmultiplier)); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
    146  const int64x2_t vproduct23 = vmull_high_s32(vacc_lo, vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
    147  const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vget_low_s32(vmultiplier)); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
    148  const int64x2_t vproduct67 = vmull_high_s32(vacc_hi, vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
    150  const int64x2_t vadjusted_product01 = vaddw_s32(vproduct01, vget_low_s32(vneg_mask_lo)); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
    151  const int64x2_t vadjusted_product23 = vaddw_high_s32(vproduct23, vneg_mask_lo); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
    152  const int64x2_t vadjusted_product45 = vaddw_s32(vproduct45, vget_low_s32(vneg_mask_hi)); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
    153  const int64x2_t vadjusted_product67 = vaddw_high_s32(vproduct67, vneg_mask_hi); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
    155  const int64x2_t vproduct01 = vmull_s32(vget_low_s32(vacc_lo), vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
    [all …]
|
D | 9p8x-minmax-neon-c8.c |
    40   const int64x2_t vleft_shift = vld1q_dup_s64(&params->neon.left_shift); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    289  const int64x2_t vproduct01 = vmull_s32(vget_low_s32(vacc_lo), vget_low_s32(vmultiplier)); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    290  const int64x2_t vproduct23 = vmull_high_s32(vacc_lo, vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    291  const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vget_low_s32(vmultiplier)); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    292  const int64x2_t vproduct67 = vmull_high_s32(vacc_hi, vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    294  const int64x2_t vadjusted_product01 = vaddw_s32(vproduct01, vget_low_s32(vneg_mask_lo)); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    295  const int64x2_t vadjusted_product23 = vaddw_high_s32(vproduct23, vneg_mask_lo); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    296  const int64x2_t vadjusted_product45 = vaddw_s32(vproduct45, vget_low_s32(vneg_mask_hi)); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    297  const int64x2_t vadjusted_product67 = vaddw_high_s32(vproduct67, vneg_mask_hi); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    299  const int64x2_t vproduct01 = vmull_s32(vget_low_s32(vacc_lo), vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    [all …]
|
/external/clang/test/CodeGen/ |
D | aarch64-neon-2velem.c |
    518  int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { in test_vmlal_lane_s32()
    544  int64x2_t test_vmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { in test_vmlal_laneq_s32()
    572  int64x2_t test_vmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { in test_vmlal_high_lane_s32()
    600  int64x2_t test_vmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { in test_vmlal_high_laneq_s32()
    626  int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { in test_vmlsl_lane_s32()
    652  int64x2_t test_vmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { in test_vmlsl_laneq_s32()
    680  int64x2_t test_vmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { in test_vmlsl_high_lane_s32()
    708  int64x2_t test_vmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { in test_vmlsl_high_laneq_s32()
    734  int64x2_t test_vmlal_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { in test_vmlal_lane_u32()
    760  int64x2_t test_vmlal_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) { in test_vmlal_laneq_u32()
    [all …]
|
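These tests exercise the lane-indexed widening multiply-accumulate forms: vmlal_lane_s32 multiplies two int32 lanes of b by one broadcast lane of v and accumulates into the 64-bit lanes of a, while the _high_ variants (AArch64 only) consume the upper half of a 128-bit source. A hypothetical snippet in the same shape, with the function name mine:

    #include <arm_neon.h>

    // Folds all four products b[i] * v[1] into the two 64-bit lanes of acc
    // (lanes 0/2 into acc lane 0, lanes 1/3 into acc lane 1).
    int64x2_t mac_by_lane(int64x2_t acc, int32x4_t b, int32x2_t v) {
      acc = vmlal_lane_s32(acc, vget_low_s32(b), v, 1);  // lanes 0-1 of b
      acc = vmlal_high_lane_s32(acc, b, v, 1);           // lanes 2-3 of b (A64)
      return acc;
    }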
D | arm64-vrnd.c |
    9   int64x2_t rnd5(float64x2_t a) { return vrndq_f64(a); } in rnd5()
    17  int64x2_t rnd9(float64x2_t a) { return vrndnq_f64(a); } in rnd9()
    19  int64x2_t rnd10(float64x2_t a) { return vrndnq_f64(a); } in rnd10()
    26  int64x2_t rnd13(float64x2_t a) { return vrndmq_f64(a); } in rnd13()
    28  int64x2_t rnd14(float64x2_t a) { return vrndmq_f64(a); } in rnd14()
    35  int64x2_t rnd18(float64x2_t a) { return vrndpq_f64(a); } in rnd18()
    42  int64x2_t rnd22(float64x2_t a) { return vrndaq_f64(a); } in rnd22()
    49  int64x2_t rnd25(float64x2_t a) { return vrndxq_f64(a); } in rnd25()
|
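The arm64-vrnd.c matches cover the A64 floating-point rounding family; each intrinsic lowers to a single FRINT* instruction. (The tests return int64x2_t from intrinsics that actually produce float64x2_t, apparently relying on lax vector conversions in these CodeGen tests.) The mode mapping, in a sketch with the function name mine:

    #include <arm_neon.h>

    // One call per rounding mode; results are summed only to keep them live.
    float64x2_t demo_rounding_modes(float64x2_t a) {
      const float64x2_t z = vrndq_f64(a);   // frintz: toward zero (truncate)
      const float64x2_t n = vrndnq_f64(a);  // frintn: to nearest, ties to even
      const float64x2_t m = vrndmq_f64(a);  // frintm: toward -inf (floor)
      const float64x2_t p = vrndpq_f64(a);  // frintp: toward +inf (ceil)
      const float64x2_t w = vrndaq_f64(a);  // frinta: to nearest, ties away from zero
      const float64x2_t x = vrndxq_f64(a);  // frintx: current FPCR mode, signals inexact
      return vaddq_f64(vaddq_f64(z, n), vaddq_f64(vaddq_f64(m, p), vaddq_f64(w, x)));
    }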
D | aarch64-neon-3v.c |
    59   int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) { in test_vandq_s64()
    171  int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) { in test_vorrq_s64()
    283  int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) { in test_veorq_s64()
    403  int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) { in test_vbicq_s64()
    531  int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) { in test_vornq_s64()
|
/external/llvm-project/clang/test/CodeGen/ |
D | aarch64-neon-2velem.c |
    675  int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { in test_vmlal_lane_s32()
    705  int64x2_t test_vmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { in test_vmlal_laneq_s32()
    737  int64x2_t test_vmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { in test_vmlal_high_lane_s32()
    769  int64x2_t test_vmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { in test_vmlal_high_laneq_s32()
    799  int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { in test_vmlsl_lane_s32()
    829  int64x2_t test_vmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { in test_vmlsl_laneq_s32()
    861  int64x2_t test_vmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { in test_vmlsl_high_lane_s32()
    893  int64x2_t test_vmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { in test_vmlsl_high_laneq_s32()
    923  int64x2_t test_vmlal_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { in test_vmlal_lane_u32()
    953  int64x2_t test_vmlal_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) { in test_vmlal_laneq_u32()
    [all …]
|
D | aarch64-neon-3v.c |
    59   int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) { in test_vandq_s64()
    171  int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) { in test_vorrq_s64()
    283  int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) { in test_veorq_s64()
    403  int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) { in test_vbicq_s64()
    531  int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) { in test_vornq_s64()
|
D | aarch64-neon-misc.c |
    93    uint64x2_t test_vceqzq_s64(int64x2_t a) { in test_vceqzq_s64()
    303   uint64x2_t test_vcgezq_s64(int64x2_t a) { in test_vcgezq_s64()
    409   uint64x2_t test_vclezq_s64(int64x2_t a) { in test_vclezq_s64()
    515   uint64x2_t test_vcgtzq_s64(int64x2_t a) { in test_vcgtzq_s64()
    621   uint64x2_t test_vcltzq_s64(int64x2_t a) { in test_vcltzq_s64()
    978   int64x2_t test_vpaddlq_s32(int32x4_t a) { in test_vpaddlq_s32()
    1088  int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) { in test_vpadalq_s32()
    1176  int64x2_t test_vqabsq_s64(int64x2_t a) { in test_vqabsq_s64()
    1235  int64x2_t test_vqnegq_s64(int64x2_t a) { in test_vqnegq_s64()
    1284  int64x2_t test_vnegq_s64(int64x2_t a) { in test_vnegq_s64()
    [all …]
|
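Most of these matches are zero comparisons and saturating negate/abs, but vpaddlq_s32 and vpadalq_s32 are the widening pairwise adds that underpin many 64-bit reductions. A hedged sketch of that idiom; the helper name and loop framing are mine:

    #include <arm_neon.h>

    // Sum n int32 values (n a multiple of 4) into one int64 without overflow:
    // vpadalq_s32 pairwise-widens each int32x4_t and accumulates into int64 lanes.
    int64_t sum_s32(const int32_t *p, int n) {
      int64x2_t acc = vdupq_n_s64(0);
      for (int i = 0; i < n; i += 4) {
        acc = vpadalq_s32(acc, vld1q_s32(p + i));
      }
      return vaddvq_s64(acc);  // horizontal add of the two lanes (A64)
    }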
/external/libvpx/libvpx/vp9/common/arm/neon/ |
D | vp9_highbd_iht8x8_add_neon.c |
    25  const int64x2_t t0_lo = vmull_lane_s32(vget_low_s32(sum), c, 0); in highbd_iadst_half_butterfly_neon()
    26  const int64x2_t t1_lo = vmull_lane_s32(vget_low_s32(sub), c, 0); in highbd_iadst_half_butterfly_neon()
    27  const int64x2_t t0_hi = vmull_lane_s32(vget_high_s32(sum), c, 0); in highbd_iadst_half_butterfly_neon()
    28  const int64x2_t t1_hi = vmull_lane_s32(vget_high_s32(sub), c, 0); in highbd_iadst_half_butterfly_neon()
    41  int64x2_t *const s0, in highbd_iadst_butterfly_lane_0_1_neon()
    42  int64x2_t *const s1) { in highbd_iadst_butterfly_lane_0_1_neon()
    43  const int64x2_t t0_lo = vmull_lane_s32(vget_low_s32(in0), c, 0); in highbd_iadst_butterfly_lane_0_1_neon()
    44  const int64x2_t t1_lo = vmull_lane_s32(vget_low_s32(in0), c, 1); in highbd_iadst_butterfly_lane_0_1_neon()
    45  const int64x2_t t0_hi = vmull_lane_s32(vget_high_s32(in0), c, 0); in highbd_iadst_butterfly_lane_0_1_neon()
    46  const int64x2_t t1_hi = vmull_lane_s32(vget_high_s32(in0), c, 1); in highbd_iadst_butterfly_lane_0_1_neon()
    [all …]
|
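The high-bitdepth inverse-ADST helpers follow the same widen-then-round pattern: vmull_lane_s32 forms 64-bit products of 32-bit coefficients, and a rounding narrowing shift brings them back to 32 bits. A simplified half-butterfly, assuming libvpx's DCT_CONST_BITS == 14 and with the helper name mine:

    #include <arm_neon.h>

    #define DCT_CONST_BITS 14  // libvpx fixed-point precision for cospi constants

    // out[i] = round((in[i] * c[0]) >> DCT_CONST_BITS), computed in 64 bits
    // so the high-bitdepth path cannot overflow.
    static int32x4_t half_butterfly(const int32x4_t in, const int32x2_t c) {
      const int64x2_t t_lo = vmull_lane_s32(vget_low_s32(in), c, 0);
      const int64x2_t t_hi = vmull_lane_s32(vget_high_s32(in), c, 0);
      // vrshrn_n_s64: rounding shift right and narrow to 32 bits.
      return vcombine_s32(vrshrn_n_s64(t_lo, DCT_CONST_BITS),
                          vrshrn_n_s64(t_hi, DCT_CONST_BITS));
    }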
/external/XNNPACK/src/qs8-gavgpool/ |
D | unipass-neon.c.in |
    50   const int64x2_t vleft_shift = vld1q_dup_s64(&params->neon.left_shift);
    90   …const int64x2_t vprod${ABC[C:C+2]} = vmull_s32(vget_low_s32(vacc${ABC[C:C+4]}), vget_low_s32(vmult…
    91   const int64x2_t vprod${ABC[C+2:C+4]} = vmull_high_s32(vacc${ABC[C:C+4]}, vmultiplier);
    94   …const int64x2_t vadjprod${ABC[C:C+2]} = vaddw_s32(vprod${ABC[C:C+2]}, vget_low_s32(vsgnacc${ABC[C:…
    95   …const int64x2_t vadjprod${ABC[C+2:C+4]} = vaddw_high_s32(vprod${ABC[C+2:C+4]}, vsgnacc${ABC[C:C+4]…
    98   const int64x2_t vprod${ABC[C:C+2]} = vmull_s32(vget_low_s32(vacc${ABC[C:C+4]}), vmultiplier);
    99   … const int64x2_t vprod${ABC[C+2:C+4]} = vmull_s32(vget_high_s32(vacc${ABC[C:C+4]}), vmultiplier);
    102  …const int64x2_t vadjprod${ABC[C:C+2]} = vaddw_s32(vprod${ABC[C:C+2]}, vget_low_s32(vsgnacc${ABC[C:…
    103  …const int64x2_t vadjprod${ABC[C+2:C+4]} = vaddw_s32(vprod${ABC[C+2:C+4]}, vget_high_s32(vsgnacc${A…
    107  const int64x2_t vacc${ABC[C:C+2]} = vrshlq_s64(vadjprod${ABC[C:C+2]}, vleft_shift);
    [all …]
|
/external/llvm-project/clang/test/CodeGen/arm-mve-intrinsics/ |
D | reinterpret.c |
    71   float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) in test_vreinterpretq_f16_s64()
    232  float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) in test_vreinterpretq_f32_s64()
    393  int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) in test_vreinterpretq_s16_s64()
    553  int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) in test_vreinterpretq_s32_s64()
    661  int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) in test_vreinterpretq_s64_f16()
    680  int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) in test_vreinterpretq_s64_f32()
    699  int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) in test_vreinterpretq_s64_s16()
    718  int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) in test_vreinterpretq_s64_s32()
    737  int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) in test_vreinterpretq_s64_s8()
    756  int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) in test_vreinterpretq_s64_u16()
    [all …]
|
D | vqdmulltq.c |
    25   int64x2_t test_vqdmulltq_s32(int32x4_t a, int32x4_t b) { in test_vqdmulltq_s32()
    55   int64x2_t test_vqdmulltq_m_s32(int64x2_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { in test_vqdmulltq_m_s32()
    85   int64x2_t test_vqdmulltq_n_s32(int32x4_t a, int32_t b) { in test_vqdmulltq_n_s32()
    119  int64x2_t test_vqdmulltq_m_n_s32(int64x2_t inactive, int32x4_t a, int32_t b, mve_pred16_t p) { in test_vqdmulltq_m_n_s32()
|
D | vqdmullbq.c |
    25   int64x2_t test_vqdmullbq_s32(int32x4_t a, int32x4_t b) { in test_vqdmullbq_s32()
    55   int64x2_t test_vqdmullbq_m_s32(int64x2_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { in test_vqdmullbq_m_s32()
    85   int64x2_t test_vqdmullbq_n_s32(int32x4_t a, int32_t b) { in test_vqdmullbq_n_s32()
    119  int64x2_t test_vqdmullbq_m_n_s32(int64x2_t inactive, int32x4_t a, int32_t b, mve_pred16_t p) { in test_vqdmullbq_m_n_s32()
|
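vqdmullbq/vqdmulltq are MVE (Helium) intrinsics rather than NEON: the "b" form takes the bottom (even-numbered) 32-bit lanes of each operand and the "t" form the top (odd-numbered) ones, producing saturating doubling widening products in two int64 lanes; the _m variants merge with an inactive vector under a predicate, and _n broadcasts a scalar operand. A hypothetical use for an armv8.1-m.main+mve target, with the helper name mine:

    #include <arm_mve.h>

    // Widen all four saturating doubled products of a and b: lanes 0 and 2
    // land in *bottom, lanes 1 and 3 in *top.
    void qdmull_all_lanes(int32x4_t a, int32x4_t b,
                          int64x2_t *bottom, int64x2_t *top) {
      *bottom = vqdmullbq_s32(a, b);
      *top    = vqdmulltq_s32(a, b);
    }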