/external/XNNPACK/src/f32-spmm/gen/ |
D | 4x4-minmax-neonfma.c | 141 vout01n0 = vmax_f32(vout01n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 142 vout01n1 = vmax_f32(vout01n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 143 vout01n2 = vmax_f32(vout01n2, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 144 vout01n3 = vmax_f32(vout01n3, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 172 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 212 vout0n0 = vmax_f32(vout0n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 213 vout0n1 = vmax_f32(vout0n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 214 vout0n2 = vmax_f32(vout0n2, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 215 vout0n3 = vmax_f32(vout0n3, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma() 243 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x4__neonfma()
|
D | 8x4-minmax-neonfma.c | 240 vout01n0 = vmax_f32(vout01n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 241 vout01n1 = vmax_f32(vout01n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 242 vout01n2 = vmax_f32(vout01n2, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 243 vout01n3 = vmax_f32(vout01n3, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 271 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 311 vout0n0 = vmax_f32(vout0n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 312 vout0n1 = vmax_f32(vout0n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 313 vout0n2 = vmax_f32(vout0n2, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 314 vout0n3 = vmax_f32(vout0n3, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma() 342 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x4__neonfma()
|
D | 4x2-minmax-neonfma.c | 123 vout01n0 = vmax_f32(vout01n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 124 vout01n1 = vmax_f32(vout01n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 148 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 182 vout0n0 = vmax_f32(vout0n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 183 vout0n1 = vmax_f32(vout0n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x2__neonfma() 207 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x2__neonfma()
|
D | 8x2-minmax-neonfma.c | 200 vout01n0 = vmax_f32(vout01n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 201 vout01n1 = vmax_f32(vout01n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 225 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 259 vout0n0 = vmax_f32(vout0n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 260 vout0n1 = vmax_f32(vout0n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma() 284 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x2__neonfma()
|
D | 12x4-minmax-neonfma.c | 366 vout01n0 = vmax_f32(vout01n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 367 vout01n1 = vmax_f32(vout01n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 368 vout01n2 = vmax_f32(vout01n2, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 369 vout01n3 = vmax_f32(vout01n3, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 397 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 437 vout0n0 = vmax_f32(vout0n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 438 vout0n1 = vmax_f32(vout0n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 439 vout0n2 = vmax_f32(vout0n2, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 440 vout0n3 = vmax_f32(vout0n3, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() 468 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma()
|
D | 16x4-minmax-neonfma.c | 393 vout01n0 = vmax_f32(vout01n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 394 vout01n1 = vmax_f32(vout01n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 395 vout01n2 = vmax_f32(vout01n2, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 396 vout01n3 = vmax_f32(vout01n3, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 424 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 464 vout0n0 = vmax_f32(vout0n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 465 vout0n1 = vmax_f32(vout0n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 466 vout0n2 = vmax_f32(vout0n2, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 467 vout0n3 = vmax_f32(vout0n3, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() 495 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma()
|
D | 12x2-minmax-neonfma.c | 294 vout01n0 = vmax_f32(vout01n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 295 vout01n1 = vmax_f32(vout01n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 319 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 353 vout0n0 = vmax_f32(vout0n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 354 vout0n1 = vmax_f32(vout0n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() 378 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma()
|
D | 16x2-minmax-neonfma.c | 311 vout01n0 = vmax_f32(vout01n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 312 vout01n1 = vmax_f32(vout01n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 336 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 370 vout0n0 = vmax_f32(vout0n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 371 vout0n1 = vmax_f32(vout0n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() 395 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma()
|
D | 4x1-minmax-neonfma.c | 83 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x1__neonfma() 109 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x1__neonfma()
|
D | 4x1-minmax-neon.c | 83 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x1__neon() 109 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x1__neon()
|
D | 32x4-minmax-neonfma.c | 656 vout01n0 = vmax_f32(vout01n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 657 vout01n1 = vmax_f32(vout01n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 658 vout01n2 = vmax_f32(vout01n2, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 659 vout01n3 = vmax_f32(vout01n3, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 687 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 727 vout0n0 = vmax_f32(vout0n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 728 vout0n1 = vmax_f32(vout0n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 729 vout0n2 = vmax_f32(vout0n2, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 730 vout0n3 = vmax_f32(vout0n3, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 758 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
|
D | 4x1-minmax-neon-pipelined.c | 89 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined() 117 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x1__neon_pipelined()
|
D | 4x1-minmax-neonfma-pipelined.c | 89 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined() 117 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x1__neonfma_pipelined()
|
D | 4x1-minmax-neonfma-x2.c | 103 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2() 129 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x1__neonfma_x2()
|
D | 4x1-minmax-neon-x2.c | 103 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x1__neon_x2() 129 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_4x1__neon_x2()
|
D | 8x1-minmax-neonfma.c | 115 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma() 141 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma()
|
D | 8x1-minmax-neon.c | 115 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x1__neon() 141 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x1__neon()
|
D | 8x1-minmax-neon-pipelined.c | 124 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined() 152 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined()
|
D | 8x1-minmax-neonfma-pipelined.c | 124 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined() 152 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined()
|
D | 32x2-minmax-neonfma.c | 492 vout01n0 = vmax_f32(vout01n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 493 vout01n1 = vmax_f32(vout01n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 517 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 551 vout0n0 = vmax_f32(vout0n0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 552 vout0n1 = vmax_f32(vout0n1, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 576 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma()
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 4x2-minmax-neon-lane-ld64.c | 107 vacc0x01 = vmax_f32(vacc0x01, vmin); in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64() 108 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64() 109 vacc2x01 = vmax_f32(vacc2x01, vmin); in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64() 110 vacc3x01 = vmax_f32(vacc3x01, vmin); in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64()
|
D | 4x2-minmax-neonfma-lane-ld64.c | 129 vacc0x01 = vmax_f32(vacc0x01, vmin); in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64() 130 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64() 131 vacc2x01 = vmax_f32(vacc2x01, vmin); in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64() 132 vacc3x01 = vmax_f32(vacc3x01, vmin); in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 4x2-minmax-neon-lane-ld64.c | 131 vacc0x01 = vmax_f32(vacc0x01, vmin); in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64() 132 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64() 133 vacc2x01 = vmax_f32(vacc2x01, vmin); in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64() 134 vacc3x01 = vmax_f32(vacc3x01, vmin); in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64()
|
D | 4x2-minmax-neonfma-lane-ld64.c | 153 vacc0x01 = vmax_f32(vacc0x01, vmin); in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64() 154 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64() 155 vacc2x01 = vmax_f32(vacc2x01, vmin); in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64() 156 vacc3x01 = vmax_f32(vacc3x01, vmin); in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64()
|
/external/XNNPACK/src/f32-rmax/ |
D | neon.c | 45 float32x2_t vmax_lo = vmax_f32(vget_low_f32(vmax), vget_high_f32(vmax)); in xnn_f32_rmax_ukernel__neon() 50 vmax_lo = vmax_f32(vmax_lo, vx); in xnn_f32_rmax_ukernel__neon()
|