/external/XNNPACK/src/f32-ppmm/gen/
D | 8x8-minmax-neonfma.c | in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma():
     86:  const float32x4_t va4567 = vld1q_f32(a); a += 4;   (local)
     96:  vacc4x0123 = vfmaq_laneq_f32(vacc4x0123, vb0123, va4567, 0);
     97:  vacc5x0123 = vfmaq_laneq_f32(vacc5x0123, vb0123, va4567, 1);
     98:  vacc6x0123 = vfmaq_laneq_f32(vacc6x0123, vb0123, va4567, 2);
     99:  vacc7x0123 = vfmaq_laneq_f32(vacc7x0123, vb0123, va4567, 3);
    104:  vacc4x4567 = vfmaq_laneq_f32(vacc4x4567, vb4567, va4567, 0);
    105:  vacc5x4567 = vfmaq_laneq_f32(vacc5x4567, vb4567, va4567, 1);
    106:  vacc6x4567 = vfmaq_laneq_f32(vacc6x4567, vb4567, va4567, 2);
    107:  vacc7x4567 = vfmaq_laneq_f32(vacc7x4567, vb4567, va4567, 3);
    113:  const float32x4_t va4444 = vdupq_lane_f32(vget_low_f32(va4567), 0);
    [all …]
|
D | 8x8-minmax-neon.c | in xnn_f32_ppmm_minmax_ukernel_8x8__neon():
     86:  const float32x4_t va4567 = vld1q_f32(a); a += 4;   (local)
     95:  vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123, vget_low_f32(va4567), 0);
     96:  vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123, vget_low_f32(va4567), 1);
     97:  vacc6x0123 = vmlaq_lane_f32(vacc6x0123, vb0123, vget_high_f32(va4567), 0);
     98:  vacc7x0123 = vmlaq_lane_f32(vacc7x0123, vb0123, vget_high_f32(va4567), 1);
    103:  vacc4x4567 = vmlaq_lane_f32(vacc4x4567, vb4567, vget_low_f32(va4567), 0);
    104:  vacc5x4567 = vmlaq_lane_f32(vacc5x4567, vb4567, vget_low_f32(va4567), 1);
    105:  vacc6x4567 = vmlaq_lane_f32(vacc6x4567, vb4567, vget_high_f32(va4567), 0);
    106:  vacc7x4567 = vmlaq_lane_f32(vacc7x4567, vb4567, vget_high_f32(va4567), 1);
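Both ppmm entries above update four accumulator rows from one packed A vector: each lane of va4567 carries the a-value for one output row, broadcast against the shared B vector by a lane-indexed multiply-add. A minimal sketch of that per-step update, assuming an illustrative helper name and a compile-time split between the AArch64 FMA form (vfmaq_laneq_f32, the neonfma variant) and the ARMv7 form (vmlaq_lane_f32, the neon variant):

    #include <arm_neon.h>

    /* Sketch only, not the XNNPACK kernel: accumulate four output rows
     * against one B vector, broadcasting one lane of the packed A vector
     * per row. */
    static void ppmm_rows4_step(float32x4_t vb0123, float32x4_t va4567,
                                float32x4_t acc[4]) {
    #if defined(__aarch64__)
      acc[0] = vfmaq_laneq_f32(acc[0], vb0123, va4567, 0);
      acc[1] = vfmaq_laneq_f32(acc[1], vb0123, va4567, 1);
      acc[2] = vfmaq_laneq_f32(acc[2], vb0123, va4567, 2);
      acc[3] = vfmaq_laneq_f32(acc[3], vb0123, va4567, 3);
    #else
      acc[0] = vmlaq_lane_f32(acc[0], vb0123, vget_low_f32(va4567), 0);
      acc[1] = vmlaq_lane_f32(acc[1], vb0123, vget_low_f32(va4567), 1);
      acc[2] = vmlaq_lane_f32(acc[2], vb0123, vget_high_f32(va4567), 0);
      acc[3] = vmlaq_lane_f32(acc[3], vb0123, vget_high_f32(va4567), 1);
    #endif
    }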
|
/external/XNNPACK/src/f32-vbinary/gen/
D | vmaxc-wasmsimd-x86-x8.c | in xnn_f32_vmaxc_ukernel__wasmsimd_x86_x8():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     38:  const v128_t vm4567 = wasm_f32x4_le(va4567, vb);
     41:  v128_t vy4567 = wasm_v128_bitselect(vb, va4567, vm4567);
|
D | vminc-wasmsimd-x86-x8.c | in xnn_f32_vminc_ukernel__wasmsimd_x86_x8():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     38:  const v128_t vm4567 = wasm_f32x4_lt(va4567, vb);
     41:  v128_t vy4567 = wasm_v128_bitselect(va4567, vb, vm4567);
|
D | vmin-wasmsimd-x86-x8.c | in xnn_f32_vmin_ukernel__wasmsimd_x86_x8():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     42:  const v128_t vm4567 = wasm_f32x4_lt(va4567, vb4567);
     45:  v128_t vy4567 = wasm_v128_bitselect(va4567, vb4567, vm4567);
|
D | vmax-wasmsimd-x86-x8.c | in xnn_f32_vmax_ukernel__wasmsimd_x86_x8():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     42:  const v128_t vm4567 = wasm_f32x4_le(va4567, vb4567);
     45:  v128_t vy4567 = wasm_v128_bitselect(vb4567, va4567, vm4567);
|
D | vminc-wasmsimd-x86-x16.c | in xnn_f32_vminc_ukernel__wasmsimd_x86_x16():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     40:  const v128_t vm4567 = wasm_f32x4_lt(va4567, vb);
     45:  v128_t vy4567 = wasm_v128_bitselect(va4567, vb, vm4567);
|
D | vmaxc-wasmsimd-x86-x16.c | in xnn_f32_vmaxc_ukernel__wasmsimd_x86_x16():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     40:  const v128_t vm4567 = wasm_f32x4_le(va4567, vb);
     45:  v128_t vy4567 = wasm_v128_bitselect(vb, va4567, vm4567);
|
D | vmax-wasmsimd-x86-x16.c | in xnn_f32_vmax_ukernel__wasmsimd_x86_x16():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     46:  const v128_t vm4567 = wasm_f32x4_le(va4567, vb4567);
     51:  v128_t vy4567 = wasm_v128_bitselect(vb4567, va4567, vm4567);
|
D | vmin-wasmsimd-x86-x16.c | in xnn_f32_vmin_ukernel__wasmsimd_x86_x16():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     46:  const v128_t vm4567 = wasm_f32x4_lt(va4567, vb4567);
     51:  v128_t vy4567 = wasm_v128_bitselect(va4567, vb4567, vm4567);
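Note that every *-wasmsimd-x86 min/max entry above forms its result from a full-lane comparison mask plus wasm_v128_bitselect rather than from wasm_f32x4_min/wasm_f32x4_max (which the *-wasmsimd-arm variants further down use directly). Presumably the compare-and-select form lowers to a short compare/blend sequence on x86, while the WebAssembly min/max instructions carry NaN- and signed-zero handling that costs extra instructions there. A hedged sketch of the select-based min and max, with illustrative helper names:

    #include <wasm_simd128.h>

    /* wasm_v128_bitselect(a, b, m) takes bits from a where m is set and from
     * b elsewhere, so an all-ones comparison mask acts as a per-lane select. */
    static inline v128_t f32x4_min_x86(v128_t va, v128_t vb) {
      const v128_t vm = wasm_f32x4_lt(va, vb);  /* all-ones lanes where a < b */
      return wasm_v128_bitselect(va, vb, vm);   /* a where a < b, else b */
    }

    static inline v128_t f32x4_max_x86(v128_t va, v128_t vb) {
      const v128_t vm = wasm_f32x4_le(va, vb);  /* all-ones lanes where a <= b */
      return wasm_v128_bitselect(vb, va, vm);   /* b where a <= b, else a */
    }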
|
D | vsubc-wasmsimd-x8.c | in xnn_f32_vsubc_ukernel__wasmsimd_x8():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     38:  v128_t vy4567 = wasm_f32x4_sub(va4567, vb);
|
D | vmulc-wasmsimd-x8.c | in xnn_f32_vmulc_ukernel__wasmsimd_x8():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     38:  v128_t vy4567 = wasm_f32x4_mul(va4567, vb);
|
D | vrsubc-wasmsimd-x8.c | in xnn_f32_vrsubc_ukernel__wasmsimd_x8():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     38:  v128_t vy4567 = wasm_f32x4_sub(vb, va4567);
|
D | vminc-wasmsimd-arm-x8.c | in xnn_f32_vminc_ukernel__wasmsimd_arm_x8():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     38:  v128_t vy4567 = wasm_f32x4_min(va4567, vb);
|
D | vdivc-wasmsimd-x8.c | in xnn_f32_vdivc_ukernel__wasmsimd_x8():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     38:  v128_t vy4567 = wasm_f32x4_div(va4567, vb);
|
D | vrdivc-wasmsimd-x8.c | in xnn_f32_vrdivc_ukernel__wasmsimd_x8():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     38:  v128_t vy4567 = wasm_f32x4_div(vb, va4567);
|
D | vmaxc-wasmsimd-arm-x8.c | in xnn_f32_vmaxc_ukernel__wasmsimd_arm_x8():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     38:  v128_t vy4567 = wasm_f32x4_max(va4567, vb);
|
D | vaddc-wasmsimd-x8.c | in xnn_f32_vaddc_ukernel__wasmsimd_x8():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     38:  v128_t vy4567 = wasm_f32x4_add(va4567, vb);
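The constant-operand (*c) kernels above all share one x8 loop shape: the scalar operand b is broadcast into a v128 once before the loop, and each iteration loads two vectors of a (va0123 and va4567, the latter being the match shown) and applies the operation; the vrsubc/vrdivc variants simply swap the operand order. A sketch of that shape, assuming an illustrative function name and omitting the remainder path the real kernels carry:

    #include <stddef.h>
    #include <wasm_simd128.h>

    /* Sketch only: add a broadcast constant to a stream of floats,
     * 8 elements (two v128 vectors) per iteration. */
    void f32_vaddc_x8_sketch(size_t n, const float* a, float b, float* y) {
      const v128_t vb = wasm_f32x4_splat(b);
      for (; n >= 8; n -= 8) {
        const v128_t va0123 = wasm_v128_load(a);
        const v128_t va4567 = wasm_v128_load(a + 4);
        a += 8;
        wasm_v128_store(y,     wasm_f32x4_add(va0123, vb));
        wasm_v128_store(y + 4, wasm_f32x4_add(va4567, vb));
        y += 8;
      }
      /* Elements left over (n < 8) are not handled in this sketch. */
    }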
|
D | vmaxc-sse-x8.c | in xnn_f32_vmaxc_ukernel__sse_x8():
     36:  const __m128 va4567 = _mm_loadu_ps(a + 4);   (local)
     40:  __m128 vy4567 = _mm_max_ps(va4567, vb);
|
D | vminc-sse-x8.c | in xnn_f32_vminc_ukernel__sse_x8():
     36:  const __m128 va4567 = _mm_loadu_ps(a + 4);   (local)
     40:  __m128 vy4567 = _mm_min_ps(va4567, vb);
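The SSE constant-operand variants follow the same shape with unaligned loads and a broadcast via _mm_set1_ps; a compact sketch of one x8 step (helper name illustrative):

    #include <xmmintrin.h>

    /* Sketch only: per-lane max against a broadcast constant, 8 floats. */
    static inline void f32_vmaxc8(const float* a, float b, float* y) {
      const __m128 vb = _mm_set1_ps(b);
      _mm_storeu_ps(y,     _mm_max_ps(_mm_loadu_ps(a),     vb));
      _mm_storeu_ps(y + 4, _mm_max_ps(_mm_loadu_ps(a + 4), vb));
    }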
|
D | vminc-neon-x8.c | in xnn_f32_vminc_ukernel__neon_x8():
     35:  const float32x4_t va4567 = vld1q_f32(a); a += 4;   (local)
     38:  float32x4_t vy4567 = vminq_f32(va4567, vb);
|
D | vmaxc-neon-x8.c | in xnn_f32_vmaxc_ukernel__neon_x8():
     35:  const float32x4_t va4567 = vld1q_f32(a); a += 4;   (local)
     38:  float32x4_t vy4567 = vmaxq_f32(va4567, vb);
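The NEON constant-operand variants are analogous; the real kernels advance a with the pointer post-increment shown in the matches above, while this sketch (illustrative name) just takes a base pointer:

    #include <arm_neon.h>

    /* Sketch only: per-lane min against a broadcast constant, 8 floats. */
    static inline void f32_vminc8(const float* a, float b, float* y) {
      const float32x4_t vb = vdupq_n_f32(b);
      vst1q_f32(y,     vminq_f32(vld1q_f32(a),     vb));
      vst1q_f32(y + 4, vminq_f32(vld1q_f32(a + 4), vb));
    }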
|
D | vsqrdiffc-wasmsimd-x8.c | in xnn_f32_vsqrdiffc_ukernel__wasmsimd_x8():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     38:  v128_t vy4567 = wasm_f32x4_sub(va4567, vb);
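vsqrdiffc computes the squared difference against a constant, so only the subtraction appears in the match above; the difference is squared afterwards. A minimal per-vector sketch (helper name illustrative):

    #include <wasm_simd128.h>

    /* Sketch only: (a - b)^2 per lane. */
    static inline v128_t f32x4_sqrdiff(v128_t va, v128_t vb) {
      const v128_t vd = wasm_f32x4_sub(va, vb);
      return wasm_f32x4_mul(vd, vd);
    }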
|
D | vmul-wasmsimd-x8.c | in xnn_f32_vmul_ukernel__wasmsimd_x8():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     42:  v128_t vy4567 = wasm_f32x4_mul(va4567, vb4567);
|
D | vsub-wasmsimd-x8.c | in xnn_f32_vsub_ukernel__wasmsimd_x8():
     34:  const v128_t va4567 = wasm_v128_load(a + 4);   (local)
     42:  v128_t vy4567 = wasm_f32x4_sub(va4567, vb4567);
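The two-input vmul/vsub entries differ from the *c kernels only in that the second operand also streams from memory, so each x8 step issues two loads per input. A sketch of one such step (illustrative name):

    #include <wasm_simd128.h>

    /* Sketch only: elementwise multiply of two 8-float blocks. */
    static inline void f32_vmul8(const float* a, const float* b, float* y) {
      wasm_v128_store(y,     wasm_f32x4_mul(wasm_v128_load(a),     wasm_v128_load(b)));
      wasm_v128_store(y + 4, wasm_f32x4_mul(wasm_v128_load(a + 4), wasm_v128_load(b + 4)));
    }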
|