/external/XNNPACK/src/f32-dwconv/gen/ |
D | up8x25-minmax-neonfma-acc2.c | 32 const float32x4_t vmax = vld1q_dup_f32(&params->scalar.max); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 33 const float32x4_t vmin = vld1q_dup_f32(&params->scalar.min); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 167 float32x4_t vacc4567p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 170 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 171 const float32x4_t vi0x4567 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 172 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 173 const float32x4_t vk0x4567 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 177 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 178 const float32x4_t vi1x4567 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() [all …]
|
D | up8x25-minmax-neon-acc2.c | 32 const float32x4_t vmax = vld1q_dup_f32(&params->scalar.max); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 33 const float32x4_t vmin = vld1q_dup_f32(&params->scalar.min); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 167 float32x4_t vacc4567p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 170 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 171 const float32x4_t vi0x4567 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 172 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 173 const float32x4_t vk0x4567 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 177 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 178 const float32x4_t vi1x4567 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() [all …]
|
D | up8x25-minmax-neonfma.c | 32 const float32x4_t vmax = vld1q_dup_f32(&params->scalar.max); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 33 const float32x4_t vmin = vld1q_dup_f32(&params->scalar.min); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 167 float32x4_t vacc4567p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 170 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 171 const float32x4_t vi0x4567 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 172 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 173 const float32x4_t vk0x4567 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 177 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 178 const float32x4_t vi1x4567 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() [all …]
|
D | up8x25-minmax-neon.c | 32 const float32x4_t vmax = vld1q_dup_f32(&params->scalar.max); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 33 const float32x4_t vmin = vld1q_dup_f32(&params->scalar.min); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 167 float32x4_t vacc4567p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 170 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 171 const float32x4_t vi0x4567 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 172 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 173 const float32x4_t vk0x4567 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 177 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 178 const float32x4_t vi1x4567 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() [all …]
|
D | up8x9-minmax-neonfma-acc2.c | 32 const float32x4_t vmax = vld1q_dup_f32(&params->scalar.max); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 33 const float32x4_t vmin = vld1q_dup_f32(&params->scalar.min); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 86 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 87 float32x4_t vacc4567p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 90 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 91 const float32x4_t vi0x4567 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 92 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 93 const float32x4_t vk0x4567 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 97 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 98 const float32x4_t vi1x4567 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() [all …]
|
D | up8x9-minmax-neon-acc2.c | 32 const float32x4_t vmax = vld1q_dup_f32(&params->scalar.max); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 33 const float32x4_t vmin = vld1q_dup_f32(&params->scalar.min); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 86 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 87 float32x4_t vacc4567p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 90 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 91 const float32x4_t vi0x4567 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 92 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 93 const float32x4_t vk0x4567 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 97 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 98 const float32x4_t vi1x4567 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() [all …]
|
D | up8x9-minmax-neon.c | 32 const float32x4_t vmax = vld1q_dup_f32(&params->scalar.max); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 33 const float32x4_t vmin = vld1q_dup_f32(&params->scalar.min); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 86 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 87 float32x4_t vacc4567p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 90 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 91 const float32x4_t vi0x4567 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 92 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 93 const float32x4_t vk0x4567 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 97 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 98 const float32x4_t vi1x4567 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() [all …]
|
D | up8x9-minmax-neonfma.c | 32 const float32x4_t vmax = vld1q_dup_f32(&params->scalar.max); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 33 const float32x4_t vmin = vld1q_dup_f32(&params->scalar.min); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 86 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 87 float32x4_t vacc4567p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 90 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 91 const float32x4_t vi0x4567 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 92 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 93 const float32x4_t vk0x4567 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 97 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 98 const float32x4_t vi1x4567 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() [all …]
|
D | up4x25-minmax-neonfma-acc2.c | 32 const float32x4_t vmax = vld1q_dup_f32(&params->scalar.max); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 33 const float32x4_t vmin = vld1q_dup_f32(&params->scalar.min); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 169 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 170 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 173 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 174 const float32x4_t vk1x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 175 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 177 const float32x4_t vi2x0123 = vld1q_f32(i2); i2 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 178 const float32x4_t vk2x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() [all …]
|
D | up4x25-minmax-neon-acc2.c | 32 const float32x4_t vmax = vld1q_dup_f32(&params->scalar.max); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 33 const float32x4_t vmin = vld1q_dup_f32(&params->scalar.min); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 169 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 170 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 173 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 174 const float32x4_t vk1x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 175 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 177 const float32x4_t vi2x0123 = vld1q_f32(i2); i2 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 178 const float32x4_t vk2x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() [all …]
|
D | up4x25-minmax-neon.c | 32 const float32x4_t vmax = vld1q_dup_f32(&params->scalar.max); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 33 const float32x4_t vmin = vld1q_dup_f32(&params->scalar.min); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 169 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 170 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 173 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 174 const float32x4_t vk1x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 177 const float32x4_t vi2x0123 = vld1q_f32(i2); i2 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 178 const float32x4_t vk2x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 181 const float32x4_t vi3x0123 = vld1q_f32(i3); i3 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() [all …]
|
D | up4x25-minmax-neonfma.c | 32 const float32x4_t vmax = vld1q_dup_f32(&params->scalar.max); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 33 const float32x4_t vmin = vld1q_dup_f32(&params->scalar.min); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 169 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 170 const float32x4_t vk0x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 173 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 174 const float32x4_t vk1x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 177 const float32x4_t vi2x0123 = vld1q_f32(i2); i2 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 178 const float32x4_t vk2x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 181 const float32x4_t vi3x0123 = vld1q_f32(i3); i3 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() [all …]
|
/external/XNNPACK/src/f32-vsqrt/gen/ |
D | neonfma-nr1rsqrts1fma1adj-x40.c | 28 const float32x4_t vhalf = vmovq_n_f32(0.5f); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 30 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 31 const float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 32 const float32x4_t vx89AB = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 34 const float32x4_t vxGHIJ = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 36 const float32x4_t vxOPQR = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 37 const float32x4_t vxSTUV = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 38 const float32x4_t vxWXYZ = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() [all …]
|
D | neonfma-nr1rsqrts1fma1adj-x36.c | 28 const float32x4_t vhalf = vmovq_n_f32(0.5f); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 30 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 31 const float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 32 const float32x4_t vx89AB = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 34 const float32x4_t vxGHIJ = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 36 const float32x4_t vxOPQR = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 37 const float32x4_t vxSTUV = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 38 const float32x4_t vxWXYZ = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() [all …]
|
D | neonfma-nr1rsqrts1fma1adj-x32.c | 28 const float32x4_t vhalf = vmovq_n_f32(0.5f); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 30 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 31 const float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 32 const float32x4_t vx89AB = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 34 const float32x4_t vxGHIJ = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 36 const float32x4_t vxOPQR = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 37 const float32x4_t vxSTUV = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 39 float32x4_t vrsqrtx0123 = vrsqrteq_f32(vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() [all …]
|
D | neonfma-nr1rsqrts1fma1adj-x28.c | 28 const float32x4_t vhalf = vmovq_n_f32(0.5f); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 30 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 31 const float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 32 const float32x4_t vx89AB = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 34 const float32x4_t vxGHIJ = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 36 const float32x4_t vxOPQR = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 38 float32x4_t vrsqrtx0123 = vrsqrteq_f32(vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 39 float32x4_t vrsqrtx4567 = vrsqrteq_f32(vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() [all …]
|
D | neonfma-nr1rsqrts1fma1adj-x24.c | 28 const float32x4_t vhalf = vmovq_n_f32(0.5f); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 30 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 31 const float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 32 const float32x4_t vx89AB = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 34 const float32x4_t vxGHIJ = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 37 float32x4_t vrsqrtx0123 = vrsqrteq_f32(vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 38 float32x4_t vrsqrtx4567 = vrsqrteq_f32(vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 39 float32x4_t vrsqrtx89AB = vrsqrteq_f32(vx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() [all …]
|
D | neonfma-nr2fma1adj-x40.c | 28 const float32x4_t vhalf = vmovq_n_f32(0.5f); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 30 const float32x4_t vx0123 = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 31 const float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 32 const float32x4_t vx89AB = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 33 const float32x4_t vxCDEF = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 34 const float32x4_t vxGHIJ = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 35 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 36 const float32x4_t vxOPQR = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 37 const float32x4_t vxSTUV = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 38 const float32x4_t vxWXYZ = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() [all …]
|
/external/XNNPACK/src/f32-gavgpool/ |
D | 7p7x-minmax-neon-c4.c | 39 const float32x4_t vi0 = vld1q_f32(i0); i0 += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() 40 const float32x4_t vi1 = vld1q_f32(i1); i1 += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() 41 const float32x4_t vi2 = vld1q_f32(i2); i2 += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() 42 const float32x4_t vi3 = vld1q_f32(i3); i3 += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() 43 const float32x4_t vi4 = vld1q_f32(i4); i4 += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() 44 const float32x4_t vi5 = vld1q_f32(i5); i5 += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() 45 const float32x4_t vi6 = vld1q_f32(i6); i6 += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() 47 const float32x4_t vsum01 = vaddq_f32(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() 48 const float32x4_t vsum23 = vaddq_f32(vi2, vi3); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() 49 const float32x4_t vsum45 = vaddq_f32(vi4, vi5); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() [all …]
|
/external/XNNPACK/src/f32-ibilinear/gen/ |
D | neonfma-c8.c | 40 const float32x4_t valphah = vdupq_lane_f32(valphahv, 0); in xnn_f32_ibilinear_ukernel__neonfma_c8() 41 const float32x4_t valphav = vdupq_lane_f32(valphahv, 1); in xnn_f32_ibilinear_ukernel__neonfma_c8() 46 const float32x4_t vtl0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_ibilinear_ukernel__neonfma_c8() 47 const float32x4_t vtr0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_ibilinear_ukernel__neonfma_c8() 48 const float32x4_t vbl0123 = vld1q_f32(i2); i2 += 4; in xnn_f32_ibilinear_ukernel__neonfma_c8() 49 const float32x4_t vbr0123 = vld1q_f32(i3); i3 += 4; in xnn_f32_ibilinear_ukernel__neonfma_c8() 50 const float32x4_t vtl4567 = vld1q_f32(i0); i0 += 4; in xnn_f32_ibilinear_ukernel__neonfma_c8() 51 const float32x4_t vtr4567 = vld1q_f32(i1); i1 += 4; in xnn_f32_ibilinear_ukernel__neonfma_c8() 52 const float32x4_t vbl4567 = vld1q_f32(i2); i2 += 4; in xnn_f32_ibilinear_ukernel__neonfma_c8() 53 const float32x4_t vbr4567 = vld1q_f32(i3); i3 += 4; in xnn_f32_ibilinear_ukernel__neonfma_c8() [all …]
|
/external/XNNPACK/src/f32-maxpool/ |
D | 9p8x-minmax-neon-c4.c | 28 const float32x4_t voutput_max = vld1q_dup_f32(&params->scalar.max); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() 29 const float32x4_t voutput_min = vld1q_dup_f32(&params->scalar.min); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() 78 const float32x4_t vi0 = vld1q_f32(i0); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() 80 const float32x4_t vi1 = vld1q_f32(i1); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() 82 const float32x4_t vi2 = vld1q_f32(i2); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() 84 const float32x4_t vi3 = vld1q_f32(i3); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() 86 const float32x4_t vi4 = vld1q_f32(i4); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() 88 const float32x4_t vi5 = vld1q_f32(i5); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() 90 const float32x4_t vi6 = vld1q_f32(i6); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() 92 const float32x4_t vi7 = vld1q_f32(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() [all …]
|
/external/webrtc/common_audio/third_party/ooura/fft_size_128/ |
D | ooura_fft_neon.cc | 27 const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign); in cft1st_128_neon() 31 float32x4_t a00v = vld1q_f32(&a[j + 0]); in cft1st_128_neon() 32 float32x4_t a04v = vld1q_f32(&a[j + 4]); in cft1st_128_neon() 33 float32x4_t a08v = vld1q_f32(&a[j + 8]); in cft1st_128_neon() 34 float32x4_t a12v = vld1q_f32(&a[j + 12]); in cft1st_128_neon() 35 float32x4_t a01v = vcombine_f32(vget_low_f32(a00v), vget_low_f32(a08v)); in cft1st_128_neon() 36 float32x4_t a23v = vcombine_f32(vget_high_f32(a00v), vget_high_f32(a08v)); in cft1st_128_neon() 37 float32x4_t a45v = vcombine_f32(vget_low_f32(a04v), vget_low_f32(a12v)); in cft1st_128_neon() 38 float32x4_t a67v = vcombine_f32(vget_high_f32(a04v), vget_high_f32(a12v)); in cft1st_128_neon() 39 const float32x4_t wk1rv = vld1q_f32(&rdft_wk1r[k2]); in cft1st_128_neon() [all …]
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-p5-div-x24.c | 26 const float32x4_t vmagic_bias = vmovq_n_f32(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 27 const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 28 const float32x4_t vln2 = vmovq_n_f32(0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 29 const float32x4_t vc5 = vmovq_n_f32(-0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 30 const float32x4_t vc4 = vmovq_n_f32(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 31 const float32x4_t vc3 = vmovq_n_f32(-0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 32 const float32x4_t vc2 = vmovq_n_f32(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 33 const float32x4_t vc1 = vmovq_n_f32(-0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 34 const float32x4_t vone = vmovq_n_f32(1.0f); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 35 const float32x4_t vdenorm_cutoff = vmovq_n_f32(-0x1.5D589Ep+6f); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() [all …]
|
/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-neonfma-1x4-acc3.c | 34 const float32x4_t vmax = vld1q_dup_f32(&params->neon.max); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() 35 const float32x4_t vmin = vld1q_dup_f32(&params->neon.min); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() 37 const float32x4_t vw0123 = vld1q_f32(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() 38 const float32x4_t vw4567 = vld1q_f32(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() 39 const float32x4_t vw89AB = vld1q_f32(weights + 8); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() 40 const float32x4_t vwCDEF = vld1q_f32(weights + 12); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() 41 const float32x4_t vwGHIJ = vld1q_f32(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() 42 const float32x4_t vwKLMN = vld1q_f32(weights + 20); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() 64 float32x4_t vi0x0123 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() 65 float32x4_t vi1x0123 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() [all …]
|
D | 5x5p2-minmax-neon-1x4-acc3.c | 34 const float32x4_t vmax = vld1q_dup_f32(&params->neon.max); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() 35 const float32x4_t vmin = vld1q_dup_f32(&params->neon.min); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() 37 const float32x4_t vw0123 = vld1q_f32(weights); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() 38 const float32x4_t vw4567 = vld1q_f32(weights + 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() 39 const float32x4_t vw89AB = vld1q_f32(weights + 8); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() 40 const float32x4_t vwCDEF = vld1q_f32(weights + 12); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() 41 const float32x4_t vwGHIJ = vld1q_f32(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() 42 const float32x4_t vwKLMN = vld1q_f32(weights + 20); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() 64 float32x4_t vi0x0123 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() 65 float32x4_t vi1x0123 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() [all …]
|