neon-ld64.c.in - OpenGrok cross reference for /external/XNNPACK/src/qs8-vadd/neon-ld64.c.in

Lines Matching refs:ABC
8 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
35       const int8x8_t vx${ABC[N:N+8]} = vld1_s8(input_x); input_x += 8;
36       const int8x8_t vy${ABC[N:N+8]} = vld1_s8(input_y); input_y += 8;
39       const int16x8_t vex${ABC[N:N+8]} = vsubl_s8(vx${ABC[N:N+8]}, vx_zero_point);
40       const int16x8_t vey${ABC[N:N+8]} = vsubl_s8(vy${ABC[N:N+8]}, vy_zero_point);
43 … int32x4_t vacc${ABC[N:N+4]} = vmulq_s32(vmovl_s16(vget_low_s16(vex${ABC[N:N+8]})), vx_multiplier);
44 …int32x4_t vacc${ABC[N+4:N+8]} = vmulq_s32(vmovl_s16(vget_high_s16(vex${ABC[N:N+8]})), vx_multiplie…
47 …vacc${ABC[N:N+4]} = vmlaq_s32(vacc${ABC[N:N+4]}, vmovl_s16(vget_low_s16(vey${ABC[N:N+8]})), vy_mul…
48 …vacc${ABC[N+4:N+8]} = vmlaq_s32(vacc${ABC[N+4:N+8]}, vmovl_s16(vget_high_s16(vey${ABC[N:N+8]})), v…
51 …vacc${ABC[N:N+4]} = vsraq_n_s32(vacc${ABC[N:N+4]}, vbicq_s32(vacc${ABC[N:N+4]}, vzero_shift_mask),…
54       vacc${ABC[N:N+4]} = vrshlq_s32(vacc${ABC[N:N+4]}, vright_shift);
57 …const int16x8_t vacc${ABC[N:N+8]} = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc${ABC[N:N+4]}), vqmovn_…
61 …int8x16_t vout${ABC[N:N+16]} = vcombine_s8(vqmovn_s16(vacc${ABC[N:N+8]}), vqmovn_s16(vacc${ABC[N+8…
63         int8x8_t vout${ABC[N:N+8]} = vqmovn_s16(vacc${ABC[N:N+8]});
67         vout${ABC[N:N+16]} = vmaxq_s8(vout${ABC[N:N+16]}, voutput_min);
69         vout${ABC[N:N+8]} = vmax_s8(vout${ABC[N:N+8]}, vget_low_s8(voutput_min));
73         vout${ABC[N:N+16]} = vminq_s8(vout${ABC[N:N+16]}, voutput_max);
75         vout${ABC[N:N+8]} = vmin_s8(vout${ABC[N:N+8]}, vget_low_s8(voutput_max));
79         vst1q_s8(output, vout${ABC[N:N+16]}); output += 16;
81         vst1_s8(output, vout${ABC[N:N+8]}); output += 8;
86         const int8x8_t vx${ABC[0:8]} = vld1_s8(input_x); input_x += 8;
87         const int8x8_t vy${ABC[0:8]} = vld1_s8(input_y); input_y += 8;
89         const int8x8_t vx${ABC[0:8]} = vld1_s8(input_x);
90         const int8x8_t vy${ABC[0:8]} = vld1_s8(input_y);
92       const int16x8_t vex${ABC[0:8]} = vsubl_s8(vx${ABC[0:8]}, vx_zero_point);
93       const int16x8_t vey${ABC[0:8]} = vsubl_s8(vy${ABC[0:8]}, vy_zero_point);
95       int32x4_t vacc${ABC[0:4]} = vmulq_s32(vmovl_s16(vget_low_s16(vex${ABC[0:8]})), vx_multiplier);
96 …    int32x4_t vacc${ABC[4:8]} = vmulq_s32(vmovl_s16(vget_high_s16(vex${ABC[0:8]})), vx_multiplier);
98 …vacc${ABC[0:4]} = vmlaq_s32(vacc${ABC[0:4]}, vmovl_s16(vget_low_s16(vey${ABC[0:8]})), vy_multiplie…
99 …vacc${ABC[4:8]} = vmlaq_s32(vacc${ABC[4:8]}, vmovl_s16(vget_high_s16(vey${ABC[0:8]})), vy_multipli…
101 …  vacc${ABC[0:4]} = vsraq_n_s32(vacc${ABC[0:4]}, vbicq_s32(vacc${ABC[0:4]}, vzero_shift_mask), 31);
102 …  vacc${ABC[4:8]} = vsraq_n_s32(vacc${ABC[4:8]}, vbicq_s32(vacc${ABC[4:8]}, vzero_shift_mask), 31);
104       vacc${ABC[0:4]} = vrshlq_s32(vacc${ABC[0:4]}, vright_shift);
105       vacc${ABC[4:8]} = vrshlq_s32(vacc${ABC[4:8]}, vright_shift);
107 …const int16x8_t vacc${ABC[0:8]} = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc${ABC[0:4]}), vqmovn_s32(…
109       int8x8_t vout${ABC[0:8]} = vqmovn_s16(vacc${ABC[0:8]});
110       vout${ABC[0:8]} = vmax_s8(vout${ABC[0:8]}, vget_low_s8(voutput_min));
111       vout${ABC[0:8]} = vmin_s8(vout${ABC[0:8]}, vget_low_s8(voutput_max));
115           vst1_s8(output, vout${ABC[0:8]}); output += 8;
119 …vst1_lane_u32(__builtin_assume_aligned(output, 1), vreinterpret_u32_s8(vout${ABC[0:8]}), 0); outpu…
120             vout${ABC[0:8]} = vext_s8(vout${ABC[0:8]}, vout${ABC[0:8]}, 4);
123 …vst1_lane_u16(__builtin_assume_aligned(output, 1), vreinterpret_u16_s8(vout${ABC[0:8]}), 0); outpu…
124             vout${ABC[0:8]} = vext_s8(vout${ABC[0:8]}, vout${ABC[0:8]}, 2);
127             vst1_lane_s8(output, vout${ABC[0:8]}, 0);
133 …vst1_lane_u32(__builtin_assume_aligned(output, 1), vreinterpret_u32_s8(vout${ABC[0:8]}), 0); outpu…
134           vout${ABC[0:8]} = vext_s8(vout${ABC[0:8]}, vout${ABC[0:8]}, 4);
137 …vst1_lane_u16(__builtin_assume_aligned(output, 1), vreinterpret_u16_s8(vout${ABC[0:8]}), 0); outpu…
138           vout${ABC[0:8]} = vext_s8(vout${ABC[0:8]}, vout${ABC[0:8]}, 2);
141           vst1_lane_s8(output, vout${ABC[0:8]}, 0);