Lines Matching refs:int16x8_t
40 static WEBP_INLINE int16x8_t ConvertU8ToS16(uint32x2_t v) { in ConvertU8ToS16()
47 const int16x8_t dst01, in SaturateAndStore4x4()
48 const int16x8_t dst23) { in SaturateAndStore4x4()
60 static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23, in Add4x4()
73 const int16x8_t dst01_s16 = ConvertU8ToS16(dst01); in Add4x4()
74 const int16x8_t dst23_s16 = ConvertU8ToS16(dst23); in Add4x4()
77 const int16x8_t out01 = vrsraq_n_s16(dst01_s16, row01, 3); in Add4x4()
78 const int16x8_t out23 = vrsraq_n_s16(dst23_s16, row23, 3); in Add4x4()
84 static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1, in Transpose8x2()
97 const int16x8_t B1 = in TransformPass()
101 const int16x8_t C0 = vsraq_n_s16(B1, vqdmulhq_n_s16(B1, kC1), 1); in TransformPass()
102 const int16x8_t C1 = vqdmulhq_n_s16(B1, kC2); in TransformPass()
111 const int16x8_t D0 = vcombine_s16(a, b); // D0 = a | b in TransformPass()
112 const int16x8_t D1 = vcombine_s16(d, c); // D1 = d | c in TransformPass()
113 const int16x8_t E0 = vqaddq_s16(D0, D1); // a+d | b+c in TransformPass()
114 const int16x8_t E_tmp = vqsubq_s16(D0, D1); // a-d | b-c in TransformPass()
115 const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp)); in TransformPass()
270 int16x8_t* const out01, in Transpose4x4_S16()
271 int16x8_t* const out32) { in Transpose4x4_S16()
286 static WEBP_INLINE int16x8_t DiffU8ToS16(const uint8x8_t a, in DiffU8ToS16()
293 int16x8_t d0d1, d3d2; // working 4x4 int16 variables in FTransform()
297 const int16x8_t D0D1 = DiffU8ToS16(vget_low_u8(S0), vget_low_u8(R0)); in FTransform()
298 const int16x8_t D2D3 = DiffU8ToS16(vget_high_u8(S0), vget_high_u8(R0)); in FTransform()
308 const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2); // d0+d3 | d1+d2 (=a0|a1) in FTransform()
309 const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2); // d0-d3 | d1-d2 (=a3|a2) in FTransform()
310 const int16x8_t a0a1_2 = vshlq_n_s16(a0a1, 3); in FTransform()
327 const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2); // d0+d3 | d1+d2 (=a0|a1) in FTransform()
328 const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2); // d0-d3 | d1-d2 (=a3|a2) in FTransform()
595 const int16x8_t q_a0 = vreinterpretq_s16_u16(vaddl_u8(d4_in.val[0], in DistoHorizontalPass()
597 const int16x8_t q_a1 = vreinterpretq_s16_u16(vaddl_u8(d4_in.val[1], in DistoHorizontalPass()
599 const int16x8_t q_a3 = vreinterpretq_s16_u16(vsubl_u8(d4_in.val[0], in DistoHorizontalPass()
601 const int16x8_t q_a2 = vreinterpretq_s16_u16(vsubl_u8(d4_in.val[1], in DistoHorizontalPass()
615 const int16x8_t q_a0 = vaddq_s16(q4_in.val[0], q4_in.val[2]); in DistoVerticalPass()
616 const int16x8_t q_a1 = vaddq_s16(q4_in.val[1], q4_in.val[3]); in DistoVerticalPass()
617 const int16x8_t q_a2 = vsubq_s16(q4_in.val[1], q4_in.val[3]); in DistoVerticalPass()
618 const int16x8_t q_a3 = vsubq_s16(q4_in.val[0], q4_in.val[2]); in DistoVerticalPass()
736 const int16x8_t a0 = vld1q_s16(out + 0); in CollectHistogram()
737 const int16x8_t b0 = vld1q_s16(out + 8); in CollectHistogram()
820 static int16x8_t Quantize(int16_t* const in, in Quantize()
828 const int16x8_t a = vld1q_s16(in + offset); // in in Quantize()
830 const int16x8_t sign = vshrq_n_s16(a, 15); // sign in Quantize()
839 const int16x8_t c2 = veorq_s16(vreinterpretq_s16_u16(c1), sign); in Quantize()
840 const int16x8_t c3 = vsubq_s16(c2, sign); // restore sign in Quantize()
841 const int16x8_t c4 = vmulq_s16(c3, vreinterpretq_s16_u16(q)); in Quantize()
856 const int16x8_t out0 = Quantize(in, mtx, 0); in QuantizeBlock()
857 const int16x8_t out1 = Quantize(in, mtx, 8); in QuantizeBlock()