Lines Matching refs:int16x8_t
393 static WEBP_INLINE int16x8_t ConvertU8ToS16(uint8x8_t v) { in ConvertU8ToS16()
400 const int16x8_t dst01, in SaturateAndStore4x4()
401 const int16x8_t dst23) { in SaturateAndStore4x4()
413 static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23, in Add4x4()
426 const int16x8_t dst01_s16 = ConvertU8ToS16(vreinterpret_u8_u32(dst01)); in Add4x4()
427 const int16x8_t dst23_s16 = ConvertU8ToS16(vreinterpret_u8_u32(dst23)); in Add4x4()
430 const int16x8_t out01 = vrsraq_n_s16(dst01_s16, row01, 3); in Add4x4()
431 const int16x8_t out23 = vrsraq_n_s16(dst23_s16, row23, 3); in Add4x4()
759 const int16x8_t kCst63 = vdupq_n_s16(63); in ApplyFilter6()
765 const int16x8_t s1_lo = vmlal_s8(kCst63, kCst27, delta_lo); // 63 + 27 * a in ApplyFilter6()
766 const int16x8_t s1_hi = vmlal_s8(kCst63, kCst27, delta_hi); // 63 + 27 * a in ApplyFilter6()
767 const int16x8_t s2_lo = vmlal_s8(kCst63, kCst18, delta_lo); // 63 + 18 * a in ApplyFilter6()
768 const int16x8_t s2_hi = vmlal_s8(kCst63, kCst18, delta_hi); // 63 + 18 * a in ApplyFilter6()
769 const int16x8_t s3_lo = vmlal_s8(kCst63, kCst9, delta_lo); // 63 + 9 * a in ApplyFilter6()
770 const int16x8_t s3_hi = vmlal_s8(kCst63, kCst9, delta_hi); // 63 + 9 * a in ApplyFilter6()
995 static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1, in Transpose8x2()
1008 const int16x8_t B1 = in TransformPass()
1012 const int16x8_t C0 = vsraq_n_s16(B1, vqdmulhq_n_s16(B1, kC1), 1); in TransformPass()
1013 const int16x8_t C1 = vqdmulhq_n_s16(B1, kC2); in TransformPass()
1022 const int16x8_t D0 = vcombine_s16(a, b); // D0 = a | b in TransformPass()
1023 const int16x8_t D1 = vcombine_s16(d, c); // D1 = d | c in TransformPass()
1024 const int16x8_t E0 = vqaddq_s16(D0, D1); // a+d | b+c in TransformPass()
1025 const int16x8_t E_tmp = vqsubq_s16(D0, D1); // a-d | b-c in TransformPass()
1026 const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp)); in TransformPass()
1181 const int16x8_t DC = vdupq_n_s16(in[0]); in TransformDC()
1260 const int16x8_t m0_m1 = vcombine_s16(vqadd_s16(B, d4), vqadd_s16(B, c4)); in TransformAC3()
1261 const int16x8_t m2_m3 = vcombine_s16(vqsub_s16(B, c4), vqsub_s16(B, d4)); in TransformAC3()
1293 const int16x8_t d = vreinterpretq_s16_u16(vsubl_u8(T, TL)); // A[c] - A[-1] in TrueMotion()
1297 const int16x8_t L0 = ConvertU8ToS16(vld1_dup_u8(dst + 0 * BPS - 1)); in TrueMotion()
1298 const int16x8_t L1 = ConvertU8ToS16(vld1_dup_u8(dst + 1 * BPS - 1)); in TrueMotion()
1299 const int16x8_t L2 = ConvertU8ToS16(vld1_dup_u8(dst + 2 * BPS - 1)); in TrueMotion()
1300 const int16x8_t L3 = ConvertU8ToS16(vld1_dup_u8(dst + 3 * BPS - 1)); in TrueMotion()
1301 const int16x8_t r0 = vaddq_s16(L0, d); // L[r] + A[c] - A[-1] in TrueMotion()
1302 const int16x8_t r1 = vaddq_s16(L1, d); in TrueMotion()
1303 const int16x8_t r2 = vaddq_s16(L2, d); in TrueMotion()
1304 const int16x8_t r3 = vaddq_s16(L3, d); in TrueMotion()
1554 const int16x8_t d_lo = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(T), TL)); in TM16()
1555 const int16x8_t d_hi = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(T), TL)); in TM16()
1559 const int16x8_t L0 = ConvertU8ToS16(vld1_dup_u8(dst + 0 * BPS - 1)); in TM16()
1560 const int16x8_t L1 = ConvertU8ToS16(vld1_dup_u8(dst + 1 * BPS - 1)); in TM16()
1561 const int16x8_t L2 = ConvertU8ToS16(vld1_dup_u8(dst + 2 * BPS - 1)); in TM16()
1562 const int16x8_t L3 = ConvertU8ToS16(vld1_dup_u8(dst + 3 * BPS - 1)); in TM16()
1563 const int16x8_t r0_lo = vaddq_s16(L0, d_lo); // L[r] + A[c] - A[-1] in TM16()
1564 const int16x8_t r1_lo = vaddq_s16(L1, d_lo); in TM16()
1565 const int16x8_t r2_lo = vaddq_s16(L2, d_lo); in TM16()
1566 const int16x8_t r3_lo = vaddq_s16(L3, d_lo); in TM16()
1567 const int16x8_t r0_hi = vaddq_s16(L0, d_hi); in TM16()
1568 const int16x8_t r1_hi = vaddq_s16(L1, d_hi); in TM16()
1569 const int16x8_t r2_hi = vaddq_s16(L2, d_hi); in TM16()
1570 const int16x8_t r3_hi = vaddq_s16(L3, d_hi); in TM16()