Lines Matching refs:p0
281 #define GET_NOTHEV(p1, p0, q0, q1, hev_thresh, not_hev) { \ argument
283 const __m128i t_1 = MM_ABS(p1, p0); \
294 #define GET_BASE_DELTA(p1, p0, q0, q1, o) { \ argument
295 const __m128i qp0 = _mm_subs_epi8(q0, p0); /* q0 - p0 */ \
302 #define DO_SIMPLE_FILTER(p0, q0, fl) { \ argument
314 p0 = _mm_adds_epi8(p0, v3); /* p0 += v3 */ \
328 static void NeedsFilter(const __m128i* p1, const __m128i* p0, const __m128i* q0, in NeedsFilter() argument
335 *mask = MM_ABS(*p0, *q0); // abs(p0 - q0) in NeedsFilter()
348 static WEBP_INLINE void DoFilter2(const __m128i* p1, __m128i* p0, __m128i* q0, in DoFilter2() argument
355 NeedsFilter(p1, p0, q0, q1, thresh, &mask); in DoFilter2()
358 FLIP_SIGN_BIT2(*p0, *q0); in DoFilter2()
360 GET_BASE_DELTA(p1s, *p0, *q0, q1s, a); in DoFilter2()
362 DO_SIMPLE_FILTER(*p0, *q0, a); in DoFilter2()
365 FLIP_SIGN_BIT2(*p0, *q0); in DoFilter2()
369 static WEBP_INLINE void DoFilter4(__m128i* p1, __m128i *p0, in DoFilter4() argument
377 GET_NOTHEV(*p1, *p0, *q0, *q1, hev_thresh, not_hev); in DoFilter4()
380 FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1); in DoFilter4()
384 t2 = _mm_subs_epi8(*q0, *p0); // q0 - p0 in DoFilter4()
401 *p0 = _mm_adds_epi8(*p0, t2); // p0 += t2 in DoFilter4()
412 FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1); in DoFilter4()
416 static WEBP_INLINE void DoFilter6(__m128i *p2, __m128i* p1, __m128i *p0, in DoFilter6() argument
423 GET_NOTHEV(*p1, *p0, *q0, *q1, hev_thresh, not_hev); in DoFilter6()
426 FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1); in DoFilter6()
429 GET_BASE_DELTA(*p1, *p0, *q0, *q1, a); in DoFilter6()
434 DO_SIMPLE_FILTER(*p0, *q0, f); in DoFilter6()
462 UPDATE_2PIXELS(*p0, *q0, a0_lo, a0_hi); in DoFilter6()
466 FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1); in DoFilter6()
516 __m128i* p1, __m128i* p0, in Load16x4() argument
535 Load8x4(r8, stride, p0, q1); in Load16x4()
543 *p1 = _mm_unpacklo_epi64(t1, *p0); in Load16x4()
544 *p0 = _mm_unpackhi_epi64(t1, *p0); in Load16x4()
559 __m128i* p1, __m128i* p0, in Store16x4() argument
565 t1 = *p0; in Store16x4()
566 *p0 = _mm_unpacklo_epi8(*p1, t1); in Store16x4()
577 t1 = *p0; in Store16x4()
578 *p0 = _mm_unpacklo_epi16(t1, *q0); in Store16x4()
587 Store4x4(p0, r0, stride); in Store16x4()
602 __m128i p0 = _mm_loadu_si128((__m128i*)&p[-stride]); in SimpleVFilter16SSE2() local
606 DoFilter2(&p1, &p0, &q0, &q1, thresh); in SimpleVFilter16SSE2()
609 _mm_storeu_si128((__m128i*)&p[-stride], p0); in SimpleVFilter16SSE2()
614 __m128i p1, p0, q0, q1; in SimpleHFilter16SSE2() local
618 Load16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1); in SimpleHFilter16SSE2()
619 DoFilter2(&p1, &p0, &q0, &q1, thresh); in SimpleHFilter16SSE2()
620 Store16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1); in SimpleHFilter16SSE2()
642 #define MAX_DIFF1(p3, p2, p1, p0, m) { \ argument
645 m = _mm_max_epu8(m, MM_ABS(p1, p0)); \
648 #define MAX_DIFF2(p3, p2, p1, p0, m) { \ argument
651 m = _mm_max_epu8(m, MM_ABS(p1, p0)); \
679 #define COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask) { \ argument
684 NeedsFilter(&p1, &p0, &q0, &q1, thresh, &fl_yes); \
693 __m128i p2, p1, p0, q0, q1, q2; in VFilter16SSE2() local
696 LOAD_H_EDGES4(p - 4 * stride, stride, t1, p2, p1, p0); in VFilter16SSE2()
697 MAX_DIFF1(t1, p2, p1, p0, mask); in VFilter16SSE2()
703 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask); in VFilter16SSE2()
704 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh); in VFilter16SSE2()
709 _mm_storeu_si128((__m128i*)&p[-1 * stride], p0); in VFilter16SSE2()
718 __m128i p3, p2, p1, p0, q0, q1, q2, q3; in HFilter16SSE2() local
721 Load16x4(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0); // p3, p2, p1, p0 in HFilter16SSE2()
722 MAX_DIFF1(p3, p2, p1, p0, mask); in HFilter16SSE2()
727 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask); in HFilter16SSE2()
728 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh); in HFilter16SSE2()
730 Store16x4(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0); in HFilter16SSE2()
739 __m128i t1, t2, p1, p0, q0, q1; in VFilter16iSSE2() local
743 LOAD_H_EDGES4(p, stride, t2, t1, p1, p0); in VFilter16iSSE2()
744 MAX_DIFF1(t2, t1, p1, p0, mask); in VFilter16iSSE2()
752 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask); in VFilter16iSSE2()
753 DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh); in VFilter16iSSE2()
757 _mm_storeu_si128((__m128i*)&p[-1 * stride], p0); in VFilter16iSSE2()
768 __m128i t1, t2, p1, p0, q0, q1; in HFilter16iSSE2() local
772 Load16x4(b, b + 8 * stride, stride, &t2, &t1, &p1, &p0); // p3, p2, p1, p0 in HFilter16iSSE2()
773 MAX_DIFF1(t2, t1, p1, p0, mask); in HFilter16iSSE2()
779 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask); in HFilter16iSSE2()
780 DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh); in HFilter16iSSE2()
783 Store16x4(b, b + 8 * stride, stride, &p1, &p0, &q0, &q1); in HFilter16iSSE2()
793 __m128i t1, p2, p1, p0, q0, q1, q2; in VFilter8SSE2() local
796 LOADUV_H_EDGES4(u - 4 * stride, v - 4 * stride, stride, t1, p2, p1, p0); in VFilter8SSE2()
797 MAX_DIFF1(t1, p2, p1, p0, mask); in VFilter8SSE2()
803 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask); in VFilter8SSE2()
804 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh); in VFilter8SSE2()
809 STOREUV(p0, u, v, -1 * stride); in VFilter8SSE2()
818 __m128i p3, p2, p1, p0, q0, q1, q2, q3; in HFilter8SSE2() local
822 Load16x4(tu, tv, stride, &p3, &p2, &p1, &p0); // p3, p2, p1, p0 in HFilter8SSE2()
823 MAX_DIFF1(p3, p2, p1, p0, mask); in HFilter8SSE2()
828 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask); in HFilter8SSE2()
829 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh); in HFilter8SSE2()
831 Store16x4(tu, tv, stride, &p3, &p2, &p1, &p0); in HFilter8SSE2()
838 __m128i t1, t2, p1, p0, q0, q1; in VFilter8iSSE2() local
841 LOADUV_H_EDGES4(u, v, stride, t2, t1, p1, p0); in VFilter8iSSE2()
842 MAX_DIFF1(t2, t1, p1, p0, mask); in VFilter8iSSE2()
851 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask); in VFilter8iSSE2()
852 DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh); in VFilter8iSSE2()
856 STOREUV(p0, u, v, -1 * stride); in VFilter8iSSE2()
864 __m128i t1, t2, p1, p0, q0, q1; in HFilter8iSSE2() local
865 Load16x4(u, v, stride, &t2, &t1, &p1, &p0); // p3, p2, p1, p0 in HFilter8iSSE2()
866 MAX_DIFF1(t2, t1, p1, p0, mask); in HFilter8iSSE2()
873 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask); in HFilter8iSSE2()
874 DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh); in HFilter8iSSE2()
878 Store16x4(u, v, stride, &p1, &p0, &q0, &q1); in HFilter8iSSE2()