Lines Matching refs:LOADU
607 #define LOADU(x) _mm256_loadu_si256((const VREG*)(x)) macro
617 #define LOADU(x) _mm_loadu_si128((const VREG*)(x)) macro
678 s1n_0 = LOADU(in_even + 0); in opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2()
679 s1n_1 = LOADU(in_even + VREG_INT_COUNT); in opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2()
680 d1n_0 = LOADU(in_odd); in opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2()
681 d1n_1 = LOADU(in_odd + VREG_INT_COUNT); in opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2()
694 s1n_0 = LOADU(in_even + j * stride); in opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2()
695 s1n_1 = LOADU(in_even + j * stride + VREG_INT_COUNT); in opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2()
696 d1n_0 = LOADU(in_odd + j * stride); in opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2()
697 d1n_1 = LOADU(in_odd + j * stride + VREG_INT_COUNT); in opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2()
718 s1n_0 = LOADU(in_even + (OPJ_SIZE_T)((len - 1) / 2) * stride); in opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2()
726 s1n_1 = LOADU(in_even + (OPJ_SIZE_T)((len - 1) / 2) * stride + VREG_INT_COUNT); in opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2()
780 s1_0 = LOADU(in_even + stride); in opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2()
782 dc_0 = SUB(LOADU(in_odd + 0), in opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2()
783 SAR(ADD3(LOADU(in_even + 0), s1_0, two), 2)); in opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2()
784 STORE(tmp + PARALLEL_COLS_53 * 0, ADD(LOADU(in_even + 0), dc_0)); in opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2()
786 s1_1 = LOADU(in_even + stride + VREG_INT_COUNT); in opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2()
788 dc_1 = SUB(LOADU(in_odd + VREG_INT_COUNT), in opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2()
789 SAR(ADD3(LOADU(in_even + VREG_INT_COUNT), s1_1, two), 2)); in opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2()
791 ADD(LOADU(in_even + VREG_INT_COUNT), dc_1)); in opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2()
795 s2_0 = LOADU(in_even + (j + 1) * stride); in opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2()
796 s2_1 = LOADU(in_even + (j + 1) * stride + VREG_INT_COUNT); in opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2()
799 dn_0 = SUB(LOADU(in_odd + j * stride), in opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2()
801 dn_1 = SUB(LOADU(in_odd + j * stride + VREG_INT_COUNT), in opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2()
823 dn_0 = SUB(LOADU(in_odd + (OPJ_SIZE_T)(len / 2 - 1) * stride), in opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2()
825 dn_1 = SUB(LOADU(in_odd + (OPJ_SIZE_T)(len / 2 - 1) * stride + VREG_INT_COUNT), in opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2()
847 #undef LOADU