Lines Matching refs:dst
79 extern "C" void rsdIntrinsicConvolve3x3_K(void *dst, const void *y0, in rsdIntrinsicConvolve3x3_K() argument
133 _mm_storel_epi64((__m128i *)dst, o0); in rsdIntrinsicConvolve3x3_K()
138 dst = (char *)dst + 8; in rsdIntrinsicConvolve3x3_K()
142 void rsdIntrinsicColorMatrix4x4_K(void *dst, const void *src, in rsdIntrinsicColorMatrix4x4_K() argument
190 _mm_storeu_si128((__m128i *)dst, o4); in rsdIntrinsicColorMatrix4x4_K()
193 dst = (char *)dst + 16; in rsdIntrinsicColorMatrix4x4_K()
197 void rsdIntrinsicColorMatrix3x3_K(void *dst, const void *src, in rsdIntrinsicColorMatrix3x3_K() argument
244 _mm_storeu_si128((__m128i *)dst, o4); in rsdIntrinsicColorMatrix3x3_K()
247 dst = (char *)dst + 16; in rsdIntrinsicColorMatrix3x3_K()
251 void rsdIntrinsicColorMatrixDot_K(void *dst, const void *src, in rsdIntrinsicColorMatrixDot_K() argument
296 _mm_storeu_si128((__m128i *)dst, o4); in rsdIntrinsicColorMatrixDot_K()
299 dst = (char *)dst + 16; in rsdIntrinsicColorMatrixDot_K()
303 void rsdIntrinsicBlurVFU4_K(void *dst, in rsdIntrinsicBlurVFU4_K() argument
334 _mm_storeu_ps((float *)dst, bp0); in rsdIntrinsicBlurVFU4_K()
335 _mm_storeu_ps((float *)dst + 4, bp1); in rsdIntrinsicBlurVFU4_K()
336 dst = (char *)dst + 32; in rsdIntrinsicBlurVFU4_K()
340 void rsdIntrinsicBlurHFU4_K(void *dst, in rsdIntrinsicBlurHFU4_K() argument
368 *(int *)dst = _mm_cvtsi128_si32(_mm_shuffle_epi8(o, Mu8)); in rsdIntrinsicBlurHFU4_K()
369 dst = (char *)dst + 4; in rsdIntrinsicBlurHFU4_K()
373 void rsdIntrinsicBlurHFU1_K(void *dst, in rsdIntrinsicBlurHFU1_K() argument
405 *(int *)dst = _mm_cvtsi128_si32(_mm_shuffle_epi8(o, Mu8)); in rsdIntrinsicBlurHFU1_K()
406 dst = (char *)dst + 4; in rsdIntrinsicBlurHFU1_K()
410 void rsdIntrinsicYuv_K(void *dst, in rsdIntrinsicYuv_K() argument
465 _mm_storeu_si128((__m128i *)dst, y4); in rsdIntrinsicYuv_K()
468 dst = (__m128i *)dst + 1; in rsdIntrinsicYuv_K()
472 void rsdIntrinsicYuvR_K(void *dst, in rsdIntrinsicYuvR_K() argument
527 _mm_storeu_si128((__m128i *)dst, y4); in rsdIntrinsicYuvR_K()
530 dst = (__m128i *)dst + 1; in rsdIntrinsicYuvR_K()
534 void rsdIntrinsicYuv2_K(void *dst, in rsdIntrinsicYuv2_K() argument
588 _mm_storeu_si128((__m128i *)dst, y4); in rsdIntrinsicYuv2_K()
592 dst = (__m128i *)dst + 1; in rsdIntrinsicYuv2_K()
596 extern "C" void rsdIntrinsicConvolve5x5_K(void *dst, const void *y0, in rsdIntrinsicConvolve5x5_K() argument
749 _mm_storeu_si128((__m128i *)dst, o0); in rsdIntrinsicConvolve5x5_K()
756 dst = (char *)dst + 16; in rsdIntrinsicConvolve5x5_K()
760 void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendSrcOver_K() argument
771 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendSrcOver_K()
772 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendSrcOver_K()
808 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendSrcOver_K()
809 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendSrcOver_K()
812 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendSrcOver_K()
816 void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendDstOver_K() argument
827 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendDstOver_K()
828 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendDstOver_K()
865 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendDstOver_K()
866 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendDstOver_K()
869 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendDstOver_K()
873 void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendSrcIn_K() argument
882 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendSrcIn_K()
883 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendSrcIn_K()
915 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendSrcIn_K()
916 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendSrcIn_K()
919 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendSrcIn_K()
923 void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendDstIn_K() argument
932 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendDstIn_K()
933 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendDstIn_K()
965 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendDstIn_K()
966 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendDstIn_K()
969 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendDstIn_K()
973 void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendSrcOut_K() argument
984 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendSrcOut_K()
985 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendSrcOut_K()
1017 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendSrcOut_K()
1018 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendSrcOut_K()
1021 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendSrcOut_K()
1025 void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendDstOut_K() argument
1036 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendDstOut_K()
1037 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendDstOut_K()
1069 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendDstOut_K()
1070 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendDstOut_K()
1073 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendDstOut_K()
1077 void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendSrcAtop_K() argument
1089 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendSrcAtop_K()
1090 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendSrcAtop_K()
1140 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendSrcAtop_K()
1141 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendSrcAtop_K()
1144 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendSrcAtop_K()
1148 void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendDstAtop_K() argument
1160 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendDstAtop_K()
1161 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendDstAtop_K()
1211 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendDstAtop_K()
1212 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendDstAtop_K()
1215 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendDstAtop_K()
1219 void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendXor_K() argument
1226 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendXor_K()
1227 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendXor_K()
1232 _mm_storeu_si128((__m128i *)dst, out0); in rsdIntrinsicBlendXor_K()
1233 _mm_storeu_si128((__m128i *)dst + 1, out1); in rsdIntrinsicBlendXor_K()
1236 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendXor_K()
1240 void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendMultiply_K() argument
1248 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendMultiply_K()
1249 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendMultiply_K()
1269 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendMultiply_K()
1270 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendMultiply_K()
1273 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendMultiply_K()
1277 void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendAdd_K() argument
1284 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendAdd_K()
1285 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendAdd_K()
1290 _mm_storeu_si128((__m128i *)dst, out0); in rsdIntrinsicBlendAdd_K()
1291 _mm_storeu_si128((__m128i *)dst + 1, out1); in rsdIntrinsicBlendAdd_K()
1294 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendAdd_K()
1298 void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendSub_K() argument
1305 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendSub_K()
1306 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendSub_K()
1311 _mm_storeu_si128((__m128i *)dst, out0); in rsdIntrinsicBlendSub_K()
1312 _mm_storeu_si128((__m128i *)dst + 1, out1); in rsdIntrinsicBlendSub_K()
1315 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendSub_K()