1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 *******************************************************************************
22 * @file
23 * icv_sad.c
24 *
25 * @brief
26 * This file contains the functions to compute SAD
27 *
28 * @author
29 * Ittiam
30 *
31 * @par List of Functions:
32 * icv_sad_8x4_ssse3()
33 *
34 * @remarks
35 * None
36 *
37 *******************************************************************************
38 */
39 /*****************************************************************************/
40 /* File Includes */
41 /*****************************************************************************/
42 /* System include files */
43 #include <stdio.h>
44 #include <stdint.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <assert.h>
48 #include <immintrin.h>
49
50 /* User include files */
51 #include "icv_datatypes.h"
52 #include "icv_macros.h"
53 #include "icv_platform_macros.h"
54 #include "icv.h"
55
56 /**
57 *******************************************************************************
58 *
59 * @brief
60 * Compute 8x4 SAD
61 *
62 * @par Description
63 * Compute 8x4 sum of absolute differences between source and reference block
64 *
65 * @param[in] pu1_src
66 * Source buffer
67 *
68 * @param[in] pu1_ref
69 * Reference buffer
70 *
71 * @param[in] src_strd
72 * Source stride
73 *
74 * @param[in] ref_strd
75 * Reference stride
76 *
77 * @param[in] wd
78 * Assumed to be 8
79 *
80 * @param[in] ht
81 * Assumed to be 4
82
83 * @returns
84 * SAD
85 *
86 * @remarks
87 *
88 *******************************************************************************
89 */
WORD32 icv_sad_8x4_ssse3(UWORD8 *pu1_src,
                         UWORD8 *pu1_ref,
                         WORD32 src_strd,
                         WORD32 ref_strd,
                         WORD32 wd,
                         WORD32 ht)
{
    /*
     * Sum of absolute differences over an 8-wide, 4-high block of bytes.
     * Rows 0 and 2 are packed into one 128-bit register and rows 1 and 3
     * into another, so that two _mm_sad_epu8() operations cover all 32
     * pixels.
     */
    WORD32 sad;
    __m128 src_r0, src_r1;
    __m128 ref_r0, ref_r1;
    __m128i res_r0, res_r1;

    /* wd and ht are only consumed by the assertions below; UNUSED() keeps */
    /* them referenced (presumably for builds where ASSERT expands to      */
    /* nothing - confirm against icv_macros.h).                            */
    UNUSED(wd);
    UNUSED(ht);
    ASSERT(wd == 8);
    ASSERT(ht == 4);

    /* Load source */
    /* _mm_loadl_epi64() fills the low 8 bytes and zeroes the high 8 bytes. */
    /* _mm_loadh_pi() then overwrites only the high 8 bytes. It is a float  */
    /* intrinsic, so the integer loads are reinterpreted with               */
    /* _mm_castsi128_ps(); this is a no-op bit cast that, unlike a C-style  */
    /* vector cast, is accepted by every compiler providing the intrinsics. */
    src_r0 = _mm_castsi128_ps(_mm_loadl_epi64((const __m128i *)(pu1_src)));
    pu1_src += src_strd;

    src_r1 = _mm_castsi128_ps(_mm_loadl_epi64((const __m128i *)(pu1_src)));
    pu1_src += src_strd;

    src_r0 = _mm_loadh_pi(src_r0, (const __m64 *)(pu1_src));
    pu1_src += src_strd;

    /* Last row: the pointer is deliberately not advanced any further. */
    src_r1 = _mm_loadh_pi(src_r1, (const __m64 *)(pu1_src));

    /* Load reference (same row packing as the source) */
    ref_r0 = _mm_castsi128_ps(_mm_loadl_epi64((const __m128i *)(pu1_ref)));
    pu1_ref += ref_strd;

    ref_r1 = _mm_castsi128_ps(_mm_loadl_epi64((const __m128i *)(pu1_ref)));
    pu1_ref += ref_strd;

    ref_r0 = _mm_loadh_pi(ref_r0, (const __m64 *)(pu1_ref));
    pu1_ref += ref_strd;

    ref_r1 = _mm_loadh_pi(ref_r1, (const __m64 *)(pu1_ref));

    /* Compute SAD for each row */
    /* Each _mm_sad_epu8() yields two partial sums, one per 64-bit lane, */
    /* i.e. one per 8-pixel row held in that register.                   */
    res_r0 = _mm_sad_epu8(_mm_castps_si128(src_r0), _mm_castps_si128(ref_r0));
    res_r1 = _mm_sad_epu8(_mm_castps_si128(src_r1), _mm_castps_si128(ref_r1));

    /* Accumulate SAD */
    /* Add the two registers lane-wise, then fold the upper lane onto the */
    /* lower one so that the low 64 bits hold the sum of all four rows.   */
    res_r0 = _mm_add_epi64(res_r0, res_r1);
    res_r0 = _mm_add_epi64(res_r0, _mm_srli_si128(res_r0, 8));

    /* The total is at most 32 * 255, so the low 32 bits hold it exactly. */
    sad = _mm_cvtsi128_si32(res_r0);

    return sad;
}
146