1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 *******************************************************************************
22 * @file
23 *  ideint_cac_ssse3.c
24 *
25 * @brief
26 *  This file includes the definitions of the combing artifact check function
27 * of the de-interlacer and some variants of it.
28 *
29 * @author
30 *  Ittiam
31 *
32 * @par List of Functions:
33 *  cac_4x8()
34 *  ideint_cac()
35 *
36 * @remarks
37 *  In the de-interlacer workspace, cac is not a separate assembly module, as
38 * it comes along with the de_int_decision() function. In the C model, to
39 * keep things cleaner, it was made a separate function during the
40 * cac experiments, long after the assembly was written by Mudit.
41 *
42 *******************************************************************************
43 */
44 /*****************************************************************************/
45 /* File Includes                                                             */
46 /*****************************************************************************/
47 /* System include files */
48 #include <stdio.h>
49 #include <stdint.h>
50 #include <string.h>
51 #include <stdlib.h>
52 #include <immintrin.h>
53 
54 /* User include files */
55 #include "icv_datatypes.h"
56 #include "icv_macros.h"
57 #include "icv.h"
58 #include "icv_variance.h"
59 #include "icv_sad.h"
60 #include "ideint.h"
61 #include "ideint_defs.h"
62 #include "ideint_structs.h"
63 #include "ideint_cac.h"
64 
65 /**
66 *******************************************************************************
67 *
68 * @brief
69 * Combing artifact check function for 8x8 block
70 *
71 * @par   Description
72 * Determines CAC for an 8x8 block by checking each 4-column half inline and OR-ing the results
73 *
74 * @param[in] pu1_top
75 *  Top field
76 *
77 * @param[in] pu1_bot
78 *  Bottom field
79 *
80 * @param[in] top_strd
81 *  Top field Stride
82 *
83 * @param[in] bot_strd
84 *  Bottom field stride
85 *
86 * @returns
87 * combing artifact flag (1 = detected, 0 = not detected)
88 *
89 * @remarks
90 *
91 *******************************************************************************
92 */
WORD32 ideint_cac_8x8_ssse3(UWORD8 *pu1_top,
                            UWORD8 *pu1_bot,
                            WORD32 top_strd,
                            WORD32 bot_strd)
{
    /* Four rows of eight pixels are read from each field. In every          */
    /* two-entry array below, index 0 belongs to columns 0..3 and index 1    */
    /* to columns 4..7; each half gets its own decision and the function     */
    /* returns 1 when either half is flagged.                                */
    const __m128i zero = _mm_setzero_si128();
    __m128i top_px[4];          /* top field rows widened to 16-bit lanes    */
    __m128i bot_px[4];          /* bottom field rows widened to 16-bit lanes */
    WORD32 top_sum[4][2];       /* [row][half] pixel sums of the top field   */
    WORD32 bot_sum[4][2];       /* [row][half] pixel sums of the bottom field*/
    WORD32 adj[2] = {0, 0};     /* accumulated top-versus-bottom differences */
    WORD32 alt[2] = {0, 0};     /* accumulated row-to-next-row differences   */
    WORD32 ca = 0;              /* combing artifact result                   */
    WORD32 row, half;

    /* Fetch the rows and reduce each one to a pair of 4-pixel sums */
    for(row = 0; row < 4; row++)
    {
        __m128i top_sad, bot_sad;

        top_px[row] = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)pu1_top), zero);
        bot_px[row] = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)pu1_bot), zero);
        pu1_top += top_strd;
        pu1_bot += bot_strd;

        /* After widening, each 64-bit half of the register holds four      */
        /* pixels, so a SAD against zero yields one sum per half.           */
        top_sad = _mm_sad_epu8(top_px[row], zero);
        bot_sad = _mm_sad_epu8(bot_px[row], zero);

        top_sum[row][0] = _mm_cvtsi128_si32(top_sad);
        top_sum[row][1] = _mm_cvtsi128_si32(_mm_srli_si128(top_sad, 8));
        bot_sum[row][0] = _mm_cvtsi128_si32(bot_sad);
        bot_sum[row][1] = _mm_cvtsi128_si32(_mm_srli_si128(bot_sad, 8));
    }

    /* Row-sum contribution, taken over the row pairs (0,1) and (2,3) */
    for(half = 0; half < 2; half++)
    {
        for(row = 0; row < 4; row += 2)
        {
            const WORD32 t0 = top_sum[row][half];
            const WORD32 b0 = bot_sum[row][half];
            const WORD32 t1 = top_sum[row + 1][half];
            const WORD32 b1 = bot_sum[row + 1][half];
            WORD32 gap;

            /* Top-versus-bottom gaps count only once they reach the threshold */
            gap = ABS_DIF(t0, b0);
            if(gap >= RSUM_CSUM_THRESH)
                adj[half] += gap;

            gap = ABS_DIF(t1, b1);
            if(gap >= RSUM_CSUM_THRESH)
                adj[half] += gap;

            /* Gaps between the two rows of the same field always count */
            alt[half] += ABS_DIF(t0, t1);
            alt[half] += ABS_DIF(b0, b1);
        }
    }

    /* Column contribution to adj: per-column mean of the top rows against  */
    /* the per-column mean of the bottom rows                               */
    {
        const __m128i top_mean = _mm_avg_epu8(_mm_avg_epu8(top_px[0], top_px[1]),
                                              _mm_avg_epu8(top_px[2], top_px[3]));
        const __m128i bot_mean = _mm_avg_epu8(_mm_avg_epu8(bot_px[0], bot_px[1]),
                                              _mm_avg_epu8(bot_px[2], bot_px[3]));
        /* "greater than (limit - 1)" implements "at least limit" */
        const __m128i limit = _mm_set1_epi16((RSUM_CSUM_THRESH >> 2) - 1);
        __m128i gap;
        __m128i half_sums;

        gap = _mm_sub_epi16(_mm_max_epu8(top_mean, bot_mean),
                            _mm_min_epu8(top_mean, bot_mean));

        /* Zero the columns whose gap stays below the limit */
        gap = _mm_and_si128(gap, _mm_cmpgt_epi16(gap, limit));

        /* Every lane is in 0..255 with a zero high byte, so a SAD against  */
        /* zero adds lanes 0..3 into the low half and 4..7 into the high one */
        half_sums = _mm_sad_epu8(gap, zero);

        adj[0] += _mm_cvtsi128_si32(half_sums) << 2;
        adj[1] += _mm_cvtsi128_si32(_mm_srli_si128(half_sums, 8)) << 2;
    }

    /* Column contribution to alt: per-column mean of rows 0 and 2 of both  */
    /* fields against the per-column mean of rows 1 and 3 of both fields    */
    {
        const __m128i even_mean = _mm_avg_epu8(_mm_avg_epu8(top_px[0], bot_px[0]),
                                               _mm_avg_epu8(top_px[2], bot_px[2]));
        const __m128i odd_mean = _mm_avg_epu8(_mm_avg_epu8(top_px[1], bot_px[1]),
                                              _mm_avg_epu8(top_px[3], bot_px[3]));
        const __m128i half_sums = _mm_sad_epu8(even_mean, odd_mean);

        alt[0] += _mm_cvtsi128_si32(half_sums) << 2;
        alt[1] += _mm_cvtsi128_si32(_mm_srli_si128(half_sums, 8)) << 2;
    }

    /* Bias alt upwards, then flag a half when alt still falls short of adj */
    for(half = 0; half < 2; half++)
    {
        alt[half] += (alt[half] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1);
        ca |= (alt[half] < adj[half]);
    }

    return ca;
}
236 
237