1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /*!
21 ******************************************************************************
22 * \file ihevce_chroma_had_satd.c
23 *
24 * \brief
25 * This file contains function definitions of chroma HAD SATD functions
26 *
27 * \date
28 * 15/07/2013
29 *
30 * \author
31 * Ittiam
32 *
33 * List of Functions
* ihevce_chroma_HAD_4x4_8bit()
35 * ihevce_chroma_compute_AC_HAD_4x4_8bit()
36 * ihevce_hbd_chroma_HAD_4x4()
37 * ihevce_hbd_chroma_compute_AC_HAD_4x4()
38 * ihevce_chroma_HAD_8x8_8bit()
39 * ihevce_hbd_chroma_HAD_8x8()
40 * ihevce_chroma_HAD_16x16_8bit()
41 * ihevce_hbd_chroma_HAD_16x16()
42 *
43 ******************************************************************************
44 */
45
46 /*****************************************************************************/
47 /* File Includes */
48 /*****************************************************************************/
49 /* System include files */
50 #include <stdio.h>
51 #include <string.h>
52 #include <stdlib.h>
53 #include <assert.h>
54 #include <stdarg.h>
55 #include <math.h>
56
57 /* User include files */
58 #include "ihevc_typedefs.h"
59 #include "ihevc_debug.h"
60 #include "itt_video_api.h"
61
62 #include "ihevce_api.h"
63 #include "ihevce_defs.h"
64 #include "ihevce_had_satd.h"
65
66 /*****************************************************************************/
67 /* Function Definitions */
68 /*****************************************************************************/
69
70 /**
71 *******************************************************************************
72 *
73 * @brief
74 * Chroma Hadamard Transform of 4x4 block (8-bit input)
75 *
76 * @par Description:
77 *
78 * @param[in] pu1_origin
79 * UWORD8 pointer to the source block (u or v, interleaved)
80 *
81 * @param[in] src_strd
82 * WORD32 Source stride
83 *
84 * @param[in] pu1_pred_buf
85 * UWORD8 pointer to the prediction block (u or v, interleaved)
86 *
87 * @param[in] pred_strd
88 * WORD32 Pred stride
89 *
90 * @param[in] pi2_dst
91 * WORD16 pointer to the transform block
92 *
93 * @param[in] dst_strd (u or v, interleaved)
94 * WORD32 Destination stride
95 *
96 * @returns
97 * Hadamard SAD
98 *
99 * @remarks
100 * Not updating the transform destination now. Only returning the SATD
101 *
102 *******************************************************************************
103 */
ihevce_chroma_HAD_4x4_8bit(UWORD8 * pu1_origin,WORD32 src_strd,UWORD8 * pu1_pred_buf,WORD32 pred_strd,WORD16 * pi2_dst,WORD32 dst_strd)104 UWORD32 ihevce_chroma_HAD_4x4_8bit(
105 UWORD8 *pu1_origin,
106 WORD32 src_strd,
107 UWORD8 *pu1_pred_buf,
108 WORD32 pred_strd,
109 WORD16 *pi2_dst,
110 WORD32 dst_strd)
111 {
112 WORD32 k;
113 WORD16 diff[16], m[16], d[16];
114 UWORD32 u4_sad = 0;
115
116 (void)pi2_dst;
117 (void)dst_strd;
118 for(k = 0; k < 16; k += 4)
119 {
120 /* u or v, interleaved */
121 diff[k + 0] = pu1_origin[2 * 0] - pu1_pred_buf[2 * 0];
122 diff[k + 1] = pu1_origin[2 * 1] - pu1_pred_buf[2 * 1];
123 diff[k + 2] = pu1_origin[2 * 2] - pu1_pred_buf[2 * 2];
124 diff[k + 3] = pu1_origin[2 * 3] - pu1_pred_buf[2 * 3];
125
126 pu1_pred_buf += pred_strd;
127 pu1_origin += src_strd;
128 }
129
130 /*===== hadamard transform =====*/
131 m[0] = diff[0] + diff[12];
132 m[1] = diff[1] + diff[13];
133 m[2] = diff[2] + diff[14];
134 m[3] = diff[3] + diff[15];
135 m[4] = diff[4] + diff[8];
136 m[5] = diff[5] + diff[9];
137 m[6] = diff[6] + diff[10];
138 m[7] = diff[7] + diff[11];
139 m[8] = diff[4] - diff[8];
140 m[9] = diff[5] - diff[9];
141 m[10] = diff[6] - diff[10];
142 m[11] = diff[7] - diff[11];
143 m[12] = diff[0] - diff[12];
144 m[13] = diff[1] - diff[13];
145 m[14] = diff[2] - diff[14];
146 m[15] = diff[3] - diff[15];
147
148 d[0] = m[0] + m[4];
149 d[1] = m[1] + m[5];
150 d[2] = m[2] + m[6];
151 d[3] = m[3] + m[7];
152 d[4] = m[8] + m[12];
153 d[5] = m[9] + m[13];
154 d[6] = m[10] + m[14];
155 d[7] = m[11] + m[15];
156 d[8] = m[0] - m[4];
157 d[9] = m[1] - m[5];
158 d[10] = m[2] - m[6];
159 d[11] = m[3] - m[7];
160 d[12] = m[12] - m[8];
161 d[13] = m[13] - m[9];
162 d[14] = m[14] - m[10];
163 d[15] = m[15] - m[11];
164
165 m[0] = d[0] + d[3];
166 m[1] = d[1] + d[2];
167 m[2] = d[1] - d[2];
168 m[3] = d[0] - d[3];
169 m[4] = d[4] + d[7];
170 m[5] = d[5] + d[6];
171 m[6] = d[5] - d[6];
172 m[7] = d[4] - d[7];
173 m[8] = d[8] + d[11];
174 m[9] = d[9] + d[10];
175 m[10] = d[9] - d[10];
176 m[11] = d[8] - d[11];
177 m[12] = d[12] + d[15];
178 m[13] = d[13] + d[14];
179 m[14] = d[13] - d[14];
180 m[15] = d[12] - d[15];
181
182 d[0] = m[0] + m[1];
183 d[1] = m[0] - m[1];
184 d[2] = m[2] + m[3];
185 d[3] = m[3] - m[2];
186 d[4] = m[4] + m[5];
187 d[5] = m[4] - m[5];
188 d[6] = m[6] + m[7];
189 d[7] = m[7] - m[6];
190 d[8] = m[8] + m[9];
191 d[9] = m[8] - m[9];
192 d[10] = m[10] + m[11];
193 d[11] = m[11] - m[10];
194 d[12] = m[12] + m[13];
195 d[13] = m[12] - m[13];
196 d[14] = m[14] + m[15];
197 d[15] = m[15] - m[14];
198
199 /*===== sad =====*/
200 for(k = 0; k < 16; ++k)
201 {
202 u4_sad += (d[k] > 0 ? d[k] : -d[k]);
203 }
204 u4_sad = ((u4_sad + 2) >> 2);
205
206 return u4_sad;
207 }
208
209 /**
210 *******************************************************************************
211 *
212 * @brief
213 * Chroma Hadamard Transform of 4x4 block (8-bit input) with DC suppressed
214 *
215 * @par Description:
216 *
217 * @param[in] pu1_origin
218 * UWORD8 pointer to the source block (u or v, interleaved)
219 *
220 * @param[in] src_strd
221 * WORD32 Source stride
222 *
223 * @param[in] pu1_pred_buf
224 * UWORD8 pointer to the prediction block (u or v, interleaved)
225 *
226 * @param[in] pred_strd
227 * WORD32 Pred stride
228 *
229 * @param[in] pi2_dst
230 * WORD16 pointer to the transform block
231 *
232 * @param[in] dst_strd (u or v, interleaved)
233 * WORD32 Destination stride
234 *
235 * @returns
236 * Hadamard SAD
237 *
238 * @remarks
239 * Not updating the transform destination now. Only returning the SATD
240 *
241 *******************************************************************************
242 */
ihevce_chroma_compute_AC_HAD_4x4_8bit(UWORD8 * pu1_origin,WORD32 src_strd,UWORD8 * pu1_pred_buf,WORD32 pred_strd,WORD16 * pi2_dst,WORD32 dst_strd)243 UWORD32 ihevce_chroma_compute_AC_HAD_4x4_8bit(
244 UWORD8 *pu1_origin,
245 WORD32 src_strd,
246 UWORD8 *pu1_pred_buf,
247 WORD32 pred_strd,
248 WORD16 *pi2_dst,
249 WORD32 dst_strd)
250 {
251 WORD32 k;
252 WORD16 diff[16], m[16], d[16];
253 UWORD32 u4_sad = 0;
254
255 (void)pi2_dst;
256 (void)dst_strd;
257 for(k = 0; k < 16; k += 4)
258 {
259 /* u or v, interleaved */
260 diff[k + 0] = pu1_origin[2 * 0] - pu1_pred_buf[2 * 0];
261 diff[k + 1] = pu1_origin[2 * 1] - pu1_pred_buf[2 * 1];
262 diff[k + 2] = pu1_origin[2 * 2] - pu1_pred_buf[2 * 2];
263 diff[k + 3] = pu1_origin[2 * 3] - pu1_pred_buf[2 * 3];
264
265 pu1_pred_buf += pred_strd;
266 pu1_origin += src_strd;
267 }
268
269 /*===== hadamard transform =====*/
270 m[0] = diff[0] + diff[12];
271 m[1] = diff[1] + diff[13];
272 m[2] = diff[2] + diff[14];
273 m[3] = diff[3] + diff[15];
274 m[4] = diff[4] + diff[8];
275 m[5] = diff[5] + diff[9];
276 m[6] = diff[6] + diff[10];
277 m[7] = diff[7] + diff[11];
278 m[8] = diff[4] - diff[8];
279 m[9] = diff[5] - diff[9];
280 m[10] = diff[6] - diff[10];
281 m[11] = diff[7] - diff[11];
282 m[12] = diff[0] - diff[12];
283 m[13] = diff[1] - diff[13];
284 m[14] = diff[2] - diff[14];
285 m[15] = diff[3] - diff[15];
286
287 d[0] = m[0] + m[4];
288 d[1] = m[1] + m[5];
289 d[2] = m[2] + m[6];
290 d[3] = m[3] + m[7];
291 d[4] = m[8] + m[12];
292 d[5] = m[9] + m[13];
293 d[6] = m[10] + m[14];
294 d[7] = m[11] + m[15];
295 d[8] = m[0] - m[4];
296 d[9] = m[1] - m[5];
297 d[10] = m[2] - m[6];
298 d[11] = m[3] - m[7];
299 d[12] = m[12] - m[8];
300 d[13] = m[13] - m[9];
301 d[14] = m[14] - m[10];
302 d[15] = m[15] - m[11];
303
304 m[0] = d[0] + d[3];
305 m[1] = d[1] + d[2];
306 m[2] = d[1] - d[2];
307 m[3] = d[0] - d[3];
308 m[4] = d[4] + d[7];
309 m[5] = d[5] + d[6];
310 m[6] = d[5] - d[6];
311 m[7] = d[4] - d[7];
312 m[8] = d[8] + d[11];
313 m[9] = d[9] + d[10];
314 m[10] = d[9] - d[10];
315 m[11] = d[8] - d[11];
316 m[12] = d[12] + d[15];
317 m[13] = d[13] + d[14];
318 m[14] = d[13] - d[14];
319 m[15] = d[12] - d[15];
320
321 d[0] = m[0] + m[1];
322 d[1] = m[0] - m[1];
323 d[2] = m[2] + m[3];
324 d[3] = m[3] - m[2];
325 d[4] = m[4] + m[5];
326 d[5] = m[4] - m[5];
327 d[6] = m[6] + m[7];
328 d[7] = m[7] - m[6];
329 d[8] = m[8] + m[9];
330 d[9] = m[8] - m[9];
331 d[10] = m[10] + m[11];
332 d[11] = m[11] - m[10];
333 d[12] = m[12] + m[13];
334 d[13] = m[12] - m[13];
335 d[14] = m[14] + m[15];
336 d[15] = m[15] - m[14];
337
338 /* DC masking */
339 d[0] = 0;
340
341 /*===== sad =====*/
342 for(k = 0; k < 16; ++k)
343 {
344 u4_sad += (d[k] > 0 ? d[k] : -d[k]);
345 }
346 u4_sad = ((u4_sad + 2) >> 2);
347
348 return u4_sad;
349 }
350
351 /**
352 *******************************************************************************
353 *
354 * @brief
355 * Chroma Hadamard Transform of 8x8 block (8-bit input)
356 *
357 * @par Description:
358 *
359 * @param[in] pu1_origin
360 * UWORD8 pointer to the source block (u or v, interleaved)
361 *
362 * @param[in] src_strd
363 * WORD32 Source stride
364 *
365 * @param[in] pu1_pred_buf
366 * UWORD8 pointer to the prediction block (u or v, interleaved)
367 *
368 * @param[in] pred_strd
369 * WORD32 Pred stride
370 *
371 * @param[in] pi2_dst
372 * WORD16 pointer to the transform block
373 *
374 * @param[in] dst_strd (u or v, interleaved)
375 * WORD32 Destination stride
376 *
377 * @returns
378 * Hadamard SAD
379 *
380 * @remarks
381 * Not updating the transform destination now. Only returning the SATD
382 *
383 *******************************************************************************
384 */
ihevce_chroma_HAD_8x8_8bit(UWORD8 * pu1_origin,WORD32 src_strd,UWORD8 * pu1_pred_buf,WORD32 pred_strd,WORD16 * pi2_dst,WORD32 dst_strd)385 UWORD32 ihevce_chroma_HAD_8x8_8bit(
386 UWORD8 *pu1_origin,
387 WORD32 src_strd,
388 UWORD8 *pu1_pred_buf,
389 WORD32 pred_strd,
390 WORD16 *pi2_dst,
391 WORD32 dst_strd)
392 {
393 WORD32 k, i, j, jj;
394 UWORD32 u4_sad = 0;
395 WORD16 diff[64], m1[8][8], m2[8][8], m3[8][8];
396
397 (void)pi2_dst;
398 (void)dst_strd;
399 for(k = 0; k < 64; k += 8)
400 {
401 /* u or v, interleaved */
402 diff[k + 0] = pu1_origin[2 * 0] - pu1_pred_buf[2 * 0];
403 diff[k + 1] = pu1_origin[2 * 1] - pu1_pred_buf[2 * 1];
404 diff[k + 2] = pu1_origin[2 * 2] - pu1_pred_buf[2 * 2];
405 diff[k + 3] = pu1_origin[2 * 3] - pu1_pred_buf[2 * 3];
406 diff[k + 4] = pu1_origin[2 * 4] - pu1_pred_buf[2 * 4];
407 diff[k + 5] = pu1_origin[2 * 5] - pu1_pred_buf[2 * 5];
408 diff[k + 6] = pu1_origin[2 * 6] - pu1_pred_buf[2 * 6];
409 diff[k + 7] = pu1_origin[2 * 7] - pu1_pred_buf[2 * 7];
410
411 pu1_pred_buf += pred_strd;
412 pu1_origin += src_strd;
413 }
414
415 /*===== hadamard transform =====*/
416 // horizontal
417 for(j = 0; j < 8; j++)
418 {
419 jj = j << 3;
420 m2[j][0] = diff[jj] + diff[jj + 4];
421 m2[j][1] = diff[jj + 1] + diff[jj + 5];
422 m2[j][2] = diff[jj + 2] + diff[jj + 6];
423 m2[j][3] = diff[jj + 3] + diff[jj + 7];
424 m2[j][4] = diff[jj] - diff[jj + 4];
425 m2[j][5] = diff[jj + 1] - diff[jj + 5];
426 m2[j][6] = diff[jj + 2] - diff[jj + 6];
427 m2[j][7] = diff[jj + 3] - diff[jj + 7];
428
429 m1[j][0] = m2[j][0] + m2[j][2];
430 m1[j][1] = m2[j][1] + m2[j][3];
431 m1[j][2] = m2[j][0] - m2[j][2];
432 m1[j][3] = m2[j][1] - m2[j][3];
433 m1[j][4] = m2[j][4] + m2[j][6];
434 m1[j][5] = m2[j][5] + m2[j][7];
435 m1[j][6] = m2[j][4] - m2[j][6];
436 m1[j][7] = m2[j][5] - m2[j][7];
437
438 m2[j][0] = m1[j][0] + m1[j][1];
439 m2[j][1] = m1[j][0] - m1[j][1];
440 m2[j][2] = m1[j][2] + m1[j][3];
441 m2[j][3] = m1[j][2] - m1[j][3];
442 m2[j][4] = m1[j][4] + m1[j][5];
443 m2[j][5] = m1[j][4] - m1[j][5];
444 m2[j][6] = m1[j][6] + m1[j][7];
445 m2[j][7] = m1[j][6] - m1[j][7];
446 }
447
448 // vertical
449 for(i = 0; i < 8; i++)
450 {
451 m3[0][i] = m2[0][i] + m2[4][i];
452 m3[1][i] = m2[1][i] + m2[5][i];
453 m3[2][i] = m2[2][i] + m2[6][i];
454 m3[3][i] = m2[3][i] + m2[7][i];
455 m3[4][i] = m2[0][i] - m2[4][i];
456 m3[5][i] = m2[1][i] - m2[5][i];
457 m3[6][i] = m2[2][i] - m2[6][i];
458 m3[7][i] = m2[3][i] - m2[7][i];
459
460 m1[0][i] = m3[0][i] + m3[2][i];
461 m1[1][i] = m3[1][i] + m3[3][i];
462 m1[2][i] = m3[0][i] - m3[2][i];
463 m1[3][i] = m3[1][i] - m3[3][i];
464 m1[4][i] = m3[4][i] + m3[6][i];
465 m1[5][i] = m3[5][i] + m3[7][i];
466 m1[6][i] = m3[4][i] - m3[6][i];
467 m1[7][i] = m3[5][i] - m3[7][i];
468
469 m2[0][i] = m1[0][i] + m1[1][i];
470 m2[1][i] = m1[0][i] - m1[1][i];
471 m2[2][i] = m1[2][i] + m1[3][i];
472 m2[3][i] = m1[2][i] - m1[3][i];
473 m2[4][i] = m1[4][i] + m1[5][i];
474 m2[5][i] = m1[4][i] - m1[5][i];
475 m2[6][i] = m1[6][i] + m1[7][i];
476 m2[7][i] = m1[6][i] - m1[7][i];
477 }
478
479 /*===== sad =====*/
480 for(i = 0; i < 8; i++)
481 {
482 for(j = 0; j < 8; j++)
483 {
484 u4_sad += (m2[i][j] > 0 ? m2[i][j] : -m2[i][j]);
485 }
486 }
487 u4_sad = ((u4_sad + 4) >> 3);
488
489 return u4_sad;
490 }
491
492 /**
493 *******************************************************************************
494 *
495 * @brief
496 * Chroma Hadamard Transform of 16x16 block (8-bit input)
497 *
498 * @par Description:
499 *
500 * @param[in] pu1_origin
501 * UWORD8 pointer to the source block (u or v, interleaved)
502 *
503 * @param[in] src_strd
504 * WORD32 Source stride
505 *
506 * @param[in] pu1_pred_buf
507 * UWORD8 pointer to the prediction block (u or v, interleaved)
508 *
509 * @param[in] pred_strd
510 * WORD32 Pred stride
511 *
512 * @param[in] pi2_dst
513 * WORD16 pointer to the transform block
514 *
515 * @param[in] dst_strd (u or v, interleaved)
516 * WORD32 Destination stride
517 *
518 * @returns
519 * Hadamard SAD
520 *
521 * @remarks
522 * Not updating the transform destination now. Only returning the SATD
523 *
524 *******************************************************************************
525 */
ihevce_chroma_HAD_16x16_8bit(UWORD8 * pu1_origin,WORD32 src_strd,UWORD8 * pu1_pred_buf,WORD32 pred_strd,WORD16 * pi2_dst,WORD32 dst_strd)526 UWORD32 ihevce_chroma_HAD_16x16_8bit(
527 UWORD8 *pu1_origin,
528 WORD32 src_strd,
529 UWORD8 *pu1_pred_buf,
530 WORD32 pred_strd,
531 WORD16 *pi2_dst,
532 WORD32 dst_strd)
533 {
534 UWORD32 au4_sad[4], u4_result = 0;
535 WORD32 i;
536
537 for(i = 0; i < 4; i++)
538 {
539 au4_sad[i] = ihevce_chroma_HAD_8x8_8bit(
540 pu1_origin, src_strd, pu1_pred_buf, pred_strd, pi2_dst, dst_strd);
541
542 if(i == 0 || i == 2)
543 {
544 pu1_origin += 16;
545 pu1_pred_buf += 16;
546 }
547
548 if(i == 1)
549 {
550 pu1_origin += (8 * src_strd) - 16;
551 pu1_pred_buf += (8 * pred_strd) - 16;
552 }
553
554 u4_result += au4_sad[i];
555 }
556
557 return u4_result;
558 }
559