1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /*!
21 ******************************************************************************
22 * \file ihevce_chroma_had_satd.c
23 *
24 * \brief
25 *    This file contains function definitions of chroma HAD SATD functions
26 *
27 * \date
28 *    15/07/2013
29 *
30 * \author
31 *    Ittiam
32 *
33 * List of Functions
*  ihevce_chroma_HAD_4x4_8bit()
35 *  ihevce_chroma_compute_AC_HAD_4x4_8bit()
36 *  ihevce_hbd_chroma_HAD_4x4()
37 *  ihevce_hbd_chroma_compute_AC_HAD_4x4()
38 *  ihevce_chroma_HAD_8x8_8bit()
39 *  ihevce_hbd_chroma_HAD_8x8()
40 *  ihevce_chroma_HAD_16x16_8bit()
41 *  ihevce_hbd_chroma_HAD_16x16()
42 *
43 ******************************************************************************
44 */
45 
46 /*****************************************************************************/
47 /* File Includes                                                             */
48 /*****************************************************************************/
49 /* System include files */
50 #include <stdio.h>
51 #include <string.h>
52 #include <stdlib.h>
53 #include <assert.h>
54 #include <stdarg.h>
55 #include <math.h>
56 
57 /* User include files */
58 #include "ihevc_typedefs.h"
59 #include "ihevc_debug.h"
60 #include "itt_video_api.h"
61 
62 #include "ihevce_api.h"
63 #include "ihevce_defs.h"
64 #include "ihevce_had_satd.h"
65 
66 /*****************************************************************************/
67 /* Function Definitions                                                      */
68 /*****************************************************************************/
69 
70 /**
71 *******************************************************************************
72 *
73 * @brief
74 *  Chroma Hadamard Transform of 4x4 block (8-bit input)
75 *
76 * @par Description:
77 *
78 * @param[in] pu1_origin
79 *  UWORD8 pointer to the source block (u or v, interleaved)
80 *
81 * @param[in] src_strd
82 *  WORD32 Source stride
83 *
84 * @param[in] pu1_pred_buf
85 *  UWORD8 pointer to the prediction block (u or v, interleaved)
86 *
87 * @param[in] pred_strd
88 *  WORD32 Pred stride
89 *
90 * @param[in] pi2_dst
91 *  WORD16 pointer to the transform block
92 *
93 * @param[in] dst_strd (u or v, interleaved)
94 *  WORD32 Destination stride
95 *
96 * @returns
97 *  Hadamard SAD
98 *
99 * @remarks
100 *  Not updating the transform destination now. Only returning the SATD
101 *
102 *******************************************************************************
103 */
ihevce_chroma_HAD_4x4_8bit(UWORD8 * pu1_origin,WORD32 src_strd,UWORD8 * pu1_pred_buf,WORD32 pred_strd,WORD16 * pi2_dst,WORD32 dst_strd)104 UWORD32 ihevce_chroma_HAD_4x4_8bit(
105     UWORD8 *pu1_origin,
106     WORD32 src_strd,
107     UWORD8 *pu1_pred_buf,
108     WORD32 pred_strd,
109     WORD16 *pi2_dst,
110     WORD32 dst_strd)
111 {
112     WORD32 k;
113     WORD16 diff[16], m[16], d[16];
114     UWORD32 u4_sad = 0;
115 
116     (void)pi2_dst;
117     (void)dst_strd;
118     for(k = 0; k < 16; k += 4)
119     {
120         /* u or v, interleaved */
121         diff[k + 0] = pu1_origin[2 * 0] - pu1_pred_buf[2 * 0];
122         diff[k + 1] = pu1_origin[2 * 1] - pu1_pred_buf[2 * 1];
123         diff[k + 2] = pu1_origin[2 * 2] - pu1_pred_buf[2 * 2];
124         diff[k + 3] = pu1_origin[2 * 3] - pu1_pred_buf[2 * 3];
125 
126         pu1_pred_buf += pred_strd;
127         pu1_origin += src_strd;
128     }
129 
130     /*===== hadamard transform =====*/
131     m[0] = diff[0] + diff[12];
132     m[1] = diff[1] + diff[13];
133     m[2] = diff[2] + diff[14];
134     m[3] = diff[3] + diff[15];
135     m[4] = diff[4] + diff[8];
136     m[5] = diff[5] + diff[9];
137     m[6] = diff[6] + diff[10];
138     m[7] = diff[7] + diff[11];
139     m[8] = diff[4] - diff[8];
140     m[9] = diff[5] - diff[9];
141     m[10] = diff[6] - diff[10];
142     m[11] = diff[7] - diff[11];
143     m[12] = diff[0] - diff[12];
144     m[13] = diff[1] - diff[13];
145     m[14] = diff[2] - diff[14];
146     m[15] = diff[3] - diff[15];
147 
148     d[0] = m[0] + m[4];
149     d[1] = m[1] + m[5];
150     d[2] = m[2] + m[6];
151     d[3] = m[3] + m[7];
152     d[4] = m[8] + m[12];
153     d[5] = m[9] + m[13];
154     d[6] = m[10] + m[14];
155     d[7] = m[11] + m[15];
156     d[8] = m[0] - m[4];
157     d[9] = m[1] - m[5];
158     d[10] = m[2] - m[6];
159     d[11] = m[3] - m[7];
160     d[12] = m[12] - m[8];
161     d[13] = m[13] - m[9];
162     d[14] = m[14] - m[10];
163     d[15] = m[15] - m[11];
164 
165     m[0] = d[0] + d[3];
166     m[1] = d[1] + d[2];
167     m[2] = d[1] - d[2];
168     m[3] = d[0] - d[3];
169     m[4] = d[4] + d[7];
170     m[5] = d[5] + d[6];
171     m[6] = d[5] - d[6];
172     m[7] = d[4] - d[7];
173     m[8] = d[8] + d[11];
174     m[9] = d[9] + d[10];
175     m[10] = d[9] - d[10];
176     m[11] = d[8] - d[11];
177     m[12] = d[12] + d[15];
178     m[13] = d[13] + d[14];
179     m[14] = d[13] - d[14];
180     m[15] = d[12] - d[15];
181 
182     d[0] = m[0] + m[1];
183     d[1] = m[0] - m[1];
184     d[2] = m[2] + m[3];
185     d[3] = m[3] - m[2];
186     d[4] = m[4] + m[5];
187     d[5] = m[4] - m[5];
188     d[6] = m[6] + m[7];
189     d[7] = m[7] - m[6];
190     d[8] = m[8] + m[9];
191     d[9] = m[8] - m[9];
192     d[10] = m[10] + m[11];
193     d[11] = m[11] - m[10];
194     d[12] = m[12] + m[13];
195     d[13] = m[12] - m[13];
196     d[14] = m[14] + m[15];
197     d[15] = m[15] - m[14];
198 
199     /*===== sad =====*/
200     for(k = 0; k < 16; ++k)
201     {
202         u4_sad += (d[k] > 0 ? d[k] : -d[k]);
203     }
204     u4_sad = ((u4_sad + 2) >> 2);
205 
206     return u4_sad;
207 }
208 
209 /**
210 *******************************************************************************
211 *
212 * @brief
213 *  Chroma Hadamard Transform of 4x4 block (8-bit input) with DC suppressed
214 *
215 * @par Description:
216 *
217 * @param[in] pu1_origin
218 *  UWORD8 pointer to the source block (u or v, interleaved)
219 *
220 * @param[in] src_strd
221 *  WORD32 Source stride
222 *
223 * @param[in] pu1_pred_buf
224 *  UWORD8 pointer to the prediction block (u or v, interleaved)
225 *
226 * @param[in] pred_strd
227 *  WORD32 Pred stride
228 *
229 * @param[in] pi2_dst
230 *  WORD16 pointer to the transform block
231 *
232 * @param[in] dst_strd (u or v, interleaved)
233 *  WORD32 Destination stride
234 *
235 * @returns
236 *  Hadamard SAD
237 *
238 * @remarks
239 *  Not updating the transform destination now. Only returning the SATD
240 *
241 *******************************************************************************
242 */
ihevce_chroma_compute_AC_HAD_4x4_8bit(UWORD8 * pu1_origin,WORD32 src_strd,UWORD8 * pu1_pred_buf,WORD32 pred_strd,WORD16 * pi2_dst,WORD32 dst_strd)243 UWORD32 ihevce_chroma_compute_AC_HAD_4x4_8bit(
244     UWORD8 *pu1_origin,
245     WORD32 src_strd,
246     UWORD8 *pu1_pred_buf,
247     WORD32 pred_strd,
248     WORD16 *pi2_dst,
249     WORD32 dst_strd)
250 {
251     WORD32 k;
252     WORD16 diff[16], m[16], d[16];
253     UWORD32 u4_sad = 0;
254 
255     (void)pi2_dst;
256     (void)dst_strd;
257     for(k = 0; k < 16; k += 4)
258     {
259         /* u or v, interleaved */
260         diff[k + 0] = pu1_origin[2 * 0] - pu1_pred_buf[2 * 0];
261         diff[k + 1] = pu1_origin[2 * 1] - pu1_pred_buf[2 * 1];
262         diff[k + 2] = pu1_origin[2 * 2] - pu1_pred_buf[2 * 2];
263         diff[k + 3] = pu1_origin[2 * 3] - pu1_pred_buf[2 * 3];
264 
265         pu1_pred_buf += pred_strd;
266         pu1_origin += src_strd;
267     }
268 
269     /*===== hadamard transform =====*/
270     m[0] = diff[0] + diff[12];
271     m[1] = diff[1] + diff[13];
272     m[2] = diff[2] + diff[14];
273     m[3] = diff[3] + diff[15];
274     m[4] = diff[4] + diff[8];
275     m[5] = diff[5] + diff[9];
276     m[6] = diff[6] + diff[10];
277     m[7] = diff[7] + diff[11];
278     m[8] = diff[4] - diff[8];
279     m[9] = diff[5] - diff[9];
280     m[10] = diff[6] - diff[10];
281     m[11] = diff[7] - diff[11];
282     m[12] = diff[0] - diff[12];
283     m[13] = diff[1] - diff[13];
284     m[14] = diff[2] - diff[14];
285     m[15] = diff[3] - diff[15];
286 
287     d[0] = m[0] + m[4];
288     d[1] = m[1] + m[5];
289     d[2] = m[2] + m[6];
290     d[3] = m[3] + m[7];
291     d[4] = m[8] + m[12];
292     d[5] = m[9] + m[13];
293     d[6] = m[10] + m[14];
294     d[7] = m[11] + m[15];
295     d[8] = m[0] - m[4];
296     d[9] = m[1] - m[5];
297     d[10] = m[2] - m[6];
298     d[11] = m[3] - m[7];
299     d[12] = m[12] - m[8];
300     d[13] = m[13] - m[9];
301     d[14] = m[14] - m[10];
302     d[15] = m[15] - m[11];
303 
304     m[0] = d[0] + d[3];
305     m[1] = d[1] + d[2];
306     m[2] = d[1] - d[2];
307     m[3] = d[0] - d[3];
308     m[4] = d[4] + d[7];
309     m[5] = d[5] + d[6];
310     m[6] = d[5] - d[6];
311     m[7] = d[4] - d[7];
312     m[8] = d[8] + d[11];
313     m[9] = d[9] + d[10];
314     m[10] = d[9] - d[10];
315     m[11] = d[8] - d[11];
316     m[12] = d[12] + d[15];
317     m[13] = d[13] + d[14];
318     m[14] = d[13] - d[14];
319     m[15] = d[12] - d[15];
320 
321     d[0] = m[0] + m[1];
322     d[1] = m[0] - m[1];
323     d[2] = m[2] + m[3];
324     d[3] = m[3] - m[2];
325     d[4] = m[4] + m[5];
326     d[5] = m[4] - m[5];
327     d[6] = m[6] + m[7];
328     d[7] = m[7] - m[6];
329     d[8] = m[8] + m[9];
330     d[9] = m[8] - m[9];
331     d[10] = m[10] + m[11];
332     d[11] = m[11] - m[10];
333     d[12] = m[12] + m[13];
334     d[13] = m[12] - m[13];
335     d[14] = m[14] + m[15];
336     d[15] = m[15] - m[14];
337 
338     /* DC masking */
339     d[0] = 0;
340 
341     /*===== sad =====*/
342     for(k = 0; k < 16; ++k)
343     {
344         u4_sad += (d[k] > 0 ? d[k] : -d[k]);
345     }
346     u4_sad = ((u4_sad + 2) >> 2);
347 
348     return u4_sad;
349 }
350 
351 /**
352 *******************************************************************************
353 *
354 * @brief
355 *  Chroma Hadamard Transform of 8x8 block (8-bit input)
356 *
357 * @par Description:
358 *
359 * @param[in] pu1_origin
360 *  UWORD8 pointer to the source block (u or v, interleaved)
361 *
362 * @param[in] src_strd
363 *  WORD32 Source stride
364 *
365 * @param[in] pu1_pred_buf
366 *  UWORD8 pointer to the prediction block (u or v, interleaved)
367 *
368 * @param[in] pred_strd
369 *  WORD32 Pred stride
370 *
371 * @param[in] pi2_dst
372 *  WORD16 pointer to the transform block
373 *
374 * @param[in] dst_strd (u or v, interleaved)
375 *  WORD32 Destination stride
376 *
377 * @returns
378 *  Hadamard SAD
379 *
380 * @remarks
381 *  Not updating the transform destination now. Only returning the SATD
382 *
383 *******************************************************************************
384 */
ihevce_chroma_HAD_8x8_8bit(UWORD8 * pu1_origin,WORD32 src_strd,UWORD8 * pu1_pred_buf,WORD32 pred_strd,WORD16 * pi2_dst,WORD32 dst_strd)385 UWORD32 ihevce_chroma_HAD_8x8_8bit(
386     UWORD8 *pu1_origin,
387     WORD32 src_strd,
388     UWORD8 *pu1_pred_buf,
389     WORD32 pred_strd,
390     WORD16 *pi2_dst,
391     WORD32 dst_strd)
392 {
393     WORD32 k, i, j, jj;
394     UWORD32 u4_sad = 0;
395     WORD16 diff[64], m1[8][8], m2[8][8], m3[8][8];
396 
397     (void)pi2_dst;
398     (void)dst_strd;
399     for(k = 0; k < 64; k += 8)
400     {
401         /* u or v, interleaved */
402         diff[k + 0] = pu1_origin[2 * 0] - pu1_pred_buf[2 * 0];
403         diff[k + 1] = pu1_origin[2 * 1] - pu1_pred_buf[2 * 1];
404         diff[k + 2] = pu1_origin[2 * 2] - pu1_pred_buf[2 * 2];
405         diff[k + 3] = pu1_origin[2 * 3] - pu1_pred_buf[2 * 3];
406         diff[k + 4] = pu1_origin[2 * 4] - pu1_pred_buf[2 * 4];
407         diff[k + 5] = pu1_origin[2 * 5] - pu1_pred_buf[2 * 5];
408         diff[k + 6] = pu1_origin[2 * 6] - pu1_pred_buf[2 * 6];
409         diff[k + 7] = pu1_origin[2 * 7] - pu1_pred_buf[2 * 7];
410 
411         pu1_pred_buf += pred_strd;
412         pu1_origin += src_strd;
413     }
414 
415     /*===== hadamard transform =====*/
416     // horizontal
417     for(j = 0; j < 8; j++)
418     {
419         jj = j << 3;
420         m2[j][0] = diff[jj] + diff[jj + 4];
421         m2[j][1] = diff[jj + 1] + diff[jj + 5];
422         m2[j][2] = diff[jj + 2] + diff[jj + 6];
423         m2[j][3] = diff[jj + 3] + diff[jj + 7];
424         m2[j][4] = diff[jj] - diff[jj + 4];
425         m2[j][5] = diff[jj + 1] - diff[jj + 5];
426         m2[j][6] = diff[jj + 2] - diff[jj + 6];
427         m2[j][7] = diff[jj + 3] - diff[jj + 7];
428 
429         m1[j][0] = m2[j][0] + m2[j][2];
430         m1[j][1] = m2[j][1] + m2[j][3];
431         m1[j][2] = m2[j][0] - m2[j][2];
432         m1[j][3] = m2[j][1] - m2[j][3];
433         m1[j][4] = m2[j][4] + m2[j][6];
434         m1[j][5] = m2[j][5] + m2[j][7];
435         m1[j][6] = m2[j][4] - m2[j][6];
436         m1[j][7] = m2[j][5] - m2[j][7];
437 
438         m2[j][0] = m1[j][0] + m1[j][1];
439         m2[j][1] = m1[j][0] - m1[j][1];
440         m2[j][2] = m1[j][2] + m1[j][3];
441         m2[j][3] = m1[j][2] - m1[j][3];
442         m2[j][4] = m1[j][4] + m1[j][5];
443         m2[j][5] = m1[j][4] - m1[j][5];
444         m2[j][6] = m1[j][6] + m1[j][7];
445         m2[j][7] = m1[j][6] - m1[j][7];
446     }
447 
448     // vertical
449     for(i = 0; i < 8; i++)
450     {
451         m3[0][i] = m2[0][i] + m2[4][i];
452         m3[1][i] = m2[1][i] + m2[5][i];
453         m3[2][i] = m2[2][i] + m2[6][i];
454         m3[3][i] = m2[3][i] + m2[7][i];
455         m3[4][i] = m2[0][i] - m2[4][i];
456         m3[5][i] = m2[1][i] - m2[5][i];
457         m3[6][i] = m2[2][i] - m2[6][i];
458         m3[7][i] = m2[3][i] - m2[7][i];
459 
460         m1[0][i] = m3[0][i] + m3[2][i];
461         m1[1][i] = m3[1][i] + m3[3][i];
462         m1[2][i] = m3[0][i] - m3[2][i];
463         m1[3][i] = m3[1][i] - m3[3][i];
464         m1[4][i] = m3[4][i] + m3[6][i];
465         m1[5][i] = m3[5][i] + m3[7][i];
466         m1[6][i] = m3[4][i] - m3[6][i];
467         m1[7][i] = m3[5][i] - m3[7][i];
468 
469         m2[0][i] = m1[0][i] + m1[1][i];
470         m2[1][i] = m1[0][i] - m1[1][i];
471         m2[2][i] = m1[2][i] + m1[3][i];
472         m2[3][i] = m1[2][i] - m1[3][i];
473         m2[4][i] = m1[4][i] + m1[5][i];
474         m2[5][i] = m1[4][i] - m1[5][i];
475         m2[6][i] = m1[6][i] + m1[7][i];
476         m2[7][i] = m1[6][i] - m1[7][i];
477     }
478 
479     /*===== sad =====*/
480     for(i = 0; i < 8; i++)
481     {
482         for(j = 0; j < 8; j++)
483         {
484             u4_sad += (m2[i][j] > 0 ? m2[i][j] : -m2[i][j]);
485         }
486     }
487     u4_sad = ((u4_sad + 4) >> 3);
488 
489     return u4_sad;
490 }
491 
492 /**
493 *******************************************************************************
494 *
495 * @brief
496 *  Chroma Hadamard Transform of 16x16 block (8-bit input)
497 *
498 * @par Description:
499 *
500 * @param[in] pu1_origin
501 *  UWORD8 pointer to the source block (u or v, interleaved)
502 *
503 * @param[in] src_strd
504 *  WORD32 Source stride
505 *
506 * @param[in] pu1_pred_buf
507 *  UWORD8 pointer to the prediction block (u or v, interleaved)
508 *
509 * @param[in] pred_strd
510 *  WORD32 Pred stride
511 *
512 * @param[in] pi2_dst
513 *  WORD16 pointer to the transform block
514 *
515 * @param[in] dst_strd (u or v, interleaved)
516 *  WORD32 Destination stride
517 *
518 * @returns
519 *  Hadamard SAD
520 *
521 * @remarks
522 *  Not updating the transform destination now. Only returning the SATD
523 *
524 *******************************************************************************
525 */
ihevce_chroma_HAD_16x16_8bit(UWORD8 * pu1_origin,WORD32 src_strd,UWORD8 * pu1_pred_buf,WORD32 pred_strd,WORD16 * pi2_dst,WORD32 dst_strd)526 UWORD32 ihevce_chroma_HAD_16x16_8bit(
527     UWORD8 *pu1_origin,
528     WORD32 src_strd,
529     UWORD8 *pu1_pred_buf,
530     WORD32 pred_strd,
531     WORD16 *pi2_dst,
532     WORD32 dst_strd)
533 {
534     UWORD32 au4_sad[4], u4_result = 0;
535     WORD32 i;
536 
537     for(i = 0; i < 4; i++)
538     {
539         au4_sad[i] = ihevce_chroma_HAD_8x8_8bit(
540             pu1_origin, src_strd, pu1_pred_buf, pred_strd, pi2_dst, dst_strd);
541 
542         if(i == 0 || i == 2)
543         {
544             pu1_origin += 16;
545             pu1_pred_buf += 16;
546         }
547 
548         if(i == 1)
549         {
550             pu1_origin += (8 * src_strd) - 16;
551             pu1_pred_buf += (8 * pred_strd) - 16;
552         }
553 
554         u4_result += au4_sad[i];
555     }
556 
557     return u4_result;
558 }
559