1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /*****************************************************************************/
21 /*                                                                           */
22 /*  File Name         : ih264_weighted_pred.c                                */
23 /*                                                                           */
24 /*  Description       : Contains function definitions for weighted           */
25 /*                      prediction functions                                 */
26 /*                                                                           */
27 /*  List of Functions : ih264_default_weighted_pred_luma()                   */
28 /*                      ih264_default_weighted_pred_chroma()                 */
29 /*                      ih264_weighted_pred_luma()                           */
30 /*                      ih264_weighted_pred_chroma()                         */
/*                      ih264_weighted_bi_pred_luma()                        */
/*                      ih264_weighted_bi_pred_chroma()                      */
33 /*                                                                           */
34 /*  Issues / Problems : None                                                 */
35 /*                                                                           */
36 /*  Revision History  :                                                      */
37 /*                                                                           */
38 /*         DD MM YYYY   Author(s)       Changes                              */
39 /*         07 01 2015   Kaushik         Initial version                      */
40 /*                      Senthoor                                             */
41 /*                                                                           */
42 /*****************************************************************************/
43 /*****************************************************************************/
44 /* File Includes                                                             */
45 /*****************************************************************************/
46 
47 /* User include files */
48 #include "ih264_typedefs.h"
49 #include "ih264_macros.h"
50 #include "ih264_platform_macros.h"
51 #include "ih264_weighted_pred.h"
52 
53 /*****************************************************************************/
54 /*  Function definitions .                                                   */
55 /*****************************************************************************/
56 /*****************************************************************************/
57 /*                                                                           */
58 /*  Function Name : ih264_default_weighted_pred_luma                         */
59 /*                                                                           */
60 /*  Description   : This function performs the default weighted prediction   */
61 /*                  as described in sec 8.4.2.3.1 titled "Default weighted   */
62 /*                  sample prediction process" for luma. The function gets   */
63 /*                  two ht x wd blocks, calculates their rounded-average and */
64 /*                  stores it in the destination block. (ht,wd) can be       */
65 /*                  (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16).   */
66 /*                                                                           */
/*  Inputs        : pu1_src1  - Pointer to source 1                          */
/*                  pu1_src2  - Pointer to source 2                          */
/*                  pu1_dst   - Pointer to destination                       */
/*                  src_strd1 - stride for source 1                          */
/*                  src_strd2 - stride for source 2                          */
/*                  dst_strd  - stride for destination                       */
73 /*                  ht        - height of the block                          */
74 /*                  wd        - width of the block                           */
75 /*                                                                           */
76 /*  Issues        : None                                                     */
77 /*                                                                           */
78 /*  Revision History:                                                        */
79 /*                                                                           */
80 /*         DD MM YYYY   Author(s)       Changes                              */
81 /*         07 01 2015   Kaushik         Initial Version                      */
82 /*                      Senthoor                                             */
83 /*                                                                           */
84 /*****************************************************************************/
void ih264_default_weighted_pred_luma(UWORD8 *pu1_src1,
                                      UWORD8 *pu1_src2,
                                      UWORD8 *pu1_dst,
                                      WORD32 src_strd1,
                                      WORD32 src_strd2,
                                      WORD32 dst_strd,
                                      WORD32 ht,
                                      WORD32 wd)
{
    /* Writes, for every position of the ht x wd block, the average of the   */
    /* two co-located source samples, with halves rounded up.                */
    WORD32 row, col;

    for(row = 0; row < ht; row++)
    {
        for(col = 0; col < wd; col++)
        {
            WORD32 sum = pu1_src1[col] + pu1_src2[col];

            pu1_dst[col] = (sum + 1) >> 1;
        }

        /* Move every pointer to the start of its next row */
        pu1_src1 += src_strd1;
        pu1_src2 += src_strd2;
        pu1_dst += dst_strd;
    }
}
110 
111 /*****************************************************************************/
112 /*                                                                           */
113 /*  Function Name : ih264_default_weighted_pred_chroma                       */
114 /*                                                                           */
115 /*  Description   : This function performs the default weighted prediction   */
116 /*                  as described in sec 8.4.2.3.1 titled "Default weighted   */
117 /*                  sample prediction process" for chroma. The function gets */
118 /*                  two ht x wd blocks, calculates their rounded-average and */
119 /*                  stores it in the destination block. (ht,wd) can be       */
120 /*                  (2,2), (4,2) , (2,4), (4,4), (8,4), (4,8) or (8,8).      */
121 /*                                                                           */
/*  Inputs        : pu1_src1  - Pointer to source 1                          */
/*                  pu1_src2  - Pointer to source 2                          */
/*                  pu1_dst   - Pointer to destination                       */
/*                  src_strd1 - stride for source 1                          */
/*                  src_strd2 - stride for source 2                          */
/*                  dst_strd  - stride for destination                       */
128 /*                  ht        - height of the block                          */
129 /*                  wd        - width of the block                           */
130 /*                                                                           */
131 /*  Issues        : None                                                     */
132 /*                                                                           */
133 /*  Revision History:                                                        */
134 /*                                                                           */
135 /*         DD MM YYYY   Author(s)       Changes                              */
136 /*         07 01 2015   Kaushik         Initial Version                      */
137 /*                      Senthoor                                             */
138 /*                                                                           */
139 /*****************************************************************************/
void ih264_default_weighted_pred_chroma(UWORD8 *pu1_src1,
                                        UWORD8 *pu1_src2,
                                        UWORD8 *pu1_dst,
                                        WORD32 src_strd1,
                                        WORD32 src_strd2,
                                        WORD32 dst_strd,
                                        WORD32 ht,
                                        WORD32 wd)
{
    /* Writes the rounded-up average of the two sources for every byte of    */
    /* the block. Each row is 2 * wd bytes long (presumably because the U    */
    /* and V samples are interleaved - confirm against the callers).         */
    WORD32 row, col;
    WORD32 row_len = wd << 1;

    for(row = 0; row < ht; row++)
    {
        for(col = 0; col < row_len; col++)
        {
            WORD32 sum = pu1_src1[col] + pu1_src2[col];

            pu1_dst[col] = (sum + 1) >> 1;
        }

        /* Move every pointer to the start of its next row */
        pu1_src1 += src_strd1;
        pu1_src2 += src_strd2;
        pu1_dst += dst_strd;
    }
}
167 
168 /*****************************************************************************/
169 /*                                                                           */
170 /*  Function Name : ih264_weighted_pred_luma                                 */
171 /*                                                                           */
172 /*  Description   : This function performs the weighted prediction as        */
173 /*                  described in sec 8.4.2.3.2 titled "Weighted sample       */
174 /*                  prediction process" for luma. The function gets one      */
175 /*                  ht x wd block, weights it, rounds it off, offsets it,    */
176 /*                  saturates it to unsigned 8-bit and stores it in the      */
177 /*                  destination block. (ht,wd) can be (4,4), (8,4), (4,8),   */
178 /*                  (8,8), (16,8), (8,16) or (16,16).                        */
179 /*                                                                           */
/*  Inputs        : pu1_src  - Pointer to source                             */
/*                  pu1_dst  - Pointer to destination                        */
182 /*                  src_strd - stride for source                             */
183 /*                  dst_strd - stride for destination                        */
184 /*                  log_wd   - number of bits to be rounded off              */
185 /*                  wt       - weight value                                  */
186 /*                  ofst     - offset value                                  */
187 /*                  ht       - height of the block                           */
188 /*                  wd       - width of the block                            */
189 /*                                                                           */
190 /*  Issues        : None                                                     */
191 /*                                                                           */
192 /*  Revision History:                                                        */
193 /*                                                                           */
194 /*         DD MM YYYY   Author(s)       Changes                              */
195 /*         07 01 2015   Kaushik         Initial Version                      */
196 /*                      Senthoor                                             */
197 /*                                                                           */
198 /*****************************************************************************/
void ih264_weighted_pred_luma(UWORD8 *pu1_src,
                              UWORD8 *pu1_dst,
                              WORD32 src_strd,
                              WORD32 dst_strd,
                              WORD32 log_wd,
                              WORD32 wt,
                              WORD32 ofst,
                              WORD32 ht,
                              WORD32 wd)
{
    WORD32 i, j;

    /* Only the low 16 bits of wt and the low 8 bits of ofst are used; both  */
    /* are reinterpreted as signed quantities.                               */
    wt = (WORD16)(wt & 0xffff);
    ofst = (WORD8)(ofst & 0xff);

    /* The inner loop already advances the pointers by wd, so the strides    */
    /* are reduced to the remaining jump to the next row.                    */
    src_strd -= wd;
    dst_strd -= wd;

    if(log_wd >= 1)
    {
        /* Rounding term and offset folded into one constant that is added   */
        /* before the shift. The offset is scaled by a multiplication and    */
        /* not by "ofst << log_wd": ofst can be negative, and left-shifting  */
        /* a negative value is undefined behaviour in C.                     */
        WORD32 i_ofst = (1 << (log_wd - 1)) + (ofst * (1 << log_wd));

        for(i = 0; i < ht; i++)
        {
            for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
            {
                *pu1_dst = CLIP_U8((wt * (*pu1_src) + i_ofst) >> log_wd);
            }

            pu1_src += src_strd;
            pu1_dst += dst_strd;
        }
    }
    else
    {
        /* No rounding shift: weight, add the offset and saturate */
        for(i = 0; i < ht; i++)
        {
            for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
            {
                *pu1_dst = CLIP_U8(wt * (*pu1_src) + ofst);
            }

            pu1_src += src_strd;
            pu1_dst += dst_strd;
        }
    }
}
241 
242 /*****************************************************************************/
243 /*                                                                           */
244 /*  Function Name : ih264_weighted_pred_chroma                               */
245 /*                                                                           */
246 /*  Description   : This function performs the weighted prediction as        */
247 /*                  described in sec 8.4.2.3.2 titled "Weighted sample       */
248 /*                  prediction process" for chroma. The function gets one    */
249 /*                  ht x wd block, weights it, rounds it off, offsets it,    */
250 /*                  saturates it to unsigned 8-bit and stores it in the      */
251 /*                  destination block. (ht,wd) can be (2,2), (4,2), (2,4),   */
252 /*                  (4,4), (8,4), (4,8) or (8,8).                            */
253 /*                                                                           */
/*  Inputs        : pu1_src  - Pointer to source                             */
/*                  pu1_dst  - Pointer to destination                        */
256 /*                  src_strd - stride for source                             */
257 /*                  dst_strd - stride for destination                        */
258 /*                  log_wd   - number of bits to be rounded off              */
259 /*                  wt       - weight values for u and v                     */
260 /*                  ofst     - offset values for u and v                     */
261 /*                  ht       - height of the block                           */
262 /*                  wd       - width of the block                            */
263 /*                                                                           */
264 /*  Issues        : None                                                     */
265 /*                                                                           */
266 /*  Revision History:                                                        */
267 /*                                                                           */
268 /*         DD MM YYYY   Author(s)       Changes                              */
269 /*         07 01 2015   Kaushik         Initial Version                      */
270 /*                      Senthoor                                             */
271 /*                                                                           */
272 /*****************************************************************************/
void ih264_weighted_pred_chroma(UWORD8 *pu1_src,
                                UWORD8 *pu1_dst,
                                WORD32 src_strd,
                                WORD32 dst_strd,
                                WORD32 log_wd,
                                WORD32 wt,
                                WORD32 ofst,
                                WORD32 ht,
                                WORD32 wd)
{
    WORD32 i, j;
    WORD32 wt_u, wt_v;
    WORD32 ofst_u, ofst_v;

    /* wt packs two signed 16-bit weights: U in bits 0-15, V in bits 16-31   */
    wt_u = (WORD16)(wt & 0xffff);
    wt_v = (WORD16)(wt >> 16);

    /* ofst packs two signed 8-bit offsets: U in bits 0-7, V in bits 8-15    */
    ofst_u = (WORD8)(ofst & 0xff);
    ofst_v = (WORD8)(ofst >> 8);

    /* Every row holds wd U/V pairs (2 * wd bytes) which the inner loop      */
    /* consumes, so the strides are reduced to the jump to the next row.     */
    src_strd -= wd << 1;
    dst_strd -= wd << 1;

    if(log_wd >= 1)
    {
        /* Rounding term and offset folded into one constant per plane. The  */
        /* offsets are scaled by a multiplication and not by a left shift:   */
        /* they can be negative, and left-shifting a negative value is       */
        /* undefined behaviour in C.                                         */
        ofst_u = (1 << (log_wd - 1)) + (ofst_u * (1 << log_wd));
        ofst_v = (1 << (log_wd - 1)) + (ofst_v * (1 << log_wd));

        for(i = 0; i < ht; i++)
        {
            for(j = 0; j < wd; j++, pu1_src += 2, pu1_dst += 2)
            {
                /* First byte of the pair uses the U parameters, second the  */
                /* V parameters.                                             */
                pu1_dst[0] = CLIP_U8((wt_u * pu1_src[0] + ofst_u) >> log_wd);
                pu1_dst[1] = CLIP_U8((wt_v * pu1_src[1] + ofst_v) >> log_wd);
            }
            pu1_src += src_strd;
            pu1_dst += dst_strd;
        }
    }
    else
    {
        /* No rounding shift: weight, add the offset and saturate */
        for(i = 0; i < ht; i++)
        {
            for(j = 0; j < wd; j++, pu1_src += 2, pu1_dst += 2)
            {
                pu1_dst[0] = CLIP_U8(wt_u * pu1_src[0] + ofst_u);
                pu1_dst[1] = CLIP_U8(wt_v * pu1_src[1] + ofst_v);
            }
            pu1_src += src_strd;
            pu1_dst += dst_strd;
        }
    }
}
330 
331 /*****************************************************************************/
332 /*                                                                           */
333 /*  Function Name : ih264_weighted_bi_pred_luma                              */
334 /*                                                                           */
335 /*  Description   : This function performs the weighted biprediction as      */
336 /*                  described in sec 8.4.2.3.2 titled "Weighted sample       */
337 /*                  prediction process" for luma. The function gets two      */
338 /*                  ht x wd blocks, weights them, adds them, rounds off the  */
339 /*                  sum, offsets it, saturates it to unsigned 8-bit and      */
340 /*                  stores it in the destination block. (ht,wd) can be       */
341 /*                  (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16).   */
342 /*                                                                           */
/*  Inputs        : pu1_src1  - Pointer to source 1                          */
/*                  pu1_src2  - Pointer to source 2                          */
/*                  pu1_dst   - Pointer to destination                       */
/*                  src_strd1 - stride for source 1                          */
/*                  src_strd2 - stride for source 2                          */
/*                  dst_strd  - stride for destination                       */
349 /*                  log_wd    - number of bits to be rounded off             */
350 /*                  wt1       - weight value for source 1                    */
351 /*                  wt2       - weight value for source 2                    */
352 /*                  ofst1     - offset value for source 1                    */
353 /*                  ofst2     - offset value for source 2                    */
354 /*                  ht        - height of the block                          */
355 /*                  wd        - width of the block                           */
356 /*                                                                           */
357 /*  Issues        : None                                                     */
358 /*                                                                           */
359 /*  Revision History:                                                        */
360 /*                                                                           */
361 /*         DD MM YYYY   Author(s)       Changes                              */
362 /*         07 01 2015   Kaushik         Initial Version                      */
363 /*                      Senthoor                                             */
364 /*                                                                           */
365 /*****************************************************************************/
void ih264_weighted_bi_pred_luma(UWORD8 *pu1_src1,
                                 UWORD8 *pu1_src2,
                                 UWORD8 *pu1_dst,
                                 WORD32 src_strd1,
                                 WORD32 src_strd2,
                                 WORD32 dst_strd,
                                 WORD32 log_wd,
                                 WORD32 wt1,
                                 WORD32 wt2,
                                 WORD32 ofst1,
                                 WORD32 ofst2,
                                 WORD32 ht,
                                 WORD32 wd)
{
    WORD32 i, j;
    WORD32 shft, ofst;

    /* Only the low 8 bits of each offset and the low 16 bits of each weight */
    /* are used; all are reinterpreted as signed quantities.                 */
    ofst1 = (WORD8)(ofst1 & 0xff);
    ofst2 = (WORD8)(ofst2 & 0xff);
    wt1 = (WORD16)(wt1 & 0xffff);
    wt2 = (WORD16)(wt2 & 0xffff);

    /* Combined offset: rounded average of the two per-source offsets */
    ofst = (ofst1 + ofst2 + 1) >> 1;

    /* The weighted sum of two sources is shifted down by log_wd + 1. The    */
    /* rounding term (1 << log_wd) and the combined offset are folded into   */
    /* one constant added before that shift. The offset is scaled by a       */
    /* multiplication and not by "ofst << shft": it can be negative, and     */
    /* left-shifting a negative value is undefined behaviour in C.           */
    shft = log_wd + 1;
    ofst = (1 << log_wd) + (ofst * (1 << shft));

    /* The inner loop already advances the pointers by wd, so the strides    */
    /* are reduced to the remaining jump to the next row.                    */
    src_strd1 -= wd;
    src_strd2 -= wd;
    dst_strd -= wd;

    for(i = 0; i < ht; i++)
    {
        for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
        {
            *pu1_dst = CLIP_U8((wt1 * (*pu1_src1) + wt2 * (*pu1_src2) + ofst) >> shft);
        }

        pu1_src1 += src_strd1;
        pu1_src2 += src_strd2;
        pu1_dst += dst_strd;
    }
}
406 
407 /*****************************************************************************/
408 /*                                                                           */
409 /*  Function Name : ih264_weighted_bi_pred_chroma                            */
410 /*                                                                           */
411 /*  Description   : This function performs the weighted biprediction as      */
412 /*                  described in sec 8.4.2.3.2 titled "Weighted sample       */
413 /*                  prediction process" for chroma. The function gets two    */
414 /*                  ht x wd blocks, weights them, adds them, rounds off the  */
415 /*                  sum, offsets it, saturates it to unsigned 8-bit and      */
416 /*                  stores it in the destination block. (ht,wd) can be       */
417 /*                  (2,2), (4,2), (2,4), (4,4), (8,4), (4,8) or (8,8).       */
418 /*                                                                           */
/*  Inputs        : pu1_src1  - Pointer to source 1                          */
/*                  pu1_src2  - Pointer to source 2                          */
/*                  pu1_dst   - Pointer to destination                       */
/*                  src_strd1 - stride for source 1                          */
/*                  src_strd2 - stride for source 2                          */
/*                  dst_strd  - stride for destination                       */
425 /*                  log_wd    - number of bits to be rounded off             */
426 /*                  wt1       - weight values for u and v in source 1        */
427 /*                  wt2       - weight values for u and v in source 2        */
428 /*                  ofst1     - offset value for u and v in source 1         */
429 /*                  ofst2     - offset value for u and v in source 2         */
430 /*                  ht        - height of the block                          */
431 /*                  wd        - width of the block                           */
432 /*                                                                           */
433 /*  Issues        : None                                                     */
434 /*                                                                           */
435 /*  Revision History:                                                        */
436 /*                                                                           */
437 /*         DD MM YYYY   Author(s)       Changes                              */
438 /*         07 01 2015   Kaushik         Initial Version                      */
439 /*                      Senthoor                                             */
440 /*                                                                           */
441 /*****************************************************************************/
void ih264_weighted_bi_pred_chroma(UWORD8 *pu1_src1,
                                   UWORD8 *pu1_src2,
                                   UWORD8 *pu1_dst,
                                   WORD32 src_strd1,
                                   WORD32 src_strd2,
                                   WORD32 dst_strd,
                                   WORD32 log_wd,
                                   WORD32 wt1,
                                   WORD32 wt2,
                                   WORD32 ofst1,
                                   WORD32 ofst2,
                                   WORD32 ht,
                                   WORD32 wd)
{
    WORD32 i, j;
    WORD32 wt1_u, wt1_v, wt2_u, wt2_v;
    WORD32 ofst1_u, ofst1_v, ofst2_u, ofst2_v;
    WORD32 ofst_u, ofst_v;
    WORD32 shft;

    /* Each ofst packs two signed 8-bit offsets: U in bits 0-7, V in 8-15    */
    ofst1_u = (WORD8)(ofst1 & 0xff);
    ofst1_v = (WORD8)(ofst1 >> 8);
    ofst2_u = (WORD8)(ofst2 & 0xff);
    ofst2_v = (WORD8)(ofst2 >> 8);

    /* Each wt packs two signed 16-bit weights: U in bits 0-15, V in 16-31   */
    wt1_u = (WORD16)(wt1 & 0xffff);
    wt1_v = (WORD16)(wt1 >> 16);
    wt2_u = (WORD16)(wt2 & 0xffff);
    wt2_v = (WORD16)(wt2 >> 16);

    /* Combined per-plane offset: rounded average of the two source offsets  */
    ofst_u = (ofst1_u + ofst2_u + 1) >> 1;
    ofst_v = (ofst1_v + ofst2_v + 1) >> 1;

    /* Every row holds wd U/V pairs (2 * wd bytes) which the inner loop      */
    /* consumes, so the strides are reduced to the jump to the next row.     */
    src_strd1 -= wd << 1;
    src_strd2 -= wd << 1;
    dst_strd -= wd << 1;

    /* The weighted sum of two sources is shifted down by log_wd + 1. The    */
    /* rounding term (1 << log_wd) and the combined offset are folded into   */
    /* one constant per plane. The offsets are scaled by a multiplication    */
    /* and not by a left shift: they can be negative, and left-shifting a    */
    /* negative value is undefined behaviour in C.                           */
    shft = log_wd + 1;
    ofst_u = (1 << log_wd) + (ofst_u * (1 << shft));
    ofst_v = (1 << log_wd) + (ofst_v * (1 << shft));

    for(i = 0; i < ht; i++)
    {
        for(j = 0; j < wd; j++, pu1_src1 += 2, pu1_src2 += 2, pu1_dst += 2)
        {
            /* First byte of the pair uses the U parameters, second the V    */
            /* parameters.                                                   */
            pu1_dst[0] = CLIP_U8((wt1_u * pu1_src1[0] + wt2_u * pu1_src2[0] + ofst_u) >> shft);
            pu1_dst[1] = CLIP_U8((wt1_v * pu1_src1[1] + wt2_v * pu1_src2[1] + ofst_v) >> shft);
        }
        pu1_src1 += src_strd1;
        pu1_src2 += src_strd2;
        pu1_dst += dst_strd;
    }
}
496