1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 *  ihevcd_fmt_conv.c
22 *
23 * @brief
24 *  Contains functions for format conversion or frame copy of output buffer
25 *
26 * @author
27 *  Harish
28 *
29 * @par List of Functions:
30 *
31 * @remarks
32 *  None
33 *
34 *******************************************************************************
35 */
36 /*****************************************************************************/
37 /* File Includes                                                             */
38 /*****************************************************************************/
39 #include <stdio.h>
40 #include <stddef.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <assert.h>
44 
45 #include "ihevc_typedefs.h"
46 #include "iv.h"
47 #include "ivd.h"
48 #include "ihevcd_cxa.h"
49 #include "ithread.h"
50 
51 #include "ihevc_defs.h"
52 #include "ihevc_debug.h"
53 #include "ihevc_structs.h"
54 #include "ihevc_macros.h"
55 #include "ihevc_platform_macros.h"
56 #include "ihevc_cabac_tables.h"
57 #include "ihevc_disp_mgr.h"
58 
59 #include "ihevcd_defs.h"
60 #include "ihevcd_function_selector.h"
61 #include "ihevcd_structs.h"
62 #include "ihevcd_error.h"
63 #include "ihevcd_nal.h"
64 #include "ihevcd_bitstream.h"
65 #include "ihevcd_fmt_conv.h"
66 #include "ihevcd_profile.h"
67 
68 /**
69 *******************************************************************************
70 *
71 * @brief Function used from copying a 420SP buffer
72 *
73 * @par   Description
74 * Function used from copying a 420SP buffer
75 *
76 * @param[in] pu1_y_src
77 *   Input Y pointer
78 *
79 * @param[in] pu1_uv_src
80 *   Input UV pointer (UV is interleaved either in UV or VU format)
81 *
82 * @param[in] pu1_y_dst
83 *   Output Y pointer
84 *
85 * @param[in] pu1_uv_dst
86 *   Output UV pointer (UV is interleaved in the same format as that of input)
87 *
88 * @param[in] wd
89 *   Width
90 *
91 * @param[in] ht
92 *   Height
93 *
94 * @param[in] src_y_strd
95 *   Input Y Stride
96 *
97 * @param[in] src_uv_strd
98 *   Input UV stride
99 *
100 * @param[in] dst_y_strd
101 *   Output Y stride
102 *
103 * @param[in] dst_uv_strd
104 *   Output UV stride
105 *
106 * @returns None
107 *
108 * @remarks In case there is a need to perform partial frame copy then
109 * by passion appropriate source and destination pointers and appropriate
110 * values for wd and ht it can be done
111 *
112 *******************************************************************************
113 */
ihevcd_fmt_conv_420sp_to_rgb565(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD16 * pu2_rgb_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_strd,WORD32 is_u_first)114 void ihevcd_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src,
115                                      UWORD8 *pu1_uv_src,
116                                      UWORD16 *pu2_rgb_dst,
117                                      WORD32 wd,
118                                      WORD32 ht,
119                                      WORD32 src_y_strd,
120                                      WORD32 src_uv_strd,
121                                      WORD32 dst_strd,
122                                      WORD32 is_u_first)
123 {
124 
125 
126     WORD16  i2_r, i2_g, i2_b;
127     UWORD32  u4_r, u4_g, u4_b;
128     WORD16  i2_i, i2_j;
129     UWORD8  *pu1_y_src_nxt;
130     UWORD16 *pu2_rgb_dst_NextRow;
131 
132     UWORD8 *pu1_u_src, *pu1_v_src;
133 
134     if(is_u_first)
135     {
136         pu1_u_src = (UWORD8 *)pu1_uv_src;
137         pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
138     }
139     else
140     {
141         pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
142         pu1_v_src = (UWORD8 *)pu1_uv_src;
143     }
144 
145     pu1_y_src_nxt   = pu1_y_src + src_y_strd;
146     pu2_rgb_dst_NextRow = pu2_rgb_dst + dst_strd;
147 
148     for(i2_i = 0; i2_i < (ht >> 1); i2_i++)
149     {
150         for(i2_j = (wd >> 1); i2_j > 0; i2_j--)
151         {
152             i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13);
153             i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) >> 13;
154             i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13;
155 
156             pu1_u_src += 2;
157             pu1_v_src += 2;
158             /* pixel 0 */
159             /* B */
160             u4_b = CLIP_U8(*pu1_y_src + i2_b);
161             u4_b >>= 3;
162             /* G */
163             u4_g = CLIP_U8(*pu1_y_src + i2_g);
164             u4_g >>= 2;
165             /* R */
166             u4_r = CLIP_U8(*pu1_y_src + i2_r);
167             u4_r >>= 3;
168 
169             pu1_y_src++;
170             *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
171 
172             /* pixel 1 */
173             /* B */
174             u4_b = CLIP_U8(*pu1_y_src + i2_b);
175             u4_b >>= 3;
176             /* G */
177             u4_g = CLIP_U8(*pu1_y_src + i2_g);
178             u4_g >>= 2;
179             /* R */
180             u4_r = CLIP_U8(*pu1_y_src + i2_r);
181             u4_r >>= 3;
182 
183             pu1_y_src++;
184             *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
185 
186             /* pixel 2 */
187             /* B */
188             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
189             u4_b >>= 3;
190             /* G */
191             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
192             u4_g >>= 2;
193             /* R */
194             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
195             u4_r >>= 3;
196 
197             pu1_y_src_nxt++;
198             *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
199 
200             /* pixel 3 */
201             /* B */
202             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
203             u4_b >>= 3;
204             /* G */
205             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
206             u4_g >>= 2;
207             /* R */
208             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
209             u4_r >>= 3;
210 
211             pu1_y_src_nxt++;
212             *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
213 
214         }
215 
216         pu1_u_src = pu1_u_src + src_uv_strd - wd;
217         pu1_v_src = pu1_v_src + src_uv_strd - wd;
218 
219         pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd;
220         pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd;
221 
222         pu2_rgb_dst = pu2_rgb_dst_NextRow - wd + dst_strd;
223         pu2_rgb_dst_NextRow = pu2_rgb_dst_NextRow + (dst_strd << 1) - wd;
224     }
225 
226 
227 }
228 
/* Packs one 32-bit pixel from a luma sample and the chroma offsets of its 2x2 block */
static UWORD32 ihevcd_pack_rgba8888(WORD32 y, WORD32 r_ofst, WORD32 g_ofst, WORD32 b_ofst)
{
    UWORD32 u4_r = CLIP_U8(y + r_ofst);
    UWORD32 u4_g = CLIP_U8(y + g_ofst);
    UWORD32 u4_b = CLIP_U8(y + b_ofst);

    /* R in bits 16-23, G in bits 8-15, B in bits 0-7 */
    return ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
}

/**
*******************************************************************************
*
* @brief Function used for converting a 420SP buffer to 32-bit RGB
*
* @par   Description
* Converts the input in units of 2x2 luma samples. Each interleaved chroma
* pair supplies the R, G and B offsets (computed with COEFF1..COEFF4 and a
* right shift by 13) that are added to each of the four luma samples of the
* block before clipping and packing.
*
* @param[in] pu1_y_src
*   Input Y pointer
*
* @param[in] pu1_uv_src
*   Input UV pointer (UV is interleaved either in UV or VU format)
*
* @param[out] pu4_rgba_dst
*   Output pointer, one UWORD32 per pixel
*
* @param[in] wd
*   Width in pixels
*
* @param[in] ht
*   Height in pixels
*
* @param[in] src_y_strd
*   Input Y stride
*
* @param[in] src_uv_strd
*   Input UV stride
*
* @param[in] dst_strd
*   Output stride, in units of UWORD32 pixels
*
* @param[in] is_u_first
*   Non-zero if U is the first byte of each interleaved chroma pair
*
* @returns None
*
* @remarks Only the R, G and B fields are written; no alpha value is OR-ed
* into the output word. Only complete 2x2 blocks are converted: when wd or ht
* is odd the last column or row is left unwritten. Row pointers are advanced
* by exact strides, so an odd wd no longer shifts the following rows.
*
*******************************************************************************
*/
void ihevcd_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src,
                                       UWORD8 *pu1_uv_src,
                                       UWORD32 *pu4_rgba_dst,
                                       WORD32 wd,
                                       WORD32 ht,
                                       WORD32 src_y_strd,
                                       WORD32 src_uv_strd,
                                       WORD32 dst_strd,
                                       WORD32 is_u_first)
{
    /* Byte position of U and V inside each interleaved chroma pair */
    const WORD32 u_ofst = is_u_first ? 0 : 1;
    const WORD32 v_ofst = is_u_first ? 1 : 0;

    /* Number of complete 2x2 blocks; 32-bit counters avoid truncating
     * large dimensions */
    const WORD32 num_blk_cols = wd >> 1;
    const WORD32 num_blk_rows = ht >> 1;
    WORD32 blk_row, blk_col;

    for(blk_row = 0; blk_row < num_blk_rows; blk_row++)
    {
        UWORD8 *pu1_y_top = pu1_y_src;
        UWORD8 *pu1_y_bot = pu1_y_src + src_y_strd;
        UWORD8 *pu1_uv = pu1_uv_src;
        UWORD32 *pu4_dst_top = pu4_rgba_dst;
        UWORD32 *pu4_dst_bot = pu4_rgba_dst + dst_strd;

        for(blk_col = 0; blk_col < num_blk_cols; blk_col++)
        {
            WORD32 u = pu1_uv[u_ofst] - 128;
            WORD32 v = pu1_uv[v_ofst] - 128;

            /* Chroma contribution shared by the four pixels of the block */
            WORD16 i2_b = (WORD16)((u * COEFF4) >> 13);
            WORD16 i2_g = (WORD16)((u * COEFF2 + v * COEFF3) >> 13);
            WORD16 i2_r = (WORD16)((v * COEFF1) >> 13);

            /* pixels 0 and 1 : top row of the block */
            pu4_dst_top[0] = ihevcd_pack_rgba8888(pu1_y_top[0], i2_r, i2_g, i2_b);
            pu4_dst_top[1] = ihevcd_pack_rgba8888(pu1_y_top[1], i2_r, i2_g, i2_b);

            /* pixels 2 and 3 : bottom row of the block */
            pu4_dst_bot[0] = ihevcd_pack_rgba8888(pu1_y_bot[0], i2_r, i2_g, i2_b);
            pu4_dst_bot[1] = ihevcd_pack_rgba8888(pu1_y_bot[1], i2_r, i2_g, i2_b);

            pu1_uv += 2;
            pu1_y_top += 2;
            pu1_y_bot += 2;
            pu4_dst_top += 2;
            pu4_dst_bot += 2;
        }

        /* Move to the next pair of luma rows and the next chroma row.
         * Multiplication is used instead of a left shift so that negative
         * strides stay well defined. */
        pu1_y_src += 2 * src_y_strd;
        pu1_uv_src += src_uv_strd;
        pu4_rgba_dst += 2 * dst_strd;
    }
}
331 
332 /**
333 *******************************************************************************
334 *
335 * @brief Function used from copying a 420SP buffer
336 *
337 * @par   Description
338 * Function used from copying a 420SP buffer
339 *
340 * @param[in] pu1_y_src
341 *   Input Y pointer
342 *
343 * @param[in] pu1_uv_src
344 *   Input UV pointer (UV is interleaved either in UV or VU format)
345 *
346 * @param[in] pu1_y_dst
347 *   Output Y pointer
348 *
349 * @param[in] pu1_uv_dst
350 *   Output UV pointer (UV is interleaved in the same format as that of input)
351 *
352 * @param[in] wd
353 *   Width
354 *
355 * @param[in] ht
356 *   Height
357 *
358 * @param[in] src_y_strd
359 *   Input Y Stride
360 *
361 * @param[in] src_uv_strd
362 *   Input UV stride
363 *
364 * @param[in] dst_y_strd
365 *   Output Y stride
366 *
367 * @param[in] dst_uv_strd
368 *   Output UV stride
369 *
370 * @returns None
371 *
372 * @remarks In case there is a need to perform partial frame copy then
373 * by passion appropriate source and destination pointers and appropriate
374 * values for wd and ht it can be done
375 *
376 *******************************************************************************
377 */
378 
ihevcd_fmt_conv_420sp_to_420sp(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD8 * pu1_y_dst,UWORD8 * pu1_uv_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_y_strd,WORD32 dst_uv_strd)379 void ihevcd_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src,
380                                     UWORD8 *pu1_uv_src,
381                                     UWORD8 *pu1_y_dst,
382                                     UWORD8 *pu1_uv_dst,
383                                     WORD32 wd,
384                                     WORD32 ht,
385                                     WORD32 src_y_strd,
386                                     WORD32 src_uv_strd,
387                                     WORD32 dst_y_strd,
388                                     WORD32 dst_uv_strd)
389 {
390     UWORD8 *pu1_src, *pu1_dst;
391     WORD32 num_rows, num_cols, src_strd, dst_strd;
392     WORD32 i;
393 
394     /* copy luma */
395     pu1_src = (UWORD8 *)pu1_y_src;
396     pu1_dst = (UWORD8 *)pu1_y_dst;
397 
398     num_rows = ht;
399     num_cols = wd;
400 
401     src_strd = src_y_strd;
402     dst_strd = dst_y_strd;
403 
404     for(i = 0; i < num_rows; i++)
405     {
406         memcpy(pu1_dst, pu1_src, num_cols);
407         pu1_dst += dst_strd;
408         pu1_src += src_strd;
409     }
410 
411     /* copy U and V */
412     pu1_src = (UWORD8 *)pu1_uv_src;
413     pu1_dst = (UWORD8 *)pu1_uv_dst;
414 
415     num_rows = ht >> 1;
416     num_cols = wd;
417 
418     src_strd = src_uv_strd;
419     dst_strd = dst_uv_strd;
420 
421     for(i = 0; i < num_rows; i++)
422     {
423         memcpy(pu1_dst, pu1_src, num_cols);
424         pu1_dst += dst_strd;
425         pu1_src += src_strd;
426     }
427     return;
428 }
429 
430 
431 
432 /**
433 *******************************************************************************
434 *
435 * @brief Function used from copying a 420SP buffer
436 *
437 * @par   Description
438 * Function used from copying a 420SP buffer
439 *
440 * @param[in] pu1_y_src
441 *   Input Y pointer
442 *
443 * @param[in] pu1_uv_src
444 *   Input UV pointer (UV is interleaved either in UV or VU format)
445 *
446 * @param[in] pu1_y_dst
447 *   Output Y pointer
448 *
449 * @param[in] pu1_uv_dst
450 *   Output UV pointer (UV is interleaved in the same format as that of input)
451 *
452 * @param[in] wd
453 *   Width
454 *
455 * @param[in] ht
456 *   Height
457 *
458 * @param[in] src_y_strd
459 *   Input Y Stride
460 *
461 * @param[in] src_uv_strd
462 *   Input UV stride
463 *
464 * @param[in] dst_y_strd
465 *   Output Y stride
466 *
467 * @param[in] dst_uv_strd
468 *   Output UV stride
469 *
470 * @returns None
471 *
472 * @remarks In case there is a need to perform partial frame copy then
473 * by passion appropriate source and destination pointers and appropriate
474 * values for wd and ht it can be done
475 *
476 *******************************************************************************
477 */
ihevcd_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD8 * pu1_y_dst,UWORD8 * pu1_uv_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_y_strd,WORD32 dst_uv_strd)478 void ihevcd_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src,
479                                             UWORD8 *pu1_uv_src,
480                                             UWORD8 *pu1_y_dst,
481                                             UWORD8 *pu1_uv_dst,
482                                             WORD32 wd,
483                                             WORD32 ht,
484                                             WORD32 src_y_strd,
485                                             WORD32 src_uv_strd,
486                                             WORD32 dst_y_strd,
487                                             WORD32 dst_uv_strd)
488 {
489     UWORD8 *pu1_src, *pu1_dst;
490     WORD32 num_rows, num_cols, src_strd, dst_strd;
491     WORD32 i;
492 
493     /* copy luma */
494     pu1_src = (UWORD8 *)pu1_y_src;
495     pu1_dst = (UWORD8 *)pu1_y_dst;
496 
497     num_rows = ht;
498     num_cols = wd;
499 
500     src_strd = src_y_strd;
501     dst_strd = dst_y_strd;
502 
503     for(i = 0; i < num_rows; i++)
504     {
505         memcpy(pu1_dst, pu1_src, num_cols);
506         pu1_dst += dst_strd;
507         pu1_src += src_strd;
508     }
509 
510     /* copy U and V */
511     pu1_src = (UWORD8 *)pu1_uv_src;
512     pu1_dst = (UWORD8 *)pu1_uv_dst;
513 
514     num_rows = ht >> 1;
515     num_cols = wd;
516 
517     src_strd = src_uv_strd;
518     dst_strd = dst_uv_strd;
519 
520     for(i = 0; i < num_rows; i++)
521     {
522         WORD32 j;
523         for(j = 0; j < num_cols; j += 2)
524         {
525             pu1_dst[j + 0] = pu1_src[j + 1];
526             pu1_dst[j + 1] = pu1_src[j + 0];
527         }
528         pu1_dst += dst_strd;
529         pu1_src += src_strd;
530     }
531     return;
532 }
533 /**
534 *******************************************************************************
535 *
536 * @brief Function used from copying a 420SP buffer
537 *
538 * @par   Description
539 * Function used from copying a 420SP buffer
540 *
541 * @param[in] pu1_y_src
542 *   Input Y pointer
543 *
544 * @param[in] pu1_uv_src
545 *   Input UV pointer (UV is interleaved either in UV or VU format)
546 *
547 * @param[in] pu1_y_dst
548 *   Output Y pointer
549 *
550 * @param[in] pu1_u_dst
551 *   Output U pointer
552 *
553 * @param[in] pu1_v_dst
554 *   Output V pointer
555 *
556 * @param[in] wd
557 *   Width
558 *
559 * @param[in] ht
560 *   Height
561 *
562 * @param[in] src_y_strd
563 *   Input Y Stride
564 *
565 * @param[in] src_uv_strd
566 *   Input UV stride
567 *
568 * @param[in] dst_y_strd
569 *   Output Y stride
570 *
571 * @param[in] dst_uv_strd
572 *   Output UV stride
573 *
574 * @param[in] is_u_first
575 *   Flag to indicate if U is the first byte in input chroma part
576 *
577 * @returns none
578 *
579 * @remarks In case there is a need to perform partial frame copy then
580 * by passion appropriate source and destination pointers and appropriate
581 * values for wd and ht it can be done
582 *
583 *******************************************************************************
584 */
585 
586 
ihevcd_fmt_conv_420sp_to_420p(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD8 * pu1_y_dst,UWORD8 * pu1_u_dst,UWORD8 * pu1_v_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_y_strd,WORD32 dst_uv_strd,WORD32 is_u_first,WORD32 disable_luma_copy)587 void ihevcd_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src,
588                                    UWORD8 *pu1_uv_src,
589                                    UWORD8 *pu1_y_dst,
590                                    UWORD8 *pu1_u_dst,
591                                    UWORD8 *pu1_v_dst,
592                                    WORD32 wd,
593                                    WORD32 ht,
594                                    WORD32 src_y_strd,
595                                    WORD32 src_uv_strd,
596                                    WORD32 dst_y_strd,
597                                    WORD32 dst_uv_strd,
598                                    WORD32 is_u_first,
599                                    WORD32 disable_luma_copy)
600 {
601     UWORD8 *pu1_src, *pu1_dst;
602     UWORD8 *pu1_u_src, *pu1_v_src;
603     WORD32 num_rows, num_cols, src_strd, dst_strd;
604     WORD32 i, j;
605 
606     if(0 == disable_luma_copy)
607     {
608         /* copy luma */
609         pu1_src = (UWORD8 *)pu1_y_src;
610         pu1_dst = (UWORD8 *)pu1_y_dst;
611 
612         num_rows = ht;
613         num_cols = wd;
614 
615         src_strd = src_y_strd;
616         dst_strd = dst_y_strd;
617 
618         for(i = 0; i < num_rows; i++)
619         {
620             memcpy(pu1_dst, pu1_src, num_cols);
621             pu1_dst += dst_strd;
622             pu1_src += src_strd;
623         }
624     }
625     /* de-interleave U and V and copy to destination */
626     if(is_u_first)
627     {
628         pu1_u_src = (UWORD8 *)pu1_uv_src;
629         pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
630     }
631     else
632     {
633         pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
634         pu1_v_src = (UWORD8 *)pu1_uv_src;
635     }
636 
637 
638     num_rows = ht >> 1;
639     num_cols = wd >> 1;
640 
641     src_strd = src_uv_strd;
642     dst_strd = dst_uv_strd;
643 
644     for(i = 0; i < num_rows; i++)
645     {
646         for(j = 0; j < num_cols; j++)
647         {
648             pu1_u_dst[j] = pu1_u_src[j * 2];
649             pu1_v_dst[j] = pu1_v_src[j * 2];
650         }
651 
652         pu1_u_dst += dst_strd;
653         pu1_v_dst += dst_strd;
654         pu1_u_src += src_strd;
655         pu1_v_src += src_strd;
656     }
657     return;
658 }
659 
660 
661 
662 /**
663 *******************************************************************************
664 *
665 * @brief Function used from format conversion or frame copy
666 *
667 * @par   Description
668 * Function used from copying or converting a reference frame to display buffer
669 * in non shared mode
670 *
671 * @param[in] pu1_y_dst
672 *   Output Y pointer
673 *
674 * @param[in] pu1_u_dst
675 *   Output U/UV pointer ( UV is interleaved in the same format as that of input)
676 *
677 * @param[in] pu1_v_dst
678 *   Output V pointer ( used in 420P output case)
679 *
680 * @param[in] blocking
681 *   To indicate whether format conversion should wait till frame is reconstructed
682 *   and then return after complete copy is done. To be set to 1 when called at the
683 *   end of frame processing and set to 0 when called between frame processing modules
684 *   in order to utilize available MCPS
685 *
686 * @returns Error from IHEVCD_ERROR_T
687 *
688 *******************************************************************************
689 */
ihevcd_fmt_conv(codec_t * ps_codec,process_ctxt_t * ps_proc,UWORD8 * pu1_y_dst,UWORD8 * pu1_u_dst,UWORD8 * pu1_v_dst,WORD32 cur_row,WORD32 num_rows)690 IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec,
691                                process_ctxt_t *ps_proc,
692                                UWORD8 *pu1_y_dst,
693                                UWORD8 *pu1_u_dst,
694                                UWORD8 *pu1_v_dst,
695                                WORD32 cur_row,
696                                WORD32 num_rows)
697 {
698     IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
699     pic_buf_t *ps_disp_pic;
700     UWORD8 *pu1_y_src, *pu1_uv_src;
701     UWORD8 *pu1_y_dst_tmp, *pu1_uv_dst_tmp;
702     UWORD8 *pu1_u_dst_tmp, *pu1_v_dst_tmp;
703     UWORD16 *pu2_rgb_dst_tmp;
704     UWORD32 *pu4_rgb_dst_tmp;
705     WORD32 is_u_first;
706     UWORD8 *pu1_luma;
707     UWORD8 *pu1_chroma;
708     sps_t *ps_sps;
709     WORD32 disable_luma_copy;
710     WORD32 crop_unit_x, crop_unit_y;
711 
712     if(0 == num_rows)
713         return ret;
714 
715     /* In case processing is disabled, then no need to format convert/copy */
716     PROFILE_DISABLE_FMT_CONV();
717     ps_sps = ps_proc->ps_sps;
718 
719     crop_unit_x = 1;
720     crop_unit_y = 1;
721 
722     if(CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc)
723     {
724         crop_unit_x = 2;
725         crop_unit_y = 2;
726     }
727 
728     ps_disp_pic = ps_codec->ps_disp_buf;
729     pu1_luma = ps_disp_pic->pu1_luma;
730     pu1_chroma = ps_disp_pic->pu1_chroma;
731 
732 
733     /* Take care of cropping */
734     pu1_luma    += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset * crop_unit_y + ps_sps->i2_pic_crop_left_offset * crop_unit_x;
735 
736     /* Left offset is multiplied by 2 because buffer is UV interleaved */
737     pu1_chroma  += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset + ps_sps->i2_pic_crop_left_offset * 2;
738 
739 
740     is_u_first = (IV_YUV_420SP_UV == ps_codec->e_ref_chroma_fmt) ? 1 : 0;
741 
742     /* In case of 420P output luma copy is disabled for shared mode */
743     disable_luma_copy = 0;
744     if(1 == ps_codec->i4_share_disp_buf)
745     {
746         disable_luma_copy = 1;
747     }
748 
749 
750 
751     {
752         pu1_y_src   = pu1_luma + cur_row * ps_codec->i4_strd;
753         pu1_uv_src  = pu1_chroma + (cur_row / 2) * ps_codec->i4_strd;
754 
755         /* In case of shared mode, with 420P output, get chroma destination */
756         if((1 == ps_codec->i4_share_disp_buf) && (IV_YUV_420P == ps_codec->e_chroma_fmt))
757         {
758             WORD32 i;
759             for(i = 0; i < ps_codec->i4_share_disp_buf_cnt; i++)
760             {
761                 WORD32 diff = ps_disp_pic->pu1_luma - ps_codec->s_disp_buffer[i].pu1_bufs[0];
762                 if(diff == (ps_codec->i4_strd * PAD_TOP + PAD_LEFT))
763                 {
764                     pu1_u_dst = ps_codec->s_disp_buffer[i].pu1_bufs[1];
765                     pu1_u_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2);
766 
767                     pu1_v_dst = ps_codec->s_disp_buffer[i].pu1_bufs[2];
768                     pu1_v_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2);
769                     break;
770                 }
771             }
772         }
773         pu2_rgb_dst_tmp  = (UWORD16 *)pu1_y_dst;
774         pu2_rgb_dst_tmp  += cur_row * ps_codec->i4_disp_strd;
775         pu4_rgb_dst_tmp  = (UWORD32 *)pu1_y_dst;
776         pu4_rgb_dst_tmp  += cur_row * ps_codec->i4_disp_strd;
777         pu1_y_dst_tmp  = pu1_y_dst  + cur_row * ps_codec->i4_disp_strd;
778         pu1_uv_dst_tmp = pu1_u_dst  + (cur_row / 2) * ps_codec->i4_disp_strd;
779         pu1_u_dst_tmp = pu1_u_dst  + (cur_row / 2) * ps_codec->i4_disp_strd / 2;
780         pu1_v_dst_tmp = pu1_v_dst  + (cur_row / 2) * ps_codec->i4_disp_strd / 2;
781 
782         /* In case of multi threaded implementation, format conversion might be called
783          * before reconstruction is completed. If the frame being converted/copied
784          * is same as the frame being reconstructed,
785          * Check how many rows can be format converted
786          * Convert those many rows and then check for remaining rows and so on
787          */
788 
789         if((0 == ps_codec->i4_flush_mode) && (ps_codec->i4_disp_buf_id == ps_proc->i4_cur_pic_buf_id) && (1 < ps_codec->i4_num_cores))
790         {
791             WORD32 idx;
792             UWORD8 *pu1_buf;
793             WORD32 status;
794             WORD32 last_row = cur_row + num_rows;
795             WORD32 last_ctb_y;
796             UWORD32 ctb_in_row;
797 
798             while(1)
799             {
800                 last_row = cur_row + MAX(num_rows, (1 << ps_sps->i1_log2_ctb_size)) +
801                                 ps_sps->i2_pic_crop_top_offset * crop_unit_y;
802                 last_ctb_y = (last_row >> ps_sps->i1_log2_ctb_size) - 1;
803                 /* Since deblocking works with a shift of -4, -4 ,wait till next CTB row is processed */
804                 last_ctb_y++;
805                 /* In case of a  conformance window, an extra wait of one row might be needed */
806                 last_ctb_y++;
807                 last_ctb_y = MIN(last_ctb_y, (ps_sps->i2_pic_ht_in_ctb - 1));
808 
809                 idx = (last_ctb_y * ps_sps->i2_pic_wd_in_ctb);
810 
811                 /*Check if the row below is completely processed before proceeding with format conversion*/
812                 status = 1;
813                 for(ctb_in_row = 0; (WORD32)ctb_in_row < ps_sps->i2_pic_wd_in_ctb; ctb_in_row++)
814                 {
815                     pu1_buf = (ps_codec->pu1_proc_map + idx + ctb_in_row);
816                     status &= *pu1_buf;
817                 }
818 
819                 if(status)
820                 {
821                     break;
822                 }
823                 else
824                 {
825                     ithread_yield();
826                 }
827             }
828         }
829 
830 
831         if((IV_YUV_420SP_UV == ps_codec->e_chroma_fmt) || (IV_YUV_420SP_VU == ps_codec->e_chroma_fmt))
832         {
833 
834             ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr(pu1_y_src, pu1_uv_src,
835                                                                           pu1_y_dst_tmp, pu1_uv_dst_tmp,
836                                                                           ps_codec->i4_disp_wd,
837                                                                           num_rows,
838                                                                           ps_codec->i4_strd,
839                                                                           ps_codec->i4_strd,
840                                                                           ps_codec->i4_disp_strd,
841                                                                           ps_codec->i4_disp_strd);
842         }
843         else if(IV_YUV_420P == ps_codec->e_chroma_fmt)
844         {
845 
846             if(0 == disable_luma_copy)
847             {
848                 // copy luma
849                 WORD32 i;
850                 WORD32 num_cols = ps_codec->i4_disp_wd;
851 
852                 for(i = 0; i < num_rows; i++)
853                 {
854                     memcpy(pu1_y_dst_tmp, pu1_y_src, num_cols);
855                     pu1_y_dst_tmp += ps_codec->i4_disp_strd;
856                     pu1_y_src += ps_codec->i4_strd;
857                 }
858 
859                 disable_luma_copy = 1;
860             }
861 
862             ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr(pu1_y_src, pu1_uv_src,
863                                                                          pu1_y_dst_tmp, pu1_u_dst_tmp, pu1_v_dst_tmp,
864                                                                          ps_codec->i4_disp_wd,
865                                                                          num_rows,
866                                                                          ps_codec->i4_strd,
867                                                                          ps_codec->i4_strd,
868                                                                          ps_codec->i4_disp_strd,
869                                                                          (ps_codec->i4_disp_strd / 2),
870                                                                          is_u_first,
871                                                                          disable_luma_copy);
872 
873         }
874         else if(IV_RGB_565 == ps_codec->e_chroma_fmt)
875         {
876 
877             ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr(pu1_y_src, pu1_uv_src,
878                                                                            pu2_rgb_dst_tmp,
879                                                                            ps_codec->i4_disp_wd,
880                                                                            num_rows,
881                                                                            ps_codec->i4_strd,
882                                                                            ps_codec->i4_strd,
883                                                                            ps_codec->i4_disp_strd,
884                                                                            is_u_first);
885 
886         }
887         else if(IV_RGBA_8888 == ps_codec->e_chroma_fmt)
888         {
889             ASSERT(is_u_first == 1);
890 
891             ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr(pu1_y_src,
892                                                                              pu1_uv_src,
893                                                                              pu4_rgb_dst_tmp,
894                                                                              ps_codec->i4_disp_wd,
895                                                                              num_rows,
896                                                                              ps_codec->i4_strd,
897                                                                              ps_codec->i4_strd,
898                                                                              ps_codec->i4_disp_strd,
899                                                                              is_u_first);
900 
901         }
902 
903 
904 
905     }
906     return (ret);
907 }
908 
909