1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 *  ihevcd_fmt_conv.c
22 *
23 * @brief
24 *  Contains functions for format conversion or frame copy of output buffer
25 *
26 * @author
27 *  Harish
28 *
29 * @par List of Functions:
30 *
31 * @remarks
32 *  None
33 *
34 *******************************************************************************
35 */
36 /*****************************************************************************/
37 /* File Includes                                                             */
38 /*****************************************************************************/
39 #include <stdio.h>
40 #include <stddef.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <assert.h>
44 
45 #include "ihevc_typedefs.h"
46 #include "iv.h"
47 #include "ivd.h"
48 #include "ihevcd_cxa.h"
49 #include "ithread.h"
50 
51 #include "ihevc_defs.h"
52 #include "ihevc_debug.h"
53 #include "ihevc_structs.h"
54 #include "ihevc_macros.h"
55 #include "ihevc_platform_macros.h"
56 #include "ihevc_cabac_tables.h"
57 #include "ihevc_disp_mgr.h"
58 
59 #include "ihevcd_defs.h"
60 #include "ihevcd_function_selector.h"
61 #include "ihevcd_structs.h"
62 #include "ihevcd_error.h"
63 #include "ihevcd_nal.h"
64 #include "ihevcd_bitstream.h"
65 #include "ihevcd_fmt_conv.h"
66 #include "ihevcd_profile.h"
67 
68 /* SIMD variants of format conversion modules do not support width less than 32 */
69 #define MIN_FMT_CONV_SIMD_WIDTH 32
70 /**
71 *******************************************************************************
72 *
73 * @brief Function used from copying a 420SP buffer
74 *
75 * @par   Description
76 * Function used from copying a 420SP buffer
77 *
78 * @param[in] pu1_y_src
79 *   Input Y pointer
80 *
81 * @param[in] pu1_uv_src
82 *   Input UV pointer (UV is interleaved either in UV or VU format)
83 *
84 * @param[in] pu1_y_dst
85 *   Output Y pointer
86 *
87 * @param[in] pu1_uv_dst
88 *   Output UV pointer (UV is interleaved in the same format as that of input)
89 *
90 * @param[in] wd
91 *   Width
92 *
93 * @param[in] ht
94 *   Height
95 *
96 * @param[in] src_y_strd
97 *   Input Y Stride
98 *
99 * @param[in] src_uv_strd
100 *   Input UV stride
101 *
102 * @param[in] dst_y_strd
103 *   Output Y stride
104 *
105 * @param[in] dst_uv_strd
106 *   Output UV stride
107 *
108 * @returns None
109 *
110 * @remarks In case there is a need to perform partial frame copy then
111 * by passion appropriate source and destination pointers and appropriate
112 * values for wd and ht it can be done
113 *
114 *******************************************************************************
115 */
ihevcd_fmt_conv_420sp_to_rgb565(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD16 * pu2_rgb_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_strd,WORD32 is_u_first)116 void ihevcd_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src,
117                                      UWORD8 *pu1_uv_src,
118                                      UWORD16 *pu2_rgb_dst,
119                                      WORD32 wd,
120                                      WORD32 ht,
121                                      WORD32 src_y_strd,
122                                      WORD32 src_uv_strd,
123                                      WORD32 dst_strd,
124                                      WORD32 is_u_first)
125 {
126 
127 
128     WORD16  i2_r, i2_g, i2_b;
129     UWORD32  u4_r, u4_g, u4_b;
130     WORD16  i2_i, i2_j;
131     UWORD8  *pu1_y_src_nxt;
132     UWORD16 *pu2_rgb_dst_NextRow;
133 
134     UWORD8 *pu1_u_src, *pu1_v_src;
135 
136     if(is_u_first)
137     {
138         pu1_u_src = (UWORD8 *)pu1_uv_src;
139         pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
140     }
141     else
142     {
143         pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
144         pu1_v_src = (UWORD8 *)pu1_uv_src;
145     }
146 
147     pu1_y_src_nxt   = pu1_y_src + src_y_strd;
148     pu2_rgb_dst_NextRow = pu2_rgb_dst + dst_strd;
149 
150     for(i2_i = 0; i2_i < (ht >> 1); i2_i++)
151     {
152         for(i2_j = (wd >> 1); i2_j > 0; i2_j--)
153         {
154             i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13);
155             i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) >> 13;
156             i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13;
157 
158             pu1_u_src += 2;
159             pu1_v_src += 2;
160             /* pixel 0 */
161             /* B */
162             u4_b = CLIP_U8(*pu1_y_src + i2_b);
163             u4_b >>= 3;
164             /* G */
165             u4_g = CLIP_U8(*pu1_y_src + i2_g);
166             u4_g >>= 2;
167             /* R */
168             u4_r = CLIP_U8(*pu1_y_src + i2_r);
169             u4_r >>= 3;
170 
171             pu1_y_src++;
172             *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
173 
174             /* pixel 1 */
175             /* B */
176             u4_b = CLIP_U8(*pu1_y_src + i2_b);
177             u4_b >>= 3;
178             /* G */
179             u4_g = CLIP_U8(*pu1_y_src + i2_g);
180             u4_g >>= 2;
181             /* R */
182             u4_r = CLIP_U8(*pu1_y_src + i2_r);
183             u4_r >>= 3;
184 
185             pu1_y_src++;
186             *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
187 
188             /* pixel 2 */
189             /* B */
190             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
191             u4_b >>= 3;
192             /* G */
193             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
194             u4_g >>= 2;
195             /* R */
196             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
197             u4_r >>= 3;
198 
199             pu1_y_src_nxt++;
200             *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
201 
202             /* pixel 3 */
203             /* B */
204             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
205             u4_b >>= 3;
206             /* G */
207             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
208             u4_g >>= 2;
209             /* R */
210             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
211             u4_r >>= 3;
212 
213             pu1_y_src_nxt++;
214             *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
215 
216         }
217 
218         pu1_u_src = pu1_u_src + src_uv_strd - wd;
219         pu1_v_src = pu1_v_src + src_uv_strd - wd;
220 
221         pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd;
222         pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd;
223 
224         pu2_rgb_dst = pu2_rgb_dst_NextRow - wd + dst_strd;
225         pu2_rgb_dst_NextRow = pu2_rgb_dst_NextRow + (dst_strd << 1) - wd;
226     }
227 
228 
229 }
230 
/**
*******************************************************************************
*
* @brief Packs one pixel into a 32 bit word
*
* @par   Description
* Adds the per-channel chroma offsets to the luma sample, clips every sum with
* CLIP_U8 and packs the result with R in bits 23..16, G in bits 15..8 and
* B in bits 7..0. Bits 31..24 are left as zero
*
* @param[in] luma
*   Luma sample of the pixel
*
* @param[in] r_off
*   Offset added to luma to obtain red
*
* @param[in] g_off
*   Offset added to luma to obtain green
*
* @param[in] b_off
*   Offset added to luma to obtain blue
*
* @returns Packed 32 bit pixel
*
*******************************************************************************
*/
static UWORD32 ihevcd_fmt_conv_pack_rgba8888(WORD32 luma,
                                             WORD32 r_off,
                                             WORD32 g_off,
                                             WORD32 b_off)
{
    UWORD32 u4_r = CLIP_U8(luma + r_off);
    UWORD32 u4_g = CLIP_U8(luma + g_off);
    UWORD32 u4_b = CLIP_U8(luma + b_off);

    return ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
}

/**
*******************************************************************************
*
* @brief Function used for converting a 420SP buffer to RGBA8888
*
* @par   Description
* The picture is processed in blocks of 2x2 luma samples. For every block one
* U and one V sample are read and turned into per-channel offsets using the
* fixed point coefficients COEFF1 to COEFF4 (products are shifted right by 13).
* The offsets are added to each of the four luma samples and the clipped
* results are packed into one 32 bit word per pixel
*
* @param[in] pu1_y_src
*   Input Y pointer
*
* @param[in] pu1_uv_src
*   Input UV pointer (UV is interleaved either in UV or VU format)
*
* @param[out] pu4_rgba_dst
*   Output pointer, one UWORD32 per pixel
*
* @param[in] wd
*   Width in pixels. Only (wd >> 1) pixel pairs are converted per row
*
* @param[in] ht
*   Height in pixels. Only (ht >> 1) row pairs are converted
*
* @param[in] src_y_strd
*   Input Y stride
*
* @param[in] src_uv_strd
*   Input UV stride
*
* @param[in] dst_strd
*   Output stride, in units of UWORD32 elements
*
* @param[in] is_u_first
*   Flag to indicate if U is the first byte in input chroma part
*
* @returns None
*
* @remarks In case there is a need to perform partial frame conversion then
* by passing appropriate source and destination pointers and appropriate
* values for wd and ht it can be done
*
*******************************************************************************
*/
void ihevcd_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src,
                                       UWORD8 *pu1_uv_src,
                                       UWORD32 *pu4_rgba_dst,
                                       WORD32 wd,
                                       WORD32 ht,
                                       WORD32 src_y_strd,
                                       WORD32 src_uv_strd,
                                       WORD32 dst_strd,
                                       WORD32 is_u_first)
{
    WORD16 i2_r, i2_g, i2_b;
    WORD32 row, col;
    WORD32 num_row_pairs, num_col_pairs, cols_done;
    UWORD8 *pu1_y_src_nxt;
    UWORD32 *pu4_rgba_dst_nxt;
    UWORD8 *pu1_u_src, *pu1_v_src;

    if(is_u_first)
    {
        pu1_u_src = pu1_uv_src;
        pu1_v_src = pu1_uv_src + 1;
    }
    else
    {
        pu1_u_src = pu1_uv_src + 1;
        pu1_v_src = pu1_uv_src;
    }

    num_row_pairs = ht >> 1;
    num_col_pairs = wd >> 1;

    /* Number of samples actually consumed per row by the inner loop. This is */
    /* what has to be rewound at the end of a row; using wd directly would    */
    /* make all pointers drift by one sample per row pair when wd is odd      */
    cols_done = (num_col_pairs > 0) ? (num_col_pairs * 2) : 0;

    pu1_y_src_nxt = pu1_y_src + src_y_strd;
    pu4_rgba_dst_nxt = pu4_rgba_dst + dst_strd;

    for(row = 0; row < num_row_pairs; row++)
    {
        for(col = 0; col < num_col_pairs; col++)
        {
            /* Chroma contribution shared by the 2x2 luma block */
            i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13);
            i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) >> 13;
            i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13;

            pu1_u_src += 2;
            pu1_v_src += 2;

            /* pixel 0 and pixel 1 : current row */
            *pu4_rgba_dst++ = ihevcd_fmt_conv_pack_rgba8888(*pu1_y_src, i2_r, i2_g, i2_b);
            pu1_y_src++;
            *pu4_rgba_dst++ = ihevcd_fmt_conv_pack_rgba8888(*pu1_y_src, i2_r, i2_g, i2_b);
            pu1_y_src++;

            /* pixel 2 and pixel 3 : next row */
            *pu4_rgba_dst_nxt++ = ihevcd_fmt_conv_pack_rgba8888(*pu1_y_src_nxt, i2_r, i2_g, i2_b);
            pu1_y_src_nxt++;
            *pu4_rgba_dst_nxt++ = ihevcd_fmt_conv_pack_rgba8888(*pu1_y_src_nxt, i2_r, i2_g, i2_b);
            pu1_y_src_nxt++;
        }

        /* Chroma advances by one row, luma and output by two rows */
        pu1_u_src += src_uv_strd - cols_done;
        pu1_v_src += src_uv_strd - cols_done;

        pu1_y_src += (2 * src_y_strd) - cols_done;
        pu1_y_src_nxt += (2 * src_y_strd) - cols_done;

        pu4_rgba_dst += (2 * dst_strd) - cols_done;
        pu4_rgba_dst_nxt += (2 * dst_strd) - cols_done;
    }
}
333 
334 /**
335 *******************************************************************************
336 *
337 * @brief Function used from copying a 420SP buffer
338 *
339 * @par   Description
340 * Function used from copying a 420SP buffer
341 *
342 * @param[in] pu1_y_src
343 *   Input Y pointer
344 *
345 * @param[in] pu1_uv_src
346 *   Input UV pointer (UV is interleaved either in UV or VU format)
347 *
348 * @param[in] pu1_y_dst
349 *   Output Y pointer
350 *
351 * @param[in] pu1_uv_dst
352 *   Output UV pointer (UV is interleaved in the same format as that of input)
353 *
354 * @param[in] wd
355 *   Width
356 *
357 * @param[in] ht
358 *   Height
359 *
360 * @param[in] src_y_strd
361 *   Input Y Stride
362 *
363 * @param[in] src_uv_strd
364 *   Input UV stride
365 *
366 * @param[in] dst_y_strd
367 *   Output Y stride
368 *
369 * @param[in] dst_uv_strd
370 *   Output UV stride
371 *
372 * @returns None
373 *
374 * @remarks In case there is a need to perform partial frame copy then
375 * by passion appropriate source and destination pointers and appropriate
376 * values for wd and ht it can be done
377 *
378 *******************************************************************************
379 */
380 
ihevcd_fmt_conv_420sp_to_420sp(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD8 * pu1_y_dst,UWORD8 * pu1_uv_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_y_strd,WORD32 dst_uv_strd)381 void ihevcd_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src,
382                                     UWORD8 *pu1_uv_src,
383                                     UWORD8 *pu1_y_dst,
384                                     UWORD8 *pu1_uv_dst,
385                                     WORD32 wd,
386                                     WORD32 ht,
387                                     WORD32 src_y_strd,
388                                     WORD32 src_uv_strd,
389                                     WORD32 dst_y_strd,
390                                     WORD32 dst_uv_strd)
391 {
392     UWORD8 *pu1_src, *pu1_dst;
393     WORD32 num_rows, num_cols, src_strd, dst_strd;
394     WORD32 i;
395 
396     /* copy luma */
397     pu1_src = (UWORD8 *)pu1_y_src;
398     pu1_dst = (UWORD8 *)pu1_y_dst;
399 
400     num_rows = ht;
401     num_cols = wd;
402 
403     src_strd = src_y_strd;
404     dst_strd = dst_y_strd;
405 
406     for(i = 0; i < num_rows; i++)
407     {
408         memcpy(pu1_dst, pu1_src, num_cols);
409         pu1_dst += dst_strd;
410         pu1_src += src_strd;
411     }
412 
413     /* copy U and V */
414     pu1_src = (UWORD8 *)pu1_uv_src;
415     pu1_dst = (UWORD8 *)pu1_uv_dst;
416 
417     num_rows = ht >> 1;
418     num_cols = wd;
419 
420     src_strd = src_uv_strd;
421     dst_strd = dst_uv_strd;
422 
423     for(i = 0; i < num_rows; i++)
424     {
425         memcpy(pu1_dst, pu1_src, num_cols);
426         pu1_dst += dst_strd;
427         pu1_src += src_strd;
428     }
429     return;
430 }
431 
432 
433 
434 /**
435 *******************************************************************************
436 *
437 * @brief Function used from copying a 420SP buffer
438 *
439 * @par   Description
440 * Function used from copying a 420SP buffer
441 *
442 * @param[in] pu1_y_src
443 *   Input Y pointer
444 *
445 * @param[in] pu1_uv_src
446 *   Input UV pointer (UV is interleaved either in UV or VU format)
447 *
448 * @param[in] pu1_y_dst
449 *   Output Y pointer
450 *
451 * @param[in] pu1_uv_dst
452 *   Output UV pointer (UV is interleaved in the same format as that of input)
453 *
454 * @param[in] wd
455 *   Width
456 *
457 * @param[in] ht
458 *   Height
459 *
460 * @param[in] src_y_strd
461 *   Input Y Stride
462 *
463 * @param[in] src_uv_strd
464 *   Input UV stride
465 *
466 * @param[in] dst_y_strd
467 *   Output Y stride
468 *
469 * @param[in] dst_uv_strd
470 *   Output UV stride
471 *
472 * @returns None
473 *
474 * @remarks In case there is a need to perform partial frame copy then
475 * by passion appropriate source and destination pointers and appropriate
476 * values for wd and ht it can be done
477 *
478 *******************************************************************************
479 */
ihevcd_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD8 * pu1_y_dst,UWORD8 * pu1_uv_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_y_strd,WORD32 dst_uv_strd)480 void ihevcd_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src,
481                                             UWORD8 *pu1_uv_src,
482                                             UWORD8 *pu1_y_dst,
483                                             UWORD8 *pu1_uv_dst,
484                                             WORD32 wd,
485                                             WORD32 ht,
486                                             WORD32 src_y_strd,
487                                             WORD32 src_uv_strd,
488                                             WORD32 dst_y_strd,
489                                             WORD32 dst_uv_strd)
490 {
491     UWORD8 *pu1_src, *pu1_dst;
492     WORD32 num_rows, num_cols, src_strd, dst_strd;
493     WORD32 i;
494 
495     /* copy luma */
496     pu1_src = (UWORD8 *)pu1_y_src;
497     pu1_dst = (UWORD8 *)pu1_y_dst;
498 
499     num_rows = ht;
500     num_cols = wd;
501 
502     src_strd = src_y_strd;
503     dst_strd = dst_y_strd;
504 
505     for(i = 0; i < num_rows; i++)
506     {
507         memcpy(pu1_dst, pu1_src, num_cols);
508         pu1_dst += dst_strd;
509         pu1_src += src_strd;
510     }
511 
512     /* copy U and V */
513     pu1_src = (UWORD8 *)pu1_uv_src;
514     pu1_dst = (UWORD8 *)pu1_uv_dst;
515 
516     num_rows = ht >> 1;
517     num_cols = wd;
518 
519     src_strd = src_uv_strd;
520     dst_strd = dst_uv_strd;
521 
522     for(i = 0; i < num_rows; i++)
523     {
524         WORD32 j;
525         for(j = 0; j < num_cols; j += 2)
526         {
527             pu1_dst[j + 0] = pu1_src[j + 1];
528             pu1_dst[j + 1] = pu1_src[j + 0];
529         }
530         pu1_dst += dst_strd;
531         pu1_src += src_strd;
532     }
533     return;
534 }
535 /**
536 *******************************************************************************
537 *
538 * @brief Function used from copying a 420SP buffer
539 *
540 * @par   Description
541 * Function used from copying a 420SP buffer
542 *
543 * @param[in] pu1_y_src
544 *   Input Y pointer
545 *
546 * @param[in] pu1_uv_src
547 *   Input UV pointer (UV is interleaved either in UV or VU format)
548 *
549 * @param[in] pu1_y_dst
550 *   Output Y pointer
551 *
552 * @param[in] pu1_u_dst
553 *   Output U pointer
554 *
555 * @param[in] pu1_v_dst
556 *   Output V pointer
557 *
558 * @param[in] wd
559 *   Width
560 *
561 * @param[in] ht
562 *   Height
563 *
564 * @param[in] src_y_strd
565 *   Input Y Stride
566 *
567 * @param[in] src_uv_strd
568 *   Input UV stride
569 *
570 * @param[in] dst_y_strd
571 *   Output Y stride
572 *
573 * @param[in] dst_uv_strd
574 *   Output UV stride
575 *
576 * @param[in] is_u_first
577 *   Flag to indicate if U is the first byte in input chroma part
578 *
579 * @returns none
580 *
581 * @remarks In case there is a need to perform partial frame copy then
582 * by passion appropriate source and destination pointers and appropriate
583 * values for wd and ht it can be done
584 *
585 *******************************************************************************
586 */
587 
588 
ihevcd_fmt_conv_420sp_to_420p(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD8 * pu1_y_dst,UWORD8 * pu1_u_dst,UWORD8 * pu1_v_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_y_strd,WORD32 dst_uv_strd,WORD32 is_u_first,WORD32 disable_luma_copy)589 void ihevcd_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src,
590                                    UWORD8 *pu1_uv_src,
591                                    UWORD8 *pu1_y_dst,
592                                    UWORD8 *pu1_u_dst,
593                                    UWORD8 *pu1_v_dst,
594                                    WORD32 wd,
595                                    WORD32 ht,
596                                    WORD32 src_y_strd,
597                                    WORD32 src_uv_strd,
598                                    WORD32 dst_y_strd,
599                                    WORD32 dst_uv_strd,
600                                    WORD32 is_u_first,
601                                    WORD32 disable_luma_copy)
602 {
603     UWORD8 *pu1_src, *pu1_dst;
604     UWORD8 *pu1_u_src, *pu1_v_src;
605     WORD32 num_rows, num_cols, src_strd, dst_strd;
606     WORD32 i, j;
607 
608     if(0 == disable_luma_copy)
609     {
610         /* copy luma */
611         pu1_src = (UWORD8 *)pu1_y_src;
612         pu1_dst = (UWORD8 *)pu1_y_dst;
613 
614         num_rows = ht;
615         num_cols = wd;
616 
617         src_strd = src_y_strd;
618         dst_strd = dst_y_strd;
619 
620         for(i = 0; i < num_rows; i++)
621         {
622             memcpy(pu1_dst, pu1_src, num_cols);
623             pu1_dst += dst_strd;
624             pu1_src += src_strd;
625         }
626     }
627     /* de-interleave U and V and copy to destination */
628     if(is_u_first)
629     {
630         pu1_u_src = (UWORD8 *)pu1_uv_src;
631         pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
632     }
633     else
634     {
635         pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
636         pu1_v_src = (UWORD8 *)pu1_uv_src;
637     }
638 
639 
640     num_rows = ht >> 1;
641     num_cols = wd >> 1;
642 
643     src_strd = src_uv_strd;
644     dst_strd = dst_uv_strd;
645 
646     for(i = 0; i < num_rows; i++)
647     {
648         for(j = 0; j < num_cols; j++)
649         {
650             pu1_u_dst[j] = pu1_u_src[j * 2];
651             pu1_v_dst[j] = pu1_v_src[j * 2];
652         }
653 
654         pu1_u_dst += dst_strd;
655         pu1_v_dst += dst_strd;
656         pu1_u_src += src_strd;
657         pu1_v_src += src_strd;
658     }
659     return;
660 }
661 
662 
663 
664 /**
665 *******************************************************************************
666 *
667 * @brief Function used from format conversion or frame copy
668 *
669 * @par   Description
670 * Function used from copying or converting a reference frame to display buffer
671 * in non shared mode
672 *
673 * @param[in] pu1_y_dst
674 *   Output Y pointer
675 *
676 * @param[in] pu1_u_dst
677 *   Output U/UV pointer ( UV is interleaved in the same format as that of input)
678 *
679 * @param[in] pu1_v_dst
680 *   Output V pointer ( used in 420P output case)
681 *
682 * @param[in] blocking
683 *   To indicate whether format conversion should wait till frame is reconstructed
684 *   and then return after complete copy is done. To be set to 1 when called at the
685 *   end of frame processing and set to 0 when called between frame processing modules
686 *   in order to utilize available MCPS
687 *
688 * @returns Error from IHEVCD_ERROR_T
689 *
690 *******************************************************************************
691 */
ihevcd_fmt_conv(codec_t * ps_codec,process_ctxt_t * ps_proc,UWORD8 * pu1_y_dst,UWORD8 * pu1_u_dst,UWORD8 * pu1_v_dst,WORD32 cur_row,WORD32 num_rows)692 IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec,
693                                process_ctxt_t *ps_proc,
694                                UWORD8 *pu1_y_dst,
695                                UWORD8 *pu1_u_dst,
696                                UWORD8 *pu1_v_dst,
697                                WORD32 cur_row,
698                                WORD32 num_rows)
699 {
700     IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
701     pic_buf_t *ps_disp_pic;
702     UWORD8 *pu1_y_src, *pu1_uv_src;
703     UWORD8 *pu1_y_dst_tmp, *pu1_uv_dst_tmp;
704     UWORD8 *pu1_u_dst_tmp, *pu1_v_dst_tmp;
705     UWORD16 *pu2_rgb_dst_tmp;
706     UWORD32 *pu4_rgb_dst_tmp;
707     WORD32 is_u_first;
708     UWORD8 *pu1_luma;
709     UWORD8 *pu1_chroma;
710     sps_t *ps_sps;
711     WORD32 disable_luma_copy;
712     WORD32 crop_unit_x, crop_unit_y;
713 
714     if(0 == num_rows)
715         return ret;
716 
717     /* In case processing is disabled, then no need to format convert/copy */
718     PROFILE_DISABLE_FMT_CONV();
719     ps_sps = ps_proc->ps_sps;
720 
721     crop_unit_x = 1;
722     crop_unit_y = 1;
723 
724     if(CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc)
725     {
726         crop_unit_x = 2;
727         crop_unit_y = 2;
728     }
729 
730     ps_disp_pic = ps_codec->ps_disp_buf;
731     pu1_luma = ps_disp_pic->pu1_luma;
732     pu1_chroma = ps_disp_pic->pu1_chroma;
733 
734 
735     /* Take care of cropping */
736     pu1_luma    += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset * crop_unit_y + ps_sps->i2_pic_crop_left_offset * crop_unit_x;
737 
738     /* Left offset is multiplied by 2 because buffer is UV interleaved */
739     pu1_chroma  += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset + ps_sps->i2_pic_crop_left_offset * 2;
740 
741 
742     is_u_first = (IV_YUV_420SP_UV == ps_codec->e_ref_chroma_fmt) ? 1 : 0;
743 
744     /* In case of 420P output luma copy is disabled for shared mode */
745     disable_luma_copy = 0;
746     if(1 == ps_codec->i4_share_disp_buf)
747     {
748         disable_luma_copy = 1;
749     }
750 
751 
752 
753     {
754         pu1_y_src   = pu1_luma + cur_row * ps_codec->i4_strd;
755         pu1_uv_src  = pu1_chroma + (cur_row / 2) * ps_codec->i4_strd;
756 
757         /* In case of shared mode, with 420P output, get chroma destination */
758         if((1 == ps_codec->i4_share_disp_buf) && (IV_YUV_420P == ps_codec->e_chroma_fmt))
759         {
760             WORD32 i;
761             for(i = 0; i < ps_codec->i4_share_disp_buf_cnt; i++)
762             {
763                 WORD32 diff = ps_disp_pic->pu1_luma - ps_codec->s_disp_buffer[i].pu1_bufs[0];
764                 if(diff == (ps_codec->i4_strd * PAD_TOP + PAD_LEFT))
765                 {
766                     pu1_u_dst = ps_codec->s_disp_buffer[i].pu1_bufs[1];
767                     pu1_u_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2);
768 
769                     pu1_v_dst = ps_codec->s_disp_buffer[i].pu1_bufs[2];
770                     pu1_v_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2);
771                     break;
772                 }
773             }
774         }
775         pu2_rgb_dst_tmp  = (UWORD16 *)pu1_y_dst;
776         pu2_rgb_dst_tmp  += cur_row * ps_codec->i4_disp_strd;
777         pu4_rgb_dst_tmp  = (UWORD32 *)pu1_y_dst;
778         pu4_rgb_dst_tmp  += cur_row * ps_codec->i4_disp_strd;
779         pu1_y_dst_tmp  = pu1_y_dst  + cur_row * ps_codec->i4_disp_strd;
780         pu1_uv_dst_tmp = pu1_u_dst  + (cur_row / 2) * ps_codec->i4_disp_strd;
781         pu1_u_dst_tmp = pu1_u_dst  + (cur_row / 2) * ps_codec->i4_disp_strd / 2;
782         pu1_v_dst_tmp = pu1_v_dst  + (cur_row / 2) * ps_codec->i4_disp_strd / 2;
783 
784         /* In case of multi threaded implementation, format conversion might be called
785          * before reconstruction is completed. If the frame being converted/copied
786          * is same as the frame being reconstructed,
787          * Check how many rows can be format converted
788          * Convert those many rows and then check for remaining rows and so on
789          */
790 
791         if((0 == ps_codec->i4_flush_mode) && (ps_codec->i4_disp_buf_id == ps_proc->i4_cur_pic_buf_id) && (1 < ps_codec->i4_num_cores))
792         {
793             WORD32 idx;
794             UWORD8 *pu1_buf;
795             WORD32 status;
796             WORD32 last_row = cur_row + num_rows;
797             WORD32 last_ctb_y;
798             UWORD32 ctb_in_row;
799 
800             while(1)
801             {
802                 last_row = cur_row + MAX(num_rows, (1 << ps_sps->i1_log2_ctb_size)) +
803                                 ps_sps->i2_pic_crop_top_offset * crop_unit_y;
804                 last_ctb_y = (last_row >> ps_sps->i1_log2_ctb_size) - 1;
805                 /* Since deblocking works with a shift of -4, -4 ,wait till next CTB row is processed */
806                 last_ctb_y++;
807                 /* In case of a  conformance window, an extra wait of one row might be needed */
808                 last_ctb_y++;
809                 last_ctb_y = MIN(last_ctb_y, (ps_sps->i2_pic_ht_in_ctb - 1));
810 
811                 idx = (last_ctb_y * ps_sps->i2_pic_wd_in_ctb);
812 
813                 /*Check if the row below is completely processed before proceeding with format conversion*/
814                 status = 1;
815                 for(ctb_in_row = 0; (WORD32)ctb_in_row < ps_sps->i2_pic_wd_in_ctb; ctb_in_row++)
816                 {
817                     pu1_buf = (ps_codec->pu1_proc_map + idx + ctb_in_row);
818                     status &= *pu1_buf;
819                 }
820 
821                 if(status)
822                 {
823                     break;
824                 }
825                 else
826                 {
827                     ithread_yield();
828                 }
829             }
830         }
831 
832 
833         if((IV_YUV_420SP_UV == ps_codec->e_chroma_fmt) || (IV_YUV_420SP_VU == ps_codec->e_chroma_fmt))
834         {
835             ihevcd_fmt_conv_420sp_to_420sp_ft *fmt_conv_fptr;
836             if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
837             {
838                 fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr;
839             }
840             else
841             {
842                 fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_420sp;
843             }
844             fmt_conv_fptr(pu1_y_src, pu1_uv_src,
845                           pu1_y_dst_tmp, pu1_uv_dst_tmp,
846                           ps_codec->i4_disp_wd,
847                           num_rows,
848                           ps_codec->i4_strd,
849                           ps_codec->i4_strd,
850                           ps_codec->i4_disp_strd,
851                           ps_codec->i4_disp_strd);
852         }
853         else if(IV_YUV_420P == ps_codec->e_chroma_fmt)
854         {
855             ihevcd_fmt_conv_420sp_to_420p_ft *fmt_conv_fptr;
856             if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
857             {
858                 fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr;
859             }
860             else
861             {
862                 fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_420p;
863             }
864 
865             if(0 == disable_luma_copy)
866             {
867                 // copy luma
868                 WORD32 i;
869                 WORD32 num_cols = ps_codec->i4_disp_wd;
870 
871                 for(i = 0; i < num_rows; i++)
872                 {
873                     memcpy(pu1_y_dst_tmp, pu1_y_src, num_cols);
874                     pu1_y_dst_tmp += ps_codec->i4_disp_strd;
875                     pu1_y_src += ps_codec->i4_strd;
876                 }
877 
878                 disable_luma_copy = 1;
879             }
880             fmt_conv_fptr(pu1_y_src, pu1_uv_src,
881                           pu1_y_dst_tmp, pu1_u_dst_tmp, pu1_v_dst_tmp,
882                           ps_codec->i4_disp_wd,
883                           num_rows,
884                           ps_codec->i4_strd,
885                           ps_codec->i4_strd,
886                           ps_codec->i4_disp_strd,
887                           (ps_codec->i4_disp_strd / 2),
888                           is_u_first,
889                           disable_luma_copy);
890         }
891         else if(IV_RGB_565 == ps_codec->e_chroma_fmt)
892         {
893             ihevcd_fmt_conv_420sp_to_rgb565_ft *fmt_conv_fptr;
894             if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
895             {
896                 fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr;
897             }
898             else
899             {
900                 fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_rgb565;
901             }
902 
903             fmt_conv_fptr(pu1_y_src, pu1_uv_src,
904                           pu2_rgb_dst_tmp,
905                           ps_codec->i4_disp_wd,
906                           num_rows,
907                           ps_codec->i4_strd,
908                           ps_codec->i4_strd,
909                           ps_codec->i4_disp_strd,
910                           is_u_first);
911         }
912         else if(IV_RGBA_8888 == ps_codec->e_chroma_fmt)
913         {
914             ihevcd_fmt_conv_420sp_to_rgba8888_ft *fmt_conv_fptr;
915             if(ps_codec->i4_disp_wd >= MIN_FMT_CONV_SIMD_WIDTH)
916             {
917                 fmt_conv_fptr = ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr;
918             }
919             else
920             {
921                 fmt_conv_fptr = ihevcd_fmt_conv_420sp_to_rgba8888;
922             }
923 
924             ASSERT(is_u_first == 1);
925             fmt_conv_fptr(pu1_y_src,
926                           pu1_uv_src,
927                           pu4_rgb_dst_tmp,
928                           ps_codec->i4_disp_wd,
929                           num_rows,
930                           ps_codec->i4_strd,
931                           ps_codec->i4_strd,
932                           ps_codec->i4_disp_strd,
933                           is_u_first);
934         }
935 
936 
937 
938     }
939     return (ret);
940 }
941 
942