1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 * ihevcd_fmt_conv.c
22 *
23 * @brief
24 * Contains functions for format conversion or frame copy of output buffer
25 *
26 * @author
27 * Harish
28 *
29 * @par List of Functions:
30 *
31 * @remarks
32 * None
33 *
34 *******************************************************************************
35 */
36 /*****************************************************************************/
37 /* File Includes */
38 /*****************************************************************************/
39 #include <stdio.h>
40 #include <stddef.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <assert.h>
44
45 #include "ihevc_typedefs.h"
46 #include "iv.h"
47 #include "ivd.h"
48 #include "ihevcd_cxa.h"
49 #include "ithread.h"
50
51 #include "ihevc_defs.h"
52 #include "ihevc_debug.h"
53 #include "ihevc_structs.h"
54 #include "ihevc_macros.h"
55 #include "ihevc_platform_macros.h"
56 #include "ihevc_cabac_tables.h"
57 #include "ihevc_disp_mgr.h"
58
59 #include "ihevcd_defs.h"
60 #include "ihevcd_function_selector.h"
61 #include "ihevcd_structs.h"
62 #include "ihevcd_error.h"
63 #include "ihevcd_nal.h"
64 #include "ihevcd_bitstream.h"
65 #include "ihevcd_fmt_conv.h"
66 #include "ihevcd_profile.h"
67
68 /**
69 *******************************************************************************
70 *
71 * @brief Function used from copying a 420SP buffer
72 *
73 * @par Description
74 * Function used from copying a 420SP buffer
75 *
76 * @param[in] pu1_y_src
77 * Input Y pointer
78 *
79 * @param[in] pu1_uv_src
80 * Input UV pointer (UV is interleaved either in UV or VU format)
81 *
82 * @param[in] pu1_y_dst
83 * Output Y pointer
84 *
85 * @param[in] pu1_uv_dst
86 * Output UV pointer (UV is interleaved in the same format as that of input)
87 *
88 * @param[in] wd
89 * Width
90 *
91 * @param[in] ht
92 * Height
93 *
94 * @param[in] src_y_strd
95 * Input Y Stride
96 *
97 * @param[in] src_uv_strd
98 * Input UV stride
99 *
100 * @param[in] dst_y_strd
101 * Output Y stride
102 *
103 * @param[in] dst_uv_strd
104 * Output UV stride
105 *
106 * @returns None
107 *
108 * @remarks In case there is a need to perform partial frame copy then
109 * by passion appropriate source and destination pointers and appropriate
110 * values for wd and ht it can be done
111 *
112 *******************************************************************************
113 */
ihevcd_fmt_conv_420sp_to_rgb565(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD16 * pu2_rgb_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_strd,WORD32 is_u_first)114 void ihevcd_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src,
115 UWORD8 *pu1_uv_src,
116 UWORD16 *pu2_rgb_dst,
117 WORD32 wd,
118 WORD32 ht,
119 WORD32 src_y_strd,
120 WORD32 src_uv_strd,
121 WORD32 dst_strd,
122 WORD32 is_u_first)
123 {
124
125
126 WORD16 i2_r, i2_g, i2_b;
127 UWORD32 u4_r, u4_g, u4_b;
128 WORD16 i2_i, i2_j;
129 UWORD8 *pu1_y_src_nxt;
130 UWORD16 *pu2_rgb_dst_NextRow;
131
132 UWORD8 *pu1_u_src, *pu1_v_src;
133
134 if(is_u_first)
135 {
136 pu1_u_src = (UWORD8 *)pu1_uv_src;
137 pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
138 }
139 else
140 {
141 pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
142 pu1_v_src = (UWORD8 *)pu1_uv_src;
143 }
144
145 pu1_y_src_nxt = pu1_y_src + src_y_strd;
146 pu2_rgb_dst_NextRow = pu2_rgb_dst + dst_strd;
147
148 for(i2_i = 0; i2_i < (ht >> 1); i2_i++)
149 {
150 for(i2_j = (wd >> 1); i2_j > 0; i2_j--)
151 {
152 i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13);
153 i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) >> 13;
154 i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13;
155
156 pu1_u_src += 2;
157 pu1_v_src += 2;
158 /* pixel 0 */
159 /* B */
160 u4_b = CLIP_U8(*pu1_y_src + i2_b);
161 u4_b >>= 3;
162 /* G */
163 u4_g = CLIP_U8(*pu1_y_src + i2_g);
164 u4_g >>= 2;
165 /* R */
166 u4_r = CLIP_U8(*pu1_y_src + i2_r);
167 u4_r >>= 3;
168
169 pu1_y_src++;
170 *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
171
172 /* pixel 1 */
173 /* B */
174 u4_b = CLIP_U8(*pu1_y_src + i2_b);
175 u4_b >>= 3;
176 /* G */
177 u4_g = CLIP_U8(*pu1_y_src + i2_g);
178 u4_g >>= 2;
179 /* R */
180 u4_r = CLIP_U8(*pu1_y_src + i2_r);
181 u4_r >>= 3;
182
183 pu1_y_src++;
184 *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
185
186 /* pixel 2 */
187 /* B */
188 u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
189 u4_b >>= 3;
190 /* G */
191 u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
192 u4_g >>= 2;
193 /* R */
194 u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
195 u4_r >>= 3;
196
197 pu1_y_src_nxt++;
198 *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
199
200 /* pixel 3 */
201 /* B */
202 u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
203 u4_b >>= 3;
204 /* G */
205 u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
206 u4_g >>= 2;
207 /* R */
208 u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
209 u4_r >>= 3;
210
211 pu1_y_src_nxt++;
212 *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
213
214 }
215
216 pu1_u_src = pu1_u_src + src_uv_strd - wd;
217 pu1_v_src = pu1_v_src + src_uv_strd - wd;
218
219 pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd;
220 pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd;
221
222 pu2_rgb_dst = pu2_rgb_dst_NextRow - wd + dst_strd;
223 pu2_rgb_dst_NextRow = pu2_rgb_dst_NextRow + (dst_strd << 1) - wd;
224 }
225
226
227 }
228
/**
*******************************************************************************
*
* @brief Converts a YUV 420 semi-planar buffer to 32 bit RGB
*
* @par   Description
*  Each interleaved chroma pair is shared by a 2x2 block of luma samples. The
*  chroma contribution to R, G and B is computed once per block using the
*  fixed point coefficients COEFF1..COEFF4 (products are scaled down by
*  >> 13), added to each of the four luma samples and clipped to 8 bits.
*  Every output word holds R in bits 23..16, G in bits 15..8 and B in bits
*  7..0; bits 31..24 are written as zero.
*
* @param[in] pu1_y_src
*  Input Y pointer
*
* @param[in] pu1_uv_src
*  Input UV pointer (UV is interleaved either in UV or VU format)
*
* @param[out] pu4_rgba_dst
*  Output pointer, one 32 bit word per pixel
*
* @param[in] wd
*  Width in pixels
*
* @param[in] ht
*  Height in rows
*
* @param[in] src_y_strd
*  Input Y stride
*
* @param[in] src_uv_strd
*  Input UV stride
*
* @param[in] dst_strd
*  Output stride, in units of 32 bit pixels (it is added to a UWORD32 pointer)
*
* @param[in] is_u_first
*  Non-zero if U is the first byte of every interleaved chroma pair
*
* @returns None
*
* @remarks Only (wd >> 1) column pairs and (ht >> 1) row pairs are converted,
*  so an odd trailing column or row is left untouched. Partial frame
*  conversion is possible by passing appropriate source and destination
*  pointers together with appropriate values for wd and ht.
*
*******************************************************************************
*/
void ihevcd_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src,
                                       UWORD8 *pu1_uv_src,
                                       UWORD32 *pu4_rgba_dst,
                                       WORD32 wd,
                                       WORD32 ht,
                                       WORD32 src_y_strd,
                                       WORD32 src_uv_strd,
                                       WORD32 dst_strd,
                                       WORD32 is_u_first)
{
    /* 32 bit counters: the dimensions are WORD32 and must not be truncated */
    WORD32 num_row_pairs = ht >> 1;
    WORD32 num_col_pairs = wd >> 1;
    /* Byte position of U and V inside every interleaved chroma pair */
    WORD32 u_ofst = is_u_first ? 0 : 1;
    WORD32 v_ofst = is_u_first ? 1 : 0;
    WORD32 row_pair, col_pair;

    for(row_pair = 0; row_pair < num_row_pairs; row_pair++)
    {
        UWORD8 *apu1_y_row[2];
        UWORD32 *apu4_dst_row[2];

        apu1_y_row[0] = pu1_y_src;
        apu1_y_row[1] = pu1_y_src + src_y_strd;
        apu4_dst_row[0] = pu4_rgba_dst;
        apu4_dst_row[1] = pu4_rgba_dst + dst_strd;

        for(col_pair = 0; col_pair < num_col_pairs; col_pair++)
        {
            WORD32 i4_u = pu1_uv_src[2 * col_pair + u_ofst] - 128;
            WORD32 i4_v = pu1_uv_src[2 * col_pair + v_ofst] - 128;
            /* Chroma contribution, shared by the whole 2x2 luma block */
            WORD16 i2_b = (WORD16)((i4_u * COEFF4) >> 13);
            WORD16 i2_g = (WORD16)((i4_u * COEFF2 + i4_v * COEFF3) >> 13);
            WORD16 i2_r = (WORD16)((i4_v * COEFF1) >> 13);
            WORD32 blk_row, blk_col;

            for(blk_row = 0; blk_row < 2; blk_row++)
            {
                for(blk_col = 0; blk_col < 2; blk_col++)
                {
                    WORD32 col = 2 * col_pair + blk_col;
                    WORD32 i4_y = apu1_y_row[blk_row][col];
                    UWORD32 u4_b = CLIP_U8(i4_y + i2_b);
                    UWORD32 u4_g = CLIP_U8(i4_y + i2_g);
                    UWORD32 u4_r = CLIP_U8(i4_y + i2_r);

                    apu4_dst_row[blk_row][col] = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
                }
            }
        }

        /* Advance from the row starts. Rewinding a running pointer by wd   */
        /* is wrong for odd wd, because only 2 * (wd >> 1) samples are      */
        /* consumed per row; that skews every subsequent row by one sample. */
        pu1_uv_src += src_uv_strd;
        pu1_y_src += 2 * src_y_strd;
        pu4_rgba_dst += 2 * dst_strd;
    }
}
331
332 /**
333 *******************************************************************************
334 *
335 * @brief Function used from copying a 420SP buffer
336 *
337 * @par Description
338 * Function used from copying a 420SP buffer
339 *
340 * @param[in] pu1_y_src
341 * Input Y pointer
342 *
343 * @param[in] pu1_uv_src
344 * Input UV pointer (UV is interleaved either in UV or VU format)
345 *
346 * @param[in] pu1_y_dst
347 * Output Y pointer
348 *
349 * @param[in] pu1_uv_dst
350 * Output UV pointer (UV is interleaved in the same format as that of input)
351 *
352 * @param[in] wd
353 * Width
354 *
355 * @param[in] ht
356 * Height
357 *
358 * @param[in] src_y_strd
359 * Input Y Stride
360 *
361 * @param[in] src_uv_strd
362 * Input UV stride
363 *
364 * @param[in] dst_y_strd
365 * Output Y stride
366 *
367 * @param[in] dst_uv_strd
368 * Output UV stride
369 *
370 * @returns None
371 *
372 * @remarks In case there is a need to perform partial frame copy then
373 * by passion appropriate source and destination pointers and appropriate
374 * values for wd and ht it can be done
375 *
376 *******************************************************************************
377 */
378
ihevcd_fmt_conv_420sp_to_420sp(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD8 * pu1_y_dst,UWORD8 * pu1_uv_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_y_strd,WORD32 dst_uv_strd)379 void ihevcd_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src,
380 UWORD8 *pu1_uv_src,
381 UWORD8 *pu1_y_dst,
382 UWORD8 *pu1_uv_dst,
383 WORD32 wd,
384 WORD32 ht,
385 WORD32 src_y_strd,
386 WORD32 src_uv_strd,
387 WORD32 dst_y_strd,
388 WORD32 dst_uv_strd)
389 {
390 UWORD8 *pu1_src, *pu1_dst;
391 WORD32 num_rows, num_cols, src_strd, dst_strd;
392 WORD32 i;
393
394 /* copy luma */
395 pu1_src = (UWORD8 *)pu1_y_src;
396 pu1_dst = (UWORD8 *)pu1_y_dst;
397
398 num_rows = ht;
399 num_cols = wd;
400
401 src_strd = src_y_strd;
402 dst_strd = dst_y_strd;
403
404 for(i = 0; i < num_rows; i++)
405 {
406 memcpy(pu1_dst, pu1_src, num_cols);
407 pu1_dst += dst_strd;
408 pu1_src += src_strd;
409 }
410
411 /* copy U and V */
412 pu1_src = (UWORD8 *)pu1_uv_src;
413 pu1_dst = (UWORD8 *)pu1_uv_dst;
414
415 num_rows = ht >> 1;
416 num_cols = wd;
417
418 src_strd = src_uv_strd;
419 dst_strd = dst_uv_strd;
420
421 for(i = 0; i < num_rows; i++)
422 {
423 memcpy(pu1_dst, pu1_src, num_cols);
424 pu1_dst += dst_strd;
425 pu1_src += src_strd;
426 }
427 return;
428 }
429
430
431
432 /**
433 *******************************************************************************
434 *
435 * @brief Function used from copying a 420SP buffer
436 *
437 * @par Description
438 * Function used from copying a 420SP buffer
439 *
440 * @param[in] pu1_y_src
441 * Input Y pointer
442 *
443 * @param[in] pu1_uv_src
444 * Input UV pointer (UV is interleaved either in UV or VU format)
445 *
446 * @param[in] pu1_y_dst
447 * Output Y pointer
448 *
449 * @param[in] pu1_uv_dst
450 * Output UV pointer (UV is interleaved in the same format as that of input)
451 *
452 * @param[in] wd
453 * Width
454 *
455 * @param[in] ht
456 * Height
457 *
458 * @param[in] src_y_strd
459 * Input Y Stride
460 *
461 * @param[in] src_uv_strd
462 * Input UV stride
463 *
464 * @param[in] dst_y_strd
465 * Output Y stride
466 *
467 * @param[in] dst_uv_strd
468 * Output UV stride
469 *
470 * @returns None
471 *
472 * @remarks In case there is a need to perform partial frame copy then
473 * by passion appropriate source and destination pointers and appropriate
474 * values for wd and ht it can be done
475 *
476 *******************************************************************************
477 */
ihevcd_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD8 * pu1_y_dst,UWORD8 * pu1_uv_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_y_strd,WORD32 dst_uv_strd)478 void ihevcd_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src,
479 UWORD8 *pu1_uv_src,
480 UWORD8 *pu1_y_dst,
481 UWORD8 *pu1_uv_dst,
482 WORD32 wd,
483 WORD32 ht,
484 WORD32 src_y_strd,
485 WORD32 src_uv_strd,
486 WORD32 dst_y_strd,
487 WORD32 dst_uv_strd)
488 {
489 UWORD8 *pu1_src, *pu1_dst;
490 WORD32 num_rows, num_cols, src_strd, dst_strd;
491 WORD32 i;
492
493 /* copy luma */
494 pu1_src = (UWORD8 *)pu1_y_src;
495 pu1_dst = (UWORD8 *)pu1_y_dst;
496
497 num_rows = ht;
498 num_cols = wd;
499
500 src_strd = src_y_strd;
501 dst_strd = dst_y_strd;
502
503 for(i = 0; i < num_rows; i++)
504 {
505 memcpy(pu1_dst, pu1_src, num_cols);
506 pu1_dst += dst_strd;
507 pu1_src += src_strd;
508 }
509
510 /* copy U and V */
511 pu1_src = (UWORD8 *)pu1_uv_src;
512 pu1_dst = (UWORD8 *)pu1_uv_dst;
513
514 num_rows = ht >> 1;
515 num_cols = wd;
516
517 src_strd = src_uv_strd;
518 dst_strd = dst_uv_strd;
519
520 for(i = 0; i < num_rows; i++)
521 {
522 WORD32 j;
523 for(j = 0; j < num_cols; j += 2)
524 {
525 pu1_dst[j + 0] = pu1_src[j + 1];
526 pu1_dst[j + 1] = pu1_src[j + 0];
527 }
528 pu1_dst += dst_strd;
529 pu1_src += src_strd;
530 }
531 return;
532 }
533 /**
534 *******************************************************************************
535 *
536 * @brief Function used from copying a 420SP buffer
537 *
538 * @par Description
539 * Function used from copying a 420SP buffer
540 *
541 * @param[in] pu1_y_src
542 * Input Y pointer
543 *
544 * @param[in] pu1_uv_src
545 * Input UV pointer (UV is interleaved either in UV or VU format)
546 *
547 * @param[in] pu1_y_dst
548 * Output Y pointer
549 *
550 * @param[in] pu1_u_dst
551 * Output U pointer
552 *
553 * @param[in] pu1_v_dst
554 * Output V pointer
555 *
556 * @param[in] wd
557 * Width
558 *
559 * @param[in] ht
560 * Height
561 *
562 * @param[in] src_y_strd
563 * Input Y Stride
564 *
565 * @param[in] src_uv_strd
566 * Input UV stride
567 *
568 * @param[in] dst_y_strd
569 * Output Y stride
570 *
571 * @param[in] dst_uv_strd
572 * Output UV stride
573 *
574 * @param[in] is_u_first
575 * Flag to indicate if U is the first byte in input chroma part
576 *
577 * @returns none
578 *
579 * @remarks In case there is a need to perform partial frame copy then
580 * by passion appropriate source and destination pointers and appropriate
581 * values for wd and ht it can be done
582 *
583 *******************************************************************************
584 */
585
586
ihevcd_fmt_conv_420sp_to_420p(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD8 * pu1_y_dst,UWORD8 * pu1_u_dst,UWORD8 * pu1_v_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_y_strd,WORD32 dst_uv_strd,WORD32 is_u_first,WORD32 disable_luma_copy)587 void ihevcd_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src,
588 UWORD8 *pu1_uv_src,
589 UWORD8 *pu1_y_dst,
590 UWORD8 *pu1_u_dst,
591 UWORD8 *pu1_v_dst,
592 WORD32 wd,
593 WORD32 ht,
594 WORD32 src_y_strd,
595 WORD32 src_uv_strd,
596 WORD32 dst_y_strd,
597 WORD32 dst_uv_strd,
598 WORD32 is_u_first,
599 WORD32 disable_luma_copy)
600 {
601 UWORD8 *pu1_src, *pu1_dst;
602 UWORD8 *pu1_u_src, *pu1_v_src;
603 WORD32 num_rows, num_cols, src_strd, dst_strd;
604 WORD32 i, j;
605
606 if(0 == disable_luma_copy)
607 {
608 /* copy luma */
609 pu1_src = (UWORD8 *)pu1_y_src;
610 pu1_dst = (UWORD8 *)pu1_y_dst;
611
612 num_rows = ht;
613 num_cols = wd;
614
615 src_strd = src_y_strd;
616 dst_strd = dst_y_strd;
617
618 for(i = 0; i < num_rows; i++)
619 {
620 memcpy(pu1_dst, pu1_src, num_cols);
621 pu1_dst += dst_strd;
622 pu1_src += src_strd;
623 }
624 }
625 /* de-interleave U and V and copy to destination */
626 if(is_u_first)
627 {
628 pu1_u_src = (UWORD8 *)pu1_uv_src;
629 pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
630 }
631 else
632 {
633 pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
634 pu1_v_src = (UWORD8 *)pu1_uv_src;
635 }
636
637
638 num_rows = ht >> 1;
639 num_cols = wd >> 1;
640
641 src_strd = src_uv_strd;
642 dst_strd = dst_uv_strd;
643
644 for(i = 0; i < num_rows; i++)
645 {
646 for(j = 0; j < num_cols; j++)
647 {
648 pu1_u_dst[j] = pu1_u_src[j * 2];
649 pu1_v_dst[j] = pu1_v_src[j * 2];
650 }
651
652 pu1_u_dst += dst_strd;
653 pu1_v_dst += dst_strd;
654 pu1_u_src += src_strd;
655 pu1_v_src += src_strd;
656 }
657 return;
658 }
659
660
661
662 /**
663 *******************************************************************************
664 *
665 * @brief Function used from format conversion or frame copy
666 *
667 * @par Description
668 * Function used from copying or converting a reference frame to display buffer
669 * in non shared mode
670 *
671 * @param[in] pu1_y_dst
672 * Output Y pointer
673 *
674 * @param[in] pu1_u_dst
675 * Output U/UV pointer ( UV is interleaved in the same format as that of input)
676 *
677 * @param[in] pu1_v_dst
678 * Output V pointer ( used in 420P output case)
679 *
680 * @param[in] blocking
681 * To indicate whether format conversion should wait till frame is reconstructed
682 * and then return after complete copy is done. To be set to 1 when called at the
683 * end of frame processing and set to 0 when called between frame processing modules
684 * in order to utilize available MCPS
685 *
686 * @returns Error from IHEVCD_ERROR_T
687 *
688 *******************************************************************************
689 */
ihevcd_fmt_conv(codec_t * ps_codec,process_ctxt_t * ps_proc,UWORD8 * pu1_y_dst,UWORD8 * pu1_u_dst,UWORD8 * pu1_v_dst,WORD32 cur_row,WORD32 num_rows)690 IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec,
691 process_ctxt_t *ps_proc,
692 UWORD8 *pu1_y_dst,
693 UWORD8 *pu1_u_dst,
694 UWORD8 *pu1_v_dst,
695 WORD32 cur_row,
696 WORD32 num_rows)
697 {
698 IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
699 pic_buf_t *ps_disp_pic;
700 UWORD8 *pu1_y_src, *pu1_uv_src;
701 UWORD8 *pu1_y_dst_tmp, *pu1_uv_dst_tmp;
702 UWORD8 *pu1_u_dst_tmp, *pu1_v_dst_tmp;
703 UWORD16 *pu2_rgb_dst_tmp;
704 UWORD32 *pu4_rgb_dst_tmp;
705 WORD32 is_u_first;
706 UWORD8 *pu1_luma;
707 UWORD8 *pu1_chroma;
708 sps_t *ps_sps;
709 WORD32 disable_luma_copy;
710 WORD32 crop_unit_x, crop_unit_y;
711
712 if(0 == num_rows)
713 return ret;
714
715 /* In case processing is disabled, then no need to format convert/copy */
716 PROFILE_DISABLE_FMT_CONV();
717 ps_sps = ps_proc->ps_sps;
718
719 crop_unit_x = 1;
720 crop_unit_y = 1;
721
722 if(CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc)
723 {
724 crop_unit_x = 2;
725 crop_unit_y = 2;
726 }
727
728 ps_disp_pic = ps_codec->ps_disp_buf;
729 pu1_luma = ps_disp_pic->pu1_luma;
730 pu1_chroma = ps_disp_pic->pu1_chroma;
731
732
733 /* Take care of cropping */
734 pu1_luma += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset * crop_unit_y + ps_sps->i2_pic_crop_left_offset * crop_unit_x;
735
736 /* Left offset is multiplied by 2 because buffer is UV interleaved */
737 pu1_chroma += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset + ps_sps->i2_pic_crop_left_offset * 2;
738
739
740 is_u_first = (IV_YUV_420SP_UV == ps_codec->e_ref_chroma_fmt) ? 1 : 0;
741
742 /* In case of 420P output luma copy is disabled for shared mode */
743 disable_luma_copy = 0;
744 if(1 == ps_codec->i4_share_disp_buf)
745 {
746 disable_luma_copy = 1;
747 }
748
749
750
751 {
752 pu1_y_src = pu1_luma + cur_row * ps_codec->i4_strd;
753 pu1_uv_src = pu1_chroma + (cur_row / 2) * ps_codec->i4_strd;
754
755 /* In case of shared mode, with 420P output, get chroma destination */
756 if((1 == ps_codec->i4_share_disp_buf) && (IV_YUV_420P == ps_codec->e_chroma_fmt))
757 {
758 WORD32 i;
759 for(i = 0; i < ps_codec->i4_share_disp_buf_cnt; i++)
760 {
761 WORD32 diff = ps_disp_pic->pu1_luma - ps_codec->s_disp_buffer[i].pu1_bufs[0];
762 if(diff == (ps_codec->i4_strd * PAD_TOP + PAD_LEFT))
763 {
764 pu1_u_dst = ps_codec->s_disp_buffer[i].pu1_bufs[1];
765 pu1_u_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2);
766
767 pu1_v_dst = ps_codec->s_disp_buffer[i].pu1_bufs[2];
768 pu1_v_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2);
769 break;
770 }
771 }
772 }
773 pu2_rgb_dst_tmp = (UWORD16 *)pu1_y_dst;
774 pu2_rgb_dst_tmp += cur_row * ps_codec->i4_disp_strd;
775 pu4_rgb_dst_tmp = (UWORD32 *)pu1_y_dst;
776 pu4_rgb_dst_tmp += cur_row * ps_codec->i4_disp_strd;
777 pu1_y_dst_tmp = pu1_y_dst + cur_row * ps_codec->i4_disp_strd;
778 pu1_uv_dst_tmp = pu1_u_dst + (cur_row / 2) * ps_codec->i4_disp_strd;
779 pu1_u_dst_tmp = pu1_u_dst + (cur_row / 2) * ps_codec->i4_disp_strd / 2;
780 pu1_v_dst_tmp = pu1_v_dst + (cur_row / 2) * ps_codec->i4_disp_strd / 2;
781
782 /* In case of multi threaded implementation, format conversion might be called
783 * before reconstruction is completed. If the frame being converted/copied
784 * is same as the frame being reconstructed,
785 * Check how many rows can be format converted
786 * Convert those many rows and then check for remaining rows and so on
787 */
788
789 if((0 == ps_codec->i4_flush_mode) && (ps_codec->i4_disp_buf_id == ps_proc->i4_cur_pic_buf_id) && (1 < ps_codec->i4_num_cores))
790 {
791 WORD32 idx;
792 UWORD8 *pu1_buf;
793 WORD32 status;
794 WORD32 last_row = cur_row + num_rows;
795 WORD32 last_ctb_y;
796 UWORD32 ctb_in_row;
797
798 while(1)
799 {
800 last_row = cur_row + MAX(num_rows, (1 << ps_sps->i1_log2_ctb_size)) +
801 ps_sps->i2_pic_crop_top_offset * crop_unit_y;
802 last_ctb_y = (last_row >> ps_sps->i1_log2_ctb_size) - 1;
803 /* Since deblocking works with a shift of -4, -4 ,wait till next CTB row is processed */
804 last_ctb_y++;
805 /* In case of a conformance window, an extra wait of one row might be needed */
806 last_ctb_y++;
807 last_ctb_y = MIN(last_ctb_y, (ps_sps->i2_pic_ht_in_ctb - 1));
808
809 idx = (last_ctb_y * ps_sps->i2_pic_wd_in_ctb);
810
811 /*Check if the row below is completely processed before proceeding with format conversion*/
812 status = 1;
813 for(ctb_in_row = 0; (WORD32)ctb_in_row < ps_sps->i2_pic_wd_in_ctb; ctb_in_row++)
814 {
815 pu1_buf = (ps_codec->pu1_proc_map + idx + ctb_in_row);
816 status &= *pu1_buf;
817 }
818
819 if(status)
820 {
821 break;
822 }
823 else
824 {
825 ithread_yield();
826 }
827 }
828 }
829
830
831 if((IV_YUV_420SP_UV == ps_codec->e_chroma_fmt) || (IV_YUV_420SP_VU == ps_codec->e_chroma_fmt))
832 {
833
834 ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr(pu1_y_src, pu1_uv_src,
835 pu1_y_dst_tmp, pu1_uv_dst_tmp,
836 ps_codec->i4_disp_wd,
837 num_rows,
838 ps_codec->i4_strd,
839 ps_codec->i4_strd,
840 ps_codec->i4_disp_strd,
841 ps_codec->i4_disp_strd);
842 }
843 else if(IV_YUV_420P == ps_codec->e_chroma_fmt)
844 {
845
846 if(0 == disable_luma_copy)
847 {
848 // copy luma
849 WORD32 i;
850 WORD32 num_cols = ps_codec->i4_disp_wd;
851
852 for(i = 0; i < num_rows; i++)
853 {
854 memcpy(pu1_y_dst_tmp, pu1_y_src, num_cols);
855 pu1_y_dst_tmp += ps_codec->i4_disp_strd;
856 pu1_y_src += ps_codec->i4_strd;
857 }
858
859 disable_luma_copy = 1;
860 }
861
862 ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr(pu1_y_src, pu1_uv_src,
863 pu1_y_dst_tmp, pu1_u_dst_tmp, pu1_v_dst_tmp,
864 ps_codec->i4_disp_wd,
865 num_rows,
866 ps_codec->i4_strd,
867 ps_codec->i4_strd,
868 ps_codec->i4_disp_strd,
869 (ps_codec->i4_disp_strd / 2),
870 is_u_first,
871 disable_luma_copy);
872
873 }
874 else if(IV_RGB_565 == ps_codec->e_chroma_fmt)
875 {
876
877 ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr(pu1_y_src, pu1_uv_src,
878 pu2_rgb_dst_tmp,
879 ps_codec->i4_disp_wd,
880 num_rows,
881 ps_codec->i4_strd,
882 ps_codec->i4_strd,
883 ps_codec->i4_disp_strd,
884 is_u_first);
885
886 }
887 else if(IV_RGBA_8888 == ps_codec->e_chroma_fmt)
888 {
889 ASSERT(is_u_first == 1);
890
891 ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr(pu1_y_src,
892 pu1_uv_src,
893 pu4_rgb_dst_tmp,
894 ps_codec->i4_disp_wd,
895 num_rows,
896 ps_codec->i4_strd,
897 ps_codec->i4_strd,
898 ps_codec->i4_disp_strd,
899 is_u_first);
900
901 }
902
903
904
905 }
906 return (ret);
907 }
908
909