1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22 *******************************************************************************
23 * @file
24 *  ih264e_fmt_conv.c
25 *
26 * @brief
27 *  Contains functions for format conversion or frame copy of output buffer
28 *
29 * @author
30 *  ittiam
31 *
32 * @par List of Functions:
33 *  - ih264e_fmt_conv_420sp_to_rgb565()
34 *  - ih264e_fmt_conv_420sp_to_rgba8888()
35 *  - ih264e_fmt_conv_420sp_to_420sp()
36 *  - ih264e_fmt_conv_420sp_to_420sp_swap_uv()
37 *  - ih264e_fmt_conv_420sp_to_420p()
38 *  - ih264e_fmt_conv_420p_to_420sp()
39 *  - ih264e_fmt_conv_422i_to_420sp()
40 *  - ih264e_fmt_conv()
41 *
42 * @remarks
43 *  None
44 *
45 *******************************************************************************
46 */
47 
48 /*****************************************************************************/
49 /* File Includes                                                             */
50 /*****************************************************************************/
51 
52 /* System Include files */
53 #include <stdio.h>
54 #include <stddef.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <assert.h>
58 
59 /* User Include files */
60 #include "ih264_typedefs.h"
61 #include "iv2.h"
62 #include "ive2.h"
63 #include "ih264e.h"
64 #include "ithread.h"
65 #include "ih264_defs.h"
66 #include "ih264_debug.h"
67 #include "ime_distortion_metrics.h"
68 #include "ime_defs.h"
69 #include "ime_structs.h"
70 #include "ih264_error.h"
71 #include "ih264_structs.h"
72 #include "ih264_trans_quant_itrans_iquant.h"
73 #include "ih264_inter_pred_filters.h"
74 #include "ih264_mem_fns.h"
75 #include "ih264_padding.h"
76 #include "ih264_intra_pred_filters.h"
77 #include "ih264_deblk_edge_filters.h"
78 #include "ih264_cabac_tables.h"
79 #include "ih264_macros.h"
80 #include "ih264_platform_macros.h"
81 #include "ih264_buf_mgr.h"
82 #include "ih264e_defs.h"
83 #include "ih264e_error.h"
84 #include "ih264e_bitstream.h"
85 #include "irc_cntrl_param.h"
86 #include "irc_frame_info_collector.h"
87 #include "ih264e_rate_control.h"
88 #include "ih264e_cabac_structs.h"
89 #include "ih264e_structs.h"
90 #include "ih264e_fmt_conv.h"
91 
92 
93 /*****************************************************************************/
94 /* Function Definitions                                                      */
95 /*****************************************************************************/
96 
ih264e_fmt_conv_420sp_to_rgb565(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD16 * pu2_rgb_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_strd,WORD32 is_u_first)97 void ih264e_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src,
98                                      UWORD8 *pu1_uv_src,
99                                      UWORD16 *pu2_rgb_dst,
100                                      WORD32 wd,
101                                      WORD32 ht,
102                                      WORD32 src_y_strd,
103                                      WORD32 src_uv_strd,
104                                      WORD32 dst_strd,
105                                      WORD32 is_u_first)
106 {
107     WORD16 i2_r, i2_g, i2_b;
108     UWORD32 u4_r, u4_g, u4_b;
109     WORD16 i2_i, i2_j;
110     UWORD8 *pu1_y_src_nxt;
111     UWORD16 *pu2_rgb_dst_NextRow;
112 
113     UWORD8 *pu1_u_src, *pu1_v_src;
114 
115     if (is_u_first)
116     {
117         pu1_u_src = (UWORD8 *) pu1_uv_src;
118         pu1_v_src = (UWORD8 *) pu1_uv_src + 1;
119     }
120     else
121     {
122         pu1_u_src = (UWORD8 *) pu1_uv_src + 1;
123         pu1_v_src = (UWORD8 *) pu1_uv_src;
124     }
125 
126     pu1_y_src_nxt = pu1_y_src + src_y_strd;
127     pu2_rgb_dst_NextRow = pu2_rgb_dst + dst_strd;
128 
129     for (i2_i = 0; i2_i < (ht >> 1); i2_i++)
130     {
131         for (i2_j = (wd >> 1); i2_j > 0; i2_j--)
132         {
133             i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13);
134             i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3)
135                             >> 13;
136             i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13;
137 
138             pu1_u_src += 2;
139             pu1_v_src += 2;
140             /* pixel 0 */
141             /* B */
142             u4_b = CLIP_U8(*pu1_y_src + i2_b);
143             u4_b >>= 3;
144             /* G */
145             u4_g = CLIP_U8(*pu1_y_src + i2_g);
146             u4_g >>= 2;
147             /* R */
148             u4_r = CLIP_U8(*pu1_y_src + i2_r);
149             u4_r >>= 3;
150 
151             pu1_y_src++;
152             *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
153 
154             /* pixel 1 */
155             /* B */
156             u4_b = CLIP_U8(*pu1_y_src + i2_b);
157             u4_b >>= 3;
158             /* G */
159             u4_g = CLIP_U8(*pu1_y_src + i2_g);
160             u4_g >>= 2;
161             /* R */
162             u4_r = CLIP_U8(*pu1_y_src + i2_r);
163             u4_r >>= 3;
164 
165             pu1_y_src++;
166             *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
167 
168             /* pixel 2 */
169             /* B */
170             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
171             u4_b >>= 3;
172             /* G */
173             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
174             u4_g >>= 2;
175             /* R */
176             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
177             u4_r >>= 3;
178 
179             pu1_y_src_nxt++;
180             *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
181 
182             /* pixel 3 */
183             /* B */
184             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
185             u4_b >>= 3;
186             /* G */
187             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
188             u4_g >>= 2;
189             /* R */
190             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
191             u4_r >>= 3;
192 
193             pu1_y_src_nxt++;
194             *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
195 
196         }
197 
198         pu1_u_src = pu1_u_src + src_uv_strd - wd;
199         pu1_v_src = pu1_v_src + src_uv_strd - wd;
200 
201         pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd;
202         pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd;
203 
204         pu2_rgb_dst = pu2_rgb_dst_NextRow - wd + dst_strd;
205         pu2_rgb_dst_NextRow = pu2_rgb_dst_NextRow + (dst_strd << 1) - wd;
206     }
207 
208 }
209 
ih264e_fmt_conv_420sp_to_rgba8888(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD32 * pu4_rgba_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_strd,WORD32 is_u_first)210 void ih264e_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src,
211                                        UWORD8 *pu1_uv_src,
212                                        UWORD32 *pu4_rgba_dst,
213                                        WORD32 wd,
214                                        WORD32 ht,
215                                        WORD32 src_y_strd,
216                                        WORD32 src_uv_strd,
217                                        WORD32 dst_strd,
218                                        WORD32 is_u_first)
219 {
220     WORD16 i2_r, i2_g, i2_b;
221     UWORD32 u4_r, u4_g, u4_b;
222     WORD16 i2_i, i2_j;
223     UWORD8 *pu1_y_src_nxt;
224     UWORD32 *pu4_rgba_dst_NextRow;
225     UWORD8 *pu1_u_src, *pu1_v_src;
226 
227     if (is_u_first)
228     {
229         pu1_u_src = (UWORD8 *) pu1_uv_src;
230         pu1_v_src = (UWORD8 *) pu1_uv_src + 1;
231     }
232     else
233     {
234         pu1_u_src = (UWORD8 *) pu1_uv_src + 1;
235         pu1_v_src = (UWORD8 *) pu1_uv_src;
236     }
237 
238     pu1_y_src_nxt = pu1_y_src + src_y_strd;
239 
240     pu4_rgba_dst_NextRow = pu4_rgba_dst + dst_strd;
241 
242     for (i2_i = 0; i2_i < (ht >> 1); i2_i++)
243     {
244         for (i2_j = (wd >> 1); i2_j > 0; i2_j--)
245         {
246             i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13);
247             i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3)
248                             >> 13;
249             i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13;
250 
251             pu1_u_src += 2;
252             pu1_v_src += 2;
253             /* pixel 0 */
254             /* B */
255             u4_b = CLIP_U8(*pu1_y_src + i2_b);
256             /* G */
257             u4_g = CLIP_U8(*pu1_y_src + i2_g);
258             /* R */
259             u4_r = CLIP_U8(*pu1_y_src + i2_r);
260 
261             pu1_y_src++;
262             *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
263 
264             /* pixel 1 */
265             /* B */
266             u4_b = CLIP_U8(*pu1_y_src + i2_b);
267             /* G */
268             u4_g = CLIP_U8(*pu1_y_src + i2_g);
269             /* R */
270             u4_r = CLIP_U8(*pu1_y_src + i2_r);
271 
272             pu1_y_src++;
273             *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
274 
275             /* pixel 2 */
276             /* B */
277             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
278             /* G */
279             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
280             /* R */
281             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
282 
283             pu1_y_src_nxt++;
284             *pu4_rgba_dst_NextRow++ =
285                             ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
286 
287             /* pixel 3 */
288             /* B */
289             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
290             /* G */
291             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
292             /* R */
293             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
294 
295             pu1_y_src_nxt++;
296             *pu4_rgba_dst_NextRow++ =
297                             ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
298 
299         }
300 
301         pu1_u_src = pu1_u_src + src_uv_strd - wd;
302         pu1_v_src = pu1_v_src + src_uv_strd - wd;
303 
304         pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd;
305         pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd;
306 
307         pu4_rgba_dst = pu4_rgba_dst_NextRow - wd + dst_strd;
308         pu4_rgba_dst_NextRow = pu4_rgba_dst_NextRow + (dst_strd << 1) - wd;
309     }
310 
311 }
312 
313 /**
314 *******************************************************************************
315 *
316 * @brief Function used for copying a 420SP buffer
317 *
318 * @par   Description
319 *  Function used for copying a 420SP buffer
320 *
321 * @param[in] pu1_y_src
322 *  Input Y pointer
323 *
324 * @param[in] pu1_uv_src
325 *  Input UV pointer (UV is interleaved either in UV or VU format)
326 *
327 * @param[in] pu1_y_dst
328 *  Output Y pointer
329 *
330 * @param[in] pu1_uv_dst
331 *  Output UV pointer (UV is interleaved in the same format as that of input)
332 *
333 * @param[in] wd
334 *  Width
335 *
336 * @param[in] ht
337 *  Height
338 *
339 * @param[in] src_y_strd
340 *  Input Y Stride
341 *
342 * @param[in] src_uv_strd
343 *  Input UV stride
344 *
345 * @param[in] dst_y_strd
346 *  Output Y stride
347 *
348 * @param[in] dst_uv_strd
349 *  Output UV stride
350 *
351 * @returns None
352 *
353 * @remarks In case there is a need to perform partial frame copy then
354 * by passion appropriate source and destination pointers and appropriate
355 * values for wd and ht it can be done
356 *
357 *******************************************************************************
358 */
ih264e_fmt_conv_420sp_to_420sp(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD8 * pu1_y_dst,UWORD8 * pu1_uv_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_y_strd,WORD32 dst_uv_strd)359 void ih264e_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src,
360                                     UWORD8 *pu1_uv_src,
361                                     UWORD8 *pu1_y_dst,
362                                     UWORD8 *pu1_uv_dst,
363                                     WORD32 wd,
364                                     WORD32 ht,
365                                     WORD32 src_y_strd,
366                                     WORD32 src_uv_strd,
367                                     WORD32 dst_y_strd,
368                                     WORD32 dst_uv_strd)
369 {
370     UWORD8 *pu1_src, *pu1_dst;
371     WORD32 num_rows, num_cols, src_strd, dst_strd;
372     WORD32 i;
373 
374     /* copy luma */
375     pu1_src = (UWORD8 *) pu1_y_src;
376     pu1_dst = (UWORD8 *) pu1_y_dst;
377 
378     num_rows = ht;
379     num_cols = wd;
380 
381     src_strd = src_y_strd;
382     dst_strd = dst_y_strd;
383 
384     for (i = 0; i < num_rows; i++)
385     {
386         memcpy(pu1_dst, pu1_src, num_cols);
387         pu1_dst += dst_strd;
388         pu1_src += src_strd;
389     }
390 
391     /* copy U and V */
392     pu1_src = (UWORD8 *) pu1_uv_src;
393     pu1_dst = (UWORD8 *) pu1_uv_dst;
394 
395     num_rows = ht >> 1;
396     num_cols = wd;
397 
398     src_strd = src_uv_strd;
399     dst_strd = dst_uv_strd;
400 
401     for (i = 0; i < num_rows; i++)
402     {
403         memcpy(pu1_dst, pu1_src, num_cols);
404         pu1_dst += dst_strd;
405         pu1_src += src_strd;
406     }
407     return;
408 }
409 
410 
/**
*******************************************************************************
*
* @brief Function used for copying a 420SP buffer while swapping U and V
*
* @par   Description
*  Copies ht rows of wd bytes from the luma plane unchanged. For the chroma
*  plane, (ht >> 1) rows are processed and the two bytes of every interleaved
*  pair are written in the opposite order (UV becomes VU and vice versa).
*
* @param[in] pu1_y_src
*  Input Y pointer
*
* @param[in] pu1_uv_src
*  Input UV pointer (interleaved)
*
* @param[out] pu1_y_dst
*  Output Y pointer
*
* @param[out] pu1_uv_dst
*  Output UV pointer (interleaved, byte order of each pair is swapped)
*
* @param[in] wd
*  Width
*
* @param[in] ht
*  Height
*
* @param[in] src_y_strd
*  Input Y stride
*
* @param[in] src_uv_strd
*  Input UV stride
*
* @param[in] dst_y_strd
*  Output Y stride
*
* @param[in] dst_uv_strd
*  Output UV stride
*
* @returns None
*
* @remarks
*  Chroma is handled two bytes at a time, so when wd is odd the last pair
*  touches byte index wd of the chroma row.
*
*******************************************************************************
*/
void ih264e_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src,
                                            UWORD8 *pu1_uv_src,
                                            UWORD8 *pu1_y_dst,
                                            UWORD8 *pu1_uv_dst,
                                            WORD32 wd,
                                            WORD32 ht,
                                            WORD32 src_y_strd,
                                            WORD32 src_uv_strd,
                                            WORD32 dst_y_strd,
                                            WORD32 dst_uv_strd)
{
    UWORD8 *pu1_in_row, *pu1_out_row;
    WORD32 rows_left;

    /* Luma plane : straight copy */
    pu1_in_row = pu1_y_src;
    pu1_out_row = pu1_y_dst;
    for (rows_left = ht; rows_left > 0; rows_left--)
    {
        memcpy(pu1_out_row, pu1_in_row, wd);
        pu1_out_row += dst_y_strd;
        pu1_in_row += src_y_strd;
    }

    /* Chroma plane : exchange the two bytes of every interleaved pair */
    pu1_in_row = pu1_uv_src;
    pu1_out_row = pu1_uv_dst;
    for (rows_left = ht >> 1; rows_left > 0; rows_left--)
    {
        UWORD8 *pu1_in = pu1_in_row;
        UWORD8 *pu1_out = pu1_out_row;
        WORD32 col;

        for (col = 0; col < wd; col += 2)
        {
            pu1_out[0] = pu1_in[1];
            pu1_out[1] = pu1_in[0];
            pu1_in += 2;
            pu1_out += 2;
        }

        pu1_out_row += dst_uv_strd;
        pu1_in_row += src_uv_strd;
    }
}
466 
/**
*******************************************************************************
*
* @brief Function used to convert a 420SP buffer to 420P
*
* @par   Description
*  Optionally copies the luma plane (ht rows of wd bytes) and then splits the
*  interleaved chroma plane into separate U and V planes of (ht >> 1) rows
*  and (wd >> 1) samples per row.
*
* @param[in] pu1_y_src
*  Input Y pointer
*
* @param[in] pu1_uv_src
*  Input UV pointer (interleaved, order selected by is_u_first)
*
* @param[out] pu1_y_dst
*  Output Y pointer (not written when disable_luma_copy is non zero)
*
* @param[out] pu1_u_dst
*  Output U pointer
*
* @param[out] pu1_v_dst
*  Output V pointer
*
* @param[in] wd
*  Width
*
* @param[in] ht
*  Height
*
* @param[in] src_y_strd
*  Input Y stride
*
* @param[in] src_uv_strd
*  Input UV stride
*
* @param[in] dst_y_strd
*  Output Y stride
*
* @param[in] dst_uv_strd
*  Output stride, shared by the U and the V plane
*
* @param[in] is_u_first
*  Non zero if U is the first byte of every interleaved chroma pair
*
* @param[in] disable_luma_copy
*  Non zero to skip the luma copy and convert chroma only
*
* @returns None
*
*******************************************************************************
*/
void ih264e_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src,
                                   UWORD8 *pu1_uv_src,
                                   UWORD8 *pu1_y_dst,
                                   UWORD8 *pu1_u_dst,
                                   UWORD8 *pu1_v_dst,
                                   WORD32 wd,
                                   WORD32 ht,
                                   WORD32 src_y_strd,
                                   WORD32 src_uv_strd,
                                   WORD32 dst_y_strd,
                                   WORD32 dst_uv_strd,
                                   WORD32 is_u_first,
                                   WORD32 disable_luma_copy)
{
    UWORD8 *pu1_u_in_row, *pu1_v_in_row;
    WORD32 chroma_wd = wd >> 1;
    WORD32 rows_left;

    if (0 == disable_luma_copy)
    {
        /* Luma plane : straight copy */
        UWORD8 *pu1_in = pu1_y_src;
        UWORD8 *pu1_out = pu1_y_dst;

        for (rows_left = ht; rows_left > 0; rows_left--)
        {
            memcpy(pu1_out, pu1_in, wd);
            pu1_out += dst_y_strd;
            pu1_in += src_y_strd;
        }
    }

    /* Locate the first U and the first V sample of the interleaved plane */
    pu1_u_in_row = pu1_uv_src + (is_u_first ? 0 : 1);
    pu1_v_in_row = pu1_uv_src + (is_u_first ? 1 : 0);

    /* De-interleave : every second byte goes to its own plane */
    for (rows_left = ht >> 1; rows_left > 0; rows_left--)
    {
        UWORD8 *pu1_u_in = pu1_u_in_row;
        UWORD8 *pu1_v_in = pu1_v_in_row;
        UWORD8 *pu1_u_out = pu1_u_dst;
        UWORD8 *pu1_v_out = pu1_v_dst;
        WORD32 cols_left;

        for (cols_left = chroma_wd; cols_left > 0; cols_left--)
        {
            *pu1_u_out++ = *pu1_u_in;
            *pu1_v_out++ = *pu1_v_in;
            pu1_u_in += 2;
            pu1_v_in += 2;
        }

        pu1_u_dst += dst_uv_strd;
        pu1_v_dst += dst_uv_strd;
        pu1_u_in_row += src_uv_strd;
        pu1_v_in_row += src_uv_strd;
    }
}
538 
539 /**
540 *******************************************************************************
541 *
542 * @brief Function used to perform color space conversion from 420P to 420SP
543 *
544 * @par   Description
545 * Function used to perform color space conversion from 420P to 420SP
546 *
547 * @param[in] pu1_y_src
548 *  Input Y pointer
549 *
550 * @param[in] pu1_u_src
551 *  Input U pointer
552 *
553 * @param[in] pu1_v_dst
554 *  Input V pointer
555 *
556 * @param[in] pu1_y_dst
557 *  Output Y pointer
558 *
559 * @param[in] pu1_uv_dst
560 *  Output UV pointer
561 *
562 * @param[in] u4_width
563 *  Width
564 *
565 * @param[in] u4_height
566 *  Height
567 *
568 * @param[in] src_y_strd
569 *  Input Y Stride
570 *
571 * @param[in] src_u_strd
572 *  Input U stride
573 *
574 * @param[in] src_v_strd
575 *  Input V stride
576 *
577 * @param[in] dst_y_strd
578 *  Output Y stride
579 *
580 * @param[in] dst_uv_strd
581 *  Output UV stride
582 *
583 * @param[in] convert_uv_only
584 *  Flag to indicate if only UV copy needs to be done
585 *
586 * @returns none
587 *
588 * @remarks In case there is a need to perform partial frame copy then
589 * by passion appropriate source and destination pointers and appropriate
590 * values for wd and ht it can be done
591 *
592 *******************************************************************************
593 */
ih264e_fmt_conv_420p_to_420sp(UWORD8 * pu1_y_src,UWORD8 * pu1_u_src,UWORD8 * pu1_v_src,UWORD8 * pu1_y_dst,UWORD8 * pu1_uv_dst,UWORD16 u2_height,UWORD16 u2_width,UWORD16 src_y_strd,UWORD16 src_u_strd,UWORD16 src_v_strd,UWORD16 dst_y_strd,UWORD16 dst_uv_strd,UWORD32 convert_uv_only)594 void ih264e_fmt_conv_420p_to_420sp(UWORD8 *pu1_y_src,
595                                    UWORD8 *pu1_u_src,
596                                    UWORD8 *pu1_v_src,
597                                    UWORD8 *pu1_y_dst,
598                                    UWORD8 *pu1_uv_dst,
599                                    UWORD16 u2_height,
600                                    UWORD16 u2_width,
601                                    UWORD16 src_y_strd,
602                                    UWORD16 src_u_strd,
603                                    UWORD16 src_v_strd,
604                                    UWORD16 dst_y_strd,
605                                    UWORD16 dst_uv_strd,
606                                    UWORD32 convert_uv_only)
607 {
608     UWORD8 *pu1_src, *pu1_dst;
609     UWORD8 *pu1_src_u, *pu1_src_v;
610     UWORD16 i;
611     UWORD32 u2_width_uv;
612     UWORD32 dest_inc_Y = 0, dest_inc_UV = 0;
613 
614     dest_inc_UV = dst_uv_strd;
615 
616     if (0 == convert_uv_only)
617     {
618 
619         /* Copy Y buffer */
620         pu1_dst = (UWORD8 *) pu1_y_dst;
621         pu1_src = (UWORD8 *) pu1_y_src;
622 
623         dest_inc_Y = dst_y_strd;
624 
625         for (i = 0; i < u2_height; i++)
626         {
627             memcpy((void *) pu1_dst, (void *) pu1_src, u2_width);
628             pu1_dst += dest_inc_Y;
629             pu1_src += src_y_strd;
630         }
631     }
632 
633     /* Interleave Cb and Cr buffers */
634     pu1_src_u = pu1_u_src;
635     pu1_src_v = pu1_v_src;
636     pu1_dst = pu1_uv_dst;
637 
638     u2_height = (u2_height + 1) >> 1;
639     u2_width_uv = (u2_width + 1) >> 1;
640     for (i = 0; i < u2_height; i++)
641     {
642         UWORD32 j;
643         for (j = 0; j < u2_width_uv; j++)
644         {
645             *pu1_dst++ = *pu1_src_u++;
646             *pu1_dst++ = *pu1_src_v++;
647         }
648 
649         pu1_dst += dest_inc_UV - u2_width;
650         pu1_src_u += src_u_strd - u2_width_uv;
651         pu1_src_v += src_v_strd - u2_width_uv;
652     }
653 }
654 
655 /**
656 *******************************************************************************
657 *
658 * @brief Function used to convert 422 interleaved to 420sp
659 *
660 * @par   Description
661 *  Function used to convert 422 interleaved to 420sp
662 *
663 * @param[in] pu1_y_buf
664 *  Output Y pointer
665 *
666 * @param[in] pu1_u_buf
667 *  Output u pointer
668 *
669 * @param[in[ pu1_v_buf
670 *  Output V pointer
671 *
672 * @param[in] pu1_422i_buf
673 *  Input 422i pointer
674 *
675 * @param[in] u4_y_width
676 *  Width of Y component
677 *
678 * @param[in] u4_y_height
679 *  Height of Y component
680 *
681 * @param[in] u4_y_stride
682 *  Stride of pu1_y_buf
683 *
684 * @param[in] u4_u_stride
685 *  Stride of pu1_u_buf
686 *
687 * @param[in] u4_v_stride
688 *  Stride of pu1_v_buf
689 *
690 * @param[in] u4_422i_stride
691 *  Stride of pu1_422i_buf
692 *
693 * @returns None
694 *
695 * @remarks For conversion
696 * pu1_v_buf = pu1_u_buf+1
697 * u4_u_stride = u4_v_stride
698 *
699 * The extra parameters are for maintaining API with assembly function
700 *
701 *******************************************************************************
702 */
ih264e_fmt_conv_422i_to_420sp(UWORD8 * pu1_y_buf,UWORD8 * pu1_u_buf,UWORD8 * pu1_v_buf,UWORD8 * pu1_422i_buf,WORD32 u4_y_width,WORD32 u4_y_height,WORD32 u4_y_stride,WORD32 u4_u_stride,WORD32 u4_v_stride,WORD32 u4_422i_stride)703 void ih264e_fmt_conv_422i_to_420sp(UWORD8 *pu1_y_buf,
704                                    UWORD8 *pu1_u_buf,
705                                    UWORD8 *pu1_v_buf,
706                                    UWORD8 *pu1_422i_buf,
707                                    WORD32 u4_y_width,
708                                    WORD32 u4_y_height,
709                                    WORD32 u4_y_stride,
710                                    WORD32 u4_u_stride,
711                                    WORD32 u4_v_stride,
712                                    WORD32 u4_422i_stride)
713 {
714     WORD32 row, col;
715     UWORD8 *row_even_422 = pu1_422i_buf;
716     UWORD8 *row_odd_422 = row_even_422 + (u4_422i_stride << 1);
717     UWORD8 *row_even_luma = pu1_y_buf;
718     /* Since at the end of loop, we have row_even_luma += (luma_width << 1),
719      * it should be same here right? */
720     UWORD8 *row_odd_luma = row_even_luma + u4_y_stride;
721     UWORD8 *row_cb = pu1_u_buf;
722     UWORD8 *row_cr = pu1_v_buf;
723 
724     for (row = 0; row < u4_y_height; row = row + 2)
725     {
726         for (col = 0; col < (u4_y_width << 1); col = col + 4)
727         {
728             UWORD8 cb_even = row_even_422[col];
729             UWORD8 cr_even = row_even_422[col + 2];
730 
731             row_cb[col >> 1] = cb_even;
732             row_cr[col >> 1] = cr_even;
733 
734             row_even_luma[col >> 1] = row_even_422[col + 1];
735             row_even_luma[(col >> 1) + 1] = row_even_422[col + 3];
736 
737             row_odd_luma[col >> 1] = row_odd_422[col + 1];
738             row_odd_luma[(col >> 1) + 1] = row_odd_422[col + 3];
739         }
740 
741         row_even_422 += (u4_422i_stride << 2);
742         row_odd_422 += (u4_422i_stride << 2);
743 
744         row_even_luma += (u4_y_stride << 1);
745         row_odd_luma += (u4_y_stride << 1);
746 
747         row_cb += u4_u_stride;
748         row_cr += u4_v_stride;
749     }
750 }
751 
752 /**
753 *******************************************************************************
754 *
755 * @brief Function used from format conversion or frame copy
756 *
757 * @par   Description
758 * Function used from copying or converting a reference frame to display buffer
759 * in non shared mode
760 *
761 * @param[in] pu1_y_dst
762 *  Output Y pointer
763 *
764 * @param[in] pu1_u_dst
765 *  Output U/UV pointer ( UV is interleaved in the same format as that of input)
766 *
767 * @param[in] pu1_v_dst
768 *  Output V pointer ( used in 420P output case)
769 *
770 * @param[in] u4_dst_y_strd
771 *  Stride of destination Y buffer
772 *
773 * @param[in] u4_dst_u_strd
774 *  Stride of destination  U/V buffer
775 *
776 * @param[in] blocking
777 *  To indicate whether format conversion should wait till frame is reconstructed
778 *  and then return after complete copy is done. To be set to 1 when called at the
779 *  end of frame processing and set to 0 when called between frame processing modules
780 *  in order to utilize available MCPS
781 *
782 * @returns error status
783 *
784 * @remarks
785 * Assumes that the stride of U and V buffers are same.
786 * This is correct in most cases
787 * If a case comes where this is not true we need to modify the fmt conversion
788 * functions called inside also
789 *
790 *******************************************************************************
791 */
ih264e_fmt_conv(codec_t * ps_codec,pic_buf_t * ps_pic,UWORD8 * pu1_y_dst,UWORD8 * pu1_u_dst,UWORD8 * pu1_v_dst,UWORD32 u4_dst_y_strd,UWORD32 u4_dst_uv_strd,WORD32 cur_row,WORD32 num_rows)792 IH264E_ERROR_T ih264e_fmt_conv(codec_t *ps_codec,
793                                pic_buf_t *ps_pic,
794                                UWORD8 *pu1_y_dst,
795                                UWORD8 *pu1_u_dst,
796                                UWORD8 *pu1_v_dst,
797                                UWORD32 u4_dst_y_strd,
798                                UWORD32 u4_dst_uv_strd,
799                                WORD32 cur_row,
800                                WORD32 num_rows)
801 {
802     IH264E_ERROR_T ret = IH264E_SUCCESS;
803     UWORD8 *pu1_y_src, *pu1_uv_src;
804     UWORD8 *pu1_y_dst_tmp, *pu1_uv_dst_tmp;
805     UWORD8 *pu1_u_dst_tmp, *pu1_v_dst_tmp;
806     UWORD16 *pu2_rgb_dst_tmp;
807     UWORD32 *pu4_rgb_dst_tmp;
808     WORD32 is_u_first;
809     UWORD8 *pu1_luma;
810     UWORD8 *pu1_chroma;
811     WORD32 dst_stride, wd;
812 
813 
814     if (0 == num_rows)
815         return ret;
816 
817     pu1_luma = ps_pic->pu1_luma;
818     pu1_chroma = ps_pic->pu1_chroma;
819 
820 
821     dst_stride = ps_codec->s_cfg.u4_wd;
822     wd = ps_codec->s_cfg.u4_disp_wd;
823     is_u_first = (IV_YUV_420SP_UV == ps_codec->e_codec_color_format) ? 1 : 0;
824 
825     /* In case of 420P output luma copy is disabled for shared mode */
826     {
827         pu1_y_src = pu1_luma + cur_row * ps_codec->i4_rec_strd;
828         pu1_uv_src = pu1_chroma + (cur_row / 2) * ps_codec->i4_rec_strd;
829 
830         pu2_rgb_dst_tmp = (UWORD16 *) pu1_y_dst;
831         pu2_rgb_dst_tmp += cur_row * dst_stride;
832         pu4_rgb_dst_tmp = (UWORD32 *) pu1_y_dst;
833         pu4_rgb_dst_tmp += cur_row * dst_stride;
834 
835         pu1_y_dst_tmp = pu1_y_dst + cur_row * u4_dst_y_strd;
836         pu1_uv_dst_tmp = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd;
837         pu1_u_dst_tmp = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd;
838         pu1_v_dst_tmp = pu1_v_dst + (cur_row / 2) * u4_dst_uv_strd;
839 
840         /* If the call is non-blocking and there are no rows to be copied then return */
841         /* In non-shared mode, reference buffers are in 420SP UV format,
842          * if output also is in 420SP_UV, then just copy
843          * if output is in 420SP_VU then swap UV values
844          */
845         if ((IV_YUV_420SP_UV == ps_codec->s_cfg.e_recon_color_fmt) ||
846                         (IV_YUV_420SP_VU == ps_codec->s_cfg.e_recon_color_fmt))
847         {
848             ih264e_fmt_conv_420sp_to_420sp(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp,
849                                            pu1_uv_dst_tmp, wd, num_rows,
850                                            ps_codec->i4_rec_strd,
851                                            ps_codec->i4_rec_strd, u4_dst_y_strd,
852                                            u4_dst_uv_strd);
853         }
854         else if (IV_YUV_420P == ps_codec->s_cfg.e_recon_color_fmt)
855         {
856             ih264e_fmt_conv_420sp_to_420p(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp,
857                                           pu1_u_dst_tmp, pu1_v_dst_tmp, wd,
858                                           num_rows, ps_codec->i4_rec_strd,
859                                           ps_codec->i4_rec_strd, u4_dst_y_strd,
860                                           u4_dst_uv_strd, is_u_first, 0);
861         }
862     }
863     return(ret);
864 }
865 
866