1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * ih264e_intra_modes_eval.c
25 *
26 * @brief
27 * This file contains definitions of routines that perform rate distortion
28 * analysis on a macroblock if they are to be coded as intra.
29 *
30 * @author
31 * ittiam
32 *
33 * @par List of Functions:
*  - ih264e_derive_nghbr_avbl_of_mbs()
35 * - ih264e_derive_ngbr_avbl_of_mb_partitions()
36 * - ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff()
37 * - ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff()
38 * - ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff()
39 * - ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton()
40 * - ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff()
41 * - ih264e_evaluate_intra16x16_modes()
42 * - ih264e_evaluate_intra4x4_modes()
43 * - ih264e_evaluate_intra_chroma_modes()
44 *
45 * @remarks
46 * None
47 *
48 *******************************************************************************
49 */
50
51 /*****************************************************************************/
52 /* File Includes */
53 /*****************************************************************************/
54
55 /* System include files */
56 #include <stdio.h>
57 #include <string.h>
58 #include <limits.h>
59 #include <assert.h>
60
61 /* User include files */
62 #include "ih264e_config.h"
63 #include "ih264_typedefs.h"
64 #include "ih264e_defs.h"
65 #include "iv2.h"
66 #include "ive2.h"
67 #include "ih264_debug.h"
68 #include "ih264_defs.h"
69 #include "ih264_macros.h"
70 #include "ih264_intra_pred_filters.h"
71 #include "ih264_structs.h"
72 #include "ih264_common_tables.h"
73 #include "ih264_trans_quant_itrans_iquant.h"
74 #include "ih264_inter_pred_filters.h"
75 #include "ih264_mem_fns.h"
76 #include "ih264_padding.h"
77 #include "ih264_deblk_edge_filters.h"
78 #include "ih264_cabac_tables.h"
79 #include "ime_distortion_metrics.h"
80 #include "ih264e_error.h"
81 #include "ih264e_bitstream.h"
82 #include "ime_defs.h"
83 #include "ime_structs.h"
84 #include "irc_cntrl_param.h"
85 #include "irc_frame_info_collector.h"
86 #include "ih264e_rate_control.h"
87 #include "ih264e_cabac_structs.h"
88 #include "ih264e_structs.h"
89 #include "ih264e_intra_modes_eval.h"
90 #include "ih264e_globals.h"
91 #include "ime_platform_macros.h"
92
93
94 /*****************************************************************************/
95 /* Function Definitions */
96 /*****************************************************************************/
97
98 /**
99 ******************************************************************************
100 *
101 * @brief
102 * derivation process for macroblock availability
103 *
104 * @par Description
105 * Calculates the availability of the left, top, topright and topleft macroblocks.
106 *
107 * @param[in] ps_proc_ctxt
108 * pointer to proc context (handle)
109 *
110 * @remarks Based on section 6.4.5 in H264 spec
111 *
112 * @return none
113 *
114 ******************************************************************************
115 */
ih264e_derive_nghbr_avbl_of_mbs(process_ctxt_t * ps_proc)116 void ih264e_derive_nghbr_avbl_of_mbs(process_ctxt_t *ps_proc)
117 {
118 UWORD8 *pu1_slice_idx_curr = ps_proc->pu1_slice_idx;
119 UWORD8 *pu1_slice_idx_b;
120 UWORD8 *pu1_slice_idx_a;
121 UWORD8 *pu1_slice_idx_c;
122 UWORD8 *pu1_slice_idx_d;
123 block_neighbors_t *ps_ngbr_avbl;
124 WORD32 i4_mb_x, i4_mb_y;
125 WORD32 i4_wd_mbs;
126
127 i4_mb_x = ps_proc->i4_mb_x;
128 i4_mb_y = ps_proc->i4_mb_y;
129
130 i4_wd_mbs = ps_proc->i4_wd_mbs;
131
132 pu1_slice_idx_curr += (i4_mb_y * i4_wd_mbs) + i4_mb_x;
133 pu1_slice_idx_a = pu1_slice_idx_curr - 1;
134 pu1_slice_idx_b = pu1_slice_idx_curr - i4_wd_mbs;
135 pu1_slice_idx_c = pu1_slice_idx_b + 1;
136 pu1_slice_idx_d = pu1_slice_idx_b - 1;
137 ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
138
139 /**********************************************************************/
140 /* The macroblock is marked as available, unless one of the following */
141 /* conditions is true in which case the macroblock shall be marked as */
142 /* not available. */
143 /* 1. mbAddr < 0 */
144 /* 2 mbAddr > CurrMbAddr */
145 /* 3. the macroblock with address mbAddr belongs to a different slice */
146 /* than the macroblock with address CurrMbAddr */
147 /**********************************************************************/
148
149 /* left macroblock availability */
150 if (i4_mb_x == 0)
151 { /* macroblocks along first column */
152 ps_ngbr_avbl->u1_mb_a = 0;
153 }
154 else
155 { /* macroblocks belong to same slice? */
156 if (*pu1_slice_idx_a != *pu1_slice_idx_curr)
157 ps_ngbr_avbl->u1_mb_a = 0;
158 else
159 ps_ngbr_avbl->u1_mb_a = 1;
160 }
161
162 /* top macroblock availability */
163 if (i4_mb_y == 0)
164 { /* macroblocks along first row */
165 ps_ngbr_avbl->u1_mb_b = 0;
166 }
167 else
168 { /* macroblocks belong to same slice? */
169 if (*pu1_slice_idx_b != *pu1_slice_idx_curr)
170 ps_ngbr_avbl->u1_mb_b = 0;
171 else
172 ps_ngbr_avbl->u1_mb_b = 1;
173 }
174
175 /* top right macroblock availability */
176 if (i4_mb_x == i4_wd_mbs-1 || i4_mb_y == 0)
177 { /* macroblocks along last column */
178 ps_ngbr_avbl->u1_mb_c = 0;
179 }
180 else
181 { /* macroblocks belong to same slice? */
182 if (*pu1_slice_idx_c != *pu1_slice_idx_curr)
183 ps_ngbr_avbl->u1_mb_c = 0;
184 else
185 ps_ngbr_avbl->u1_mb_c = 1;
186 }
187
188 /* top left macroblock availability */
189 if (i4_mb_x == 0 || i4_mb_y == 0)
190 { /* macroblocks along first column */
191 ps_ngbr_avbl->u1_mb_d = 0;
192 }
193 else
194 { /* macroblocks belong to same slice? */
195 if (*pu1_slice_idx_d != *pu1_slice_idx_curr)
196 ps_ngbr_avbl->u1_mb_d = 0;
197 else
198 ps_ngbr_avbl->u1_mb_d = 1;
199 }
200 }
201
202 /**
203 ******************************************************************************
204 *
205 * @brief
206 * derivation process for subblock/partition availability
207 *
208 * @par Description
209 * Calculates the availability of the left, top, topright and topleft subblock
210 * or partitions.
211 *
212 * @param[in] ps_proc_ctxt
213 * pointer to macroblock context (handle)
214 *
215 * @param[in] i1_pel_pos_x
216 * column position of the pel wrt the current block
217 *
218 * @param[in] i1_pel_pos_y
219 * row position of the pel in wrt current block
220 *
221 * @remarks Assumptions: before calling this function it is assumed that
222 * the neighbor availability of the current macroblock is already derived.
223 * Based on table 6-3 of H264 specification
224 *
225 * @return availability status (yes or no)
226 *
227 ******************************************************************************
228 */
ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t * ps_ngbr_avbl,WORD8 i1_pel_pos_x,WORD8 i1_pel_pos_y)229 UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl,
230 WORD8 i1_pel_pos_x,
231 WORD8 i1_pel_pos_y)
232 {
233 UWORD8 u1_neighbor_avail=0;
234
235 /**********************************************************************/
236 /* values of i1_pel_pos_x in the range 0-15 inclusive correspond to */
237 /* various columns of a macroblock */
238 /* */
239 /* values of i1_pel_pos_y in the range 0-15 inclusive correspond to */
240 /* various rows of a macroblock */
241 /* */
242 /* other values of i1_pel_pos_x & i1_pel_pos_y represents elements */
243 /* outside the bound of an mb ie., represents its neighbors. */
244 /**********************************************************************/
245 if (i1_pel_pos_x < 0)
246 { /* column(-1) */
247 if (i1_pel_pos_y < 0)
248 { /* row(-1) */
249 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_d; /* current mb topleft availability */
250 }
251 else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
252 { /* all rows of a macroblock */
253 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_a; /* current mb left availability */
254 }
255 else /* if (i1_pel_pos_y >= 16) */
256 { /* rows(+16) */
257 u1_neighbor_avail = 0; /* current mb bottom left availability */
258 }
259 }
260 else if (i1_pel_pos_x >= 0 && i1_pel_pos_x < 16)
261 { /* all columns of a macroblock */
262 if (i1_pel_pos_y < 0)
263 { /* row(-1) */
264 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_b; /* current mb top availability */
265 }
266 else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
267 { /* all rows of a macroblock */
268 u1_neighbor_avail = 1; /* current mb availability */
269 /* availability of the partition is dependent on the position of the partition inside the mb */
270 /* although the availability is declared as 1 in all cases these needs to be corrected somewhere else and this is not done in here */
271 }
272 else /* if (i1_pel_pos_y >= 16) */
273 { /* rows(+16) */
274 u1_neighbor_avail = 0; /* current mb bottom availability */
275 }
276 }
277 else if (i1_pel_pos_x >= 16)
278 { /* column(+16) */
279 if (i1_pel_pos_y < 0)
280 { /* row(-1) */
281 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_c; /* current mb top right availability */
282 }
283 else /* if (i1_pel_pos_y >= 0) */
284 { /* all other rows */
285 u1_neighbor_avail = 0; /* current mb right & bottom right availability */
286 }
287 }
288
289 return u1_neighbor_avail;
290 }
291
292 /**
293 ******************************************************************************
294 *
295 * @brief
296 * evaluate best intra 16x16 mode (rate distortion opt off)
297 *
298 * @par Description
299 * This function evaluates all the possible intra 16x16 modes and finds the mode
300 * that best represents the macro-block (least distortion) and occupies fewer
301 * bits in the bit-stream.
302 *
303 * @param[in] ps_proc_ctxt
304 * pointer to process context (handle)
305 *
306 * @remarks
307 * Ideally the cost of encoding a macroblock is calculated as
308 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
309 * input block and the reconstructed block and rate is the number of bits taken
310 * to place the macroblock in the bit-stream. In this routine the rate does not
311 * exactly point to the total number of bits it takes, rather it points to header
312 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
313 * and residual bits fall in to texture bits the number of bits taken to encoding
314 * mbtype is considered as rate, we compute cost. Further we will approximate
315 * the distortion as the deviation b/w input and the predicted block as opposed
316 * to input and reconstructed block.
317 *
318 * NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
319 * the SAD and cost are one and the same.
320 *
321 * @return none
322 *
323 ******************************************************************************
324 */
325
ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t * ps_proc)326 void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
327 {
328 /* Codec Context */
329 codec_t *ps_codec = ps_proc->ps_codec;
330
331 /* SAD(distortion metric) of an 8x8 block */
332 WORD32 i4_mb_distortion = INT_MAX, i4_mb_distortion_least = INT_MAX;
333
334 /* lambda */
335 UWORD32 u4_lambda = ps_proc->u4_lambda;
336
337 /* cost = distortion + lambda*rate */
338 WORD32 i4_mb_cost= INT_MAX, i4_mb_cost_least = INT_MAX;
339
340 /* intra mode */
341 UWORD32 u4_intra_mode, u4_best_intra_16x16_mode = DC_I16x16;
342
343 /* neighbor pels for intra prediction */
344 UWORD8 *pu1_ngbr_pels_i16 = ps_proc->au1_ngbr_pels;
345
346 /* neighbor availability */
347 WORD32 i4_ngbr_avbl;
348
349 /* pointer to src macro block */
350 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
351 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
352
353 /* pointer to prediction macro block */
354 UWORD8 *pu1_pred_mb_intra_16x16 = ps_proc->pu1_pred_mb_intra_16x16;
355 UWORD8 *pu1_pred_mb_intra_16x16_plane = ps_proc->pu1_pred_mb_intra_16x16_plane;
356
357 /* strides */
358 WORD32 i4_src_strd = ps_proc->i4_src_strd;
359 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
360 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
361
362 /* pointer to neighbors left, top, topleft */
363 UWORD8 *pu1_mb_a = pu1_ref_mb - 1;
364 UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd;
365 UWORD8 *pu1_mb_d = pu1_mb_b - 1;
366
367 /* valid intra modes map */
368 UWORD32 u4_valid_intra_modes;
369
370 /* lut for valid intra modes */
371 const UWORD8 u1_valid_intra_modes[8] = {4, 6, 12, 14, 5, 7, 13, 15};
372
373 /* temp var */
374 UWORD32 i, u4_enable_fast_sad = 0, offset = 0;
375
376 /* init temp var */
377 if (ps_proc->i4_slice_type != ISLICE)
378 {
379 /* Offset for MBtype */
380 offset = (ps_proc->i4_slice_type == PSLICE) ? 5 : 23;
381 u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad;
382 }
383
384 /* locating neighbors that are available for prediction */
385 /* TODO : update the neighbor availability information basing on constrained intra pred information */
386 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines
387 * basing on neighbors available and hence evade the computation of neighbor availability totally. */
388 /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */
389 i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1);
390 ps_proc->i4_ngbr_avbl_16x16_mb = i4_ngbr_avbl;
391
392 /* gather prediction pels from the neighbors, if particular set is not available
393 * it is set to zero*/
394 /* left pels */
395 if (ps_proc->ps_ngbr_avbl->u1_mb_a)
396 {
397 for(i = 0; i < 16; i++)
398 pu1_ngbr_pels_i16[16-1-i] = pu1_mb_a[i * i4_rec_strd];
399 }
400 else
401 {
402 ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16,0,MB_SIZE);
403 }
404 /* top pels */
405 if (ps_proc->ps_ngbr_avbl->u1_mb_b)
406 {
407 ps_codec->pf_mem_cpy_mul8(pu1_ngbr_pels_i16+16+1,pu1_mb_b,16);
408 /*for(i = 0; i < 16; i++)
409 pu1_ngbr_pels_i16[16+1+i] = pu1_mb_b[i];*/
410 }
411 else
412 {
413 ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16+16+1,0,MB_SIZE);
414 }
415 /* topleft pels */
416 if (ps_proc->ps_ngbr_avbl->u1_mb_d)
417 pu1_ngbr_pels_i16[16] = *pu1_mb_d;
418 else
419 pu1_ngbr_pels_i16[16] = 0;
420
421 /* set valid intra modes for evaluation */
422 // u4_valid_intra_modes = 15;
423 //// ih264e_filter_intra16x16modes(pu1_mb_curr, i4_src_strd, &u4_valid_intra_modes);
424 // if (!ps_proc->ps_ngbr_avbl->u1_mb_a)
425 // u4_valid_intra_modes &= ~(1 << HORZ_I16x16);
426 // if (!ps_proc->ps_ngbr_avbl->u1_mb_b)
427 // u4_valid_intra_modes &= ~(1 << VERT_I16x16);
428 //// if (!ps_proc->ps_ngbr_avbl->u1_mb_a || !ps_proc->ps_ngbr_avbl->u1_mb_b || !ps_proc->ps_ngbr_avbl->u1_mb_d)
429 // if (i4_ngbr_avbl != 7)
430 // u4_valid_intra_modes &= ~(1 << PLANE_I16x16);
431
432 u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
433
434 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST)
435 u4_valid_intra_modes &= ~(1 << PLANE_I16x16);
436
437 /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */
438 ps_codec->pf_ih264e_evaluate_intra16x16_modes(pu1_curr_mb, pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16,
439 i4_src_strd, i4_pred_strd,
440 i4_ngbr_avbl, &u4_intra_mode, &i4_mb_distortion_least,
441 u4_valid_intra_modes);
442
443 /* cost = distortion + lambda*rate */
444 i4_mb_cost_least = i4_mb_distortion_least;
445
446 if (( (u4_valid_intra_modes >> 3) & 1) != 0 && (ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST ||
447 ps_proc->i4_slice_type == ISLICE))
448 {
449 /* intra prediction for PLANE mode*/
450 (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16_plane, 0, i4_pred_strd, i4_ngbr_avbl);
451
452 /* evaluate distortion between the actual blk and the estimated blk for the given mode */
453 ps_codec->apf_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_pred_mb_intra_16x16_plane, i4_src_strd, i4_pred_strd, i4_mb_cost_least, &i4_mb_distortion);
454
455 /* cost = distortion + lambda*rate */
456 i4_mb_cost = i4_mb_distortion;
457
458 /* update the least cost information if necessary */
459 if(i4_mb_cost < i4_mb_distortion_least)
460 {
461 u4_intra_mode = PLANE_I16x16;
462
463 i4_mb_cost_least = i4_mb_cost;
464 i4_mb_distortion_least = i4_mb_distortion;
465 }
466 }
467
468 u4_best_intra_16x16_mode = u4_intra_mode;
469
470 DEBUG("%d partition cost, %d intra mode\n", i4_mb_cost_least * 32, u4_best_intra_16x16_mode);
471
472 ps_proc->u1_l_i16_mode = u4_best_intra_16x16_mode;
473
474 /* cost = distortion + lambda*rate */
475 i4_mb_cost_least = i4_mb_distortion_least + u4_lambda*u1_uev_codelength[offset + u4_best_intra_16x16_mode];
476
477
478 /* update the type of the mb if necessary */
479 if (i4_mb_cost_least < ps_proc->i4_mb_cost)
480 {
481 ps_proc->i4_mb_cost = i4_mb_cost_least;
482 ps_proc->i4_mb_distortion = i4_mb_distortion_least;
483 ps_proc->u4_mb_type = I16x16;
484 }
485
486 return ;
487 }
488
489
490 /**
491 ******************************************************************************
492 *
493 * @brief
494 * evaluate best intra 8x8 mode (rate distortion opt on)
495 *
496 * @par Description
497 * This function evaluates all the possible intra 8x8 modes and finds the mode
498 * that best represents the macro-block (least distortion) and occupies fewer
499 * bits in the bit-stream.
500 *
501 * @param[in] ps_proc_ctxt
502 * pointer to proc ctxt
503 *
504 * @remarks Ideally the cost of encoding a macroblock is calculated as
505 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
506 * input block and the reconstructed block and rate is the number of bits taken
507 * to place the macroblock in the bit-stream. In this routine the rate does not
508 * exactly point to the total number of bits it takes, rather it points to header
509 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
510 * and residual bits fall in to texture bits the number of bits taken to encoding
511 * mbtype is considered as rate, we compute cost. Further we will approximate
512 * the distortion as the deviation b/w input and the predicted block as opposed
513 * to input and reconstructed block.
514 *
515 * NOTE: TODO: This function needs to be tested
516 *
517 * @return none
518 *
519 ******************************************************************************
520 */
ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t * ps_proc)521 void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
522 {
523 /* Codec Context */
524 codec_t *ps_codec = ps_proc->ps_codec;
525
526 /* SAD(distortion metric) of an 4x4 block */
527 WORD32 i4_partition_distortion, i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
528
529 /* lambda */
530 UWORD32 u4_lambda = ps_proc->u4_lambda;
531
532 /* cost = distortion + lambda*rate */
533 WORD32 i4_partition_cost, i4_partition_cost_least, i4_total_cost = u4_lambda;
534
535 /* cost due to mbtype */
536 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
537
538 /* intra mode */
539 UWORD32 u4_intra_mode, u4_best_intra_8x8_mode = DC_I8x8, u4_estimated_intra_8x8_mode;
540
541 /* neighbor pels for intra prediction */
542 UWORD8 *pu1_ngbr_pels_i8 = ps_proc->au1_ngbr_pels;
543
544 /* pointer to curr partition */
545 UWORD8 *pu1_mb_curr;
546
547 /* pointer to prediction macro block */
548 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
549
550 /* strides */
551 WORD32 i4_src_strd = ps_proc->i4_src_strd;
552 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
553
554 /* neighbors left, top, top right, top left */
555 UWORD8 *pu1_mb_a;
556 UWORD8 *pu1_mb_b;
557 UWORD8 *pu1_mb_d;
558
559 /* neighbor availability */
560 WORD32 i4_ngbr_avbl;
561 block_neighbors_t s_ngbr_avbl;
562
563 /* temp vars */
564 UWORD32 b8, u4_pix_x, u4_pix_y;
565
566 /* ngbr mb syntax information */
567 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
568 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
569
570 /* valid intra modes map */
571 UWORD32 u4_valid_intra_modes;
572
573 for(b8 = 0; b8 < 4; b8++)
574 {
575 u4_pix_x = (b8 & 0x01) << 3;
576 u4_pix_y = (b8 >> 1) << 3;
577
578 pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
579 /* when rdopt is off, we use the input as reference for constructing prediction buffer */
580 /* as opposed to using the recon pels. (open loop intra prediction) */
581 pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */
582 pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
583 pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */
584
585 /* locating neighbors that are available for prediction */
586 /* TODO : update the neighbor availability information basing on constrained intra pred information */
587 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
588 /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
589 s_ngbr_avbl.u1_mb_a = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x - 1, u4_pix_y); /* xD = -1, yD = 0 */
590 s_ngbr_avbl.u1_mb_b = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x, u4_pix_y - 1); /* xD = 0, yD = -1 */
591 s_ngbr_avbl.u1_mb_c = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x + 8, u4_pix_y - 1); /* xD = BLK_8x8_SIZE, yD = -1 */
592 s_ngbr_avbl.u1_mb_d = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x - 1, u4_pix_y - 1); /* xD = -1, yD = -1 */
593
594 /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_c * TOP_RIGHT_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */
595 i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + (s_ngbr_avbl.u1_mb_c << 3) +
596 (s_ngbr_avbl.u1_mb_a << 4);
597 /* if top partition is available and top right is not available for intra prediction, then */
598 /* padd top right samples using top sample and make top right also available */
599 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
600 ps_proc->ai4_neighbor_avail_8x8_subblks[b8] = i4_ngbr_avbl;
601
602
603 ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_mb_a, pu1_mb_b, pu1_mb_d, pu1_ngbr_pels_i8,
604 i4_src_strd, i4_ngbr_avbl);
605
606 i4_partition_cost_least = INT_MAX;
607 /* set valid intra modes for evaluation */
608 u4_valid_intra_modes = 0x1ff;
609
610 if (!s_ngbr_avbl.u1_mb_b)
611 {
612 u4_valid_intra_modes &= ~(1 << VERT_I4x4);
613 u4_valid_intra_modes &= ~(1 << DIAG_DL_I4x4);
614 u4_valid_intra_modes &= ~(1 << VERT_L_I4x4);
615 }
616 if (!s_ngbr_avbl.u1_mb_a)
617 {
618 u4_valid_intra_modes &= ~(1 << HORZ_I4x4);
619 u4_valid_intra_modes &= ~(1 << HORZ_U_I4x4);
620 }
621 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b || !s_ngbr_avbl.u1_mb_d)
622 {
623 u4_valid_intra_modes &= ~(1 << DIAG_DR_I4x4);
624 u4_valid_intra_modes &= ~(1 << VERT_R_I4x4);
625 u4_valid_intra_modes &= ~(1 << HORZ_D_I4x4);
626 }
627
628 /* estimate the intra 8x8 mode for the current partition (for evaluating cost) */
629 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
630 {
631 u4_estimated_intra_8x8_mode = DC_I8x8;
632 }
633 else
634 {
635 UWORD32 u4_left_intra_8x8_mode = DC_I8x8;
636 UWORD32 u4_top_intra_8x8_mode = DC_I8x8;
637
638 if (u4_pix_x == 0)
639 {
640 if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
641 {
642 u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[b8+1];
643 }
644 else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
645 {
646 u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[(b8+1)*4+2];
647 }
648 }
649 else
650 {
651 u4_left_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-1];
652 }
653
654 if (u4_pix_y == 0)
655 {
656 if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
657 {
658 u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[b8+2];
659 }
660 else if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
661 {
662 u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[(b8+2)*4+2];
663 }
664 }
665 else
666 {
667 u4_top_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-2];
668 }
669
670 u4_estimated_intra_8x8_mode = MIN(u4_left_intra_8x8_mode, u4_top_intra_8x8_mode);
671 }
672
673 /* perform intra mode 8x8 evaluation */
674 for (u4_intra_mode = VERT_I8x8; u4_valid_intra_modes != 0; u4_intra_mode++, u4_valid_intra_modes >>= 1)
675 {
676 if ( (u4_valid_intra_modes & 1) == 0)
677 continue;
678
679 /* intra prediction */
680 (ps_codec->apf_intra_pred_8_l)[u4_intra_mode](pu1_ngbr_pels_i8, pu1_pred_mb, 0, i4_pred_strd, i4_ngbr_avbl);
681
682 /* evaluate distortion between the actual blk and the estimated blk for the given mode */
683 ime_compute_sad_8x8(pu1_mb_curr, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_partition_cost_least, &i4_partition_distortion);
684
685 i4_partition_cost = i4_partition_distortion + ((u4_estimated_intra_8x8_mode == u4_intra_mode)?u4_cost_one_bit:u4_cost_four_bits);
686
687 /* update the least cost information if necessary */
688 if (i4_partition_cost < i4_partition_cost_least)
689 {
690 i4_partition_cost_least = i4_partition_cost;
691 i4_partition_distortion_least = i4_partition_distortion;
692 u4_best_intra_8x8_mode = u4_intra_mode;
693 }
694 }
695 /* macroblock distortion */
696 i4_total_cost += i4_partition_cost_least;
697 i4_total_distortion += i4_partition_distortion_least;
698 /* mb partition mode */
699 ps_proc->au1_intra_luma_mb_8x8_modes[b8] = u4_best_intra_8x8_mode;
700
701 }
702
703 /* update the type of the mb if necessary */
704 if (i4_total_cost < ps_proc->i4_mb_cost)
705 {
706 ps_proc->i4_mb_cost = i4_total_cost;
707 ps_proc->i4_mb_distortion = i4_total_distortion;
708 ps_proc->u4_mb_type = I8x8;
709 }
710
711 return ;
712 }
713
714
715 /**
716 ******************************************************************************
717 *
718 * @brief
719 * evaluate best intra 4x4 mode (rate distortion opt off)
720 *
721 * @par Description
722 * This function evaluates all the possible intra 4x4 modes and finds the mode
723 * that best represents the macro-block (least distortion) and occupies fewer
724 * bits in the bit-stream.
725 *
726 * @param[in] ps_proc_ctxt
727 * pointer to proc ctxt
728 *
729 * @remarks
730 * Ideally the cost of encoding a macroblock is calculated as
731 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
732 * input block and the reconstructed block and rate is the number of bits taken
733 * to place the macroblock in the bit-stream. In this routine the rate does not
734 * exactly point to the total number of bits it takes, rather it points to header
735 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
736 * and residual bits fall in to texture bits the number of bits taken to encoding
737 * mbtype is considered as rate, we compute cost. Further we will approximate
738 * the distortion as the deviation b/w input and the predicted block as opposed
739 * to input and reconstructed block.
740 *
741 * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
742 * 24*lambda is added to the SAD before comparison with the best SAD for
743 * inter prediction. This is an empirical value to prevent using too many intra
744 * blocks.
745 *
746 * @return none
747 *
748 ******************************************************************************
749 */
ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t * ps_proc)750 void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
751 {
752 /* Codec Context */
753 codec_t *ps_codec = ps_proc->ps_codec;
754
755 /* SAD(distortion metric) of an 4x4 block */
756 WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
757
758 /* lambda */
759 UWORD32 u4_lambda = ps_proc->u4_lambda;
760
761 /* cost = distortion + lambda*rate */
762 WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
763
764 /* cost due to mbtype */
765 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
766
767 /* intra mode */
768 UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
769
770 /* neighbor pels for intra prediction */
771 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
772
773 /* pointer to curr partition */
774 UWORD8 *pu1_mb_curr;
775
776 /* pointer to prediction macro block */
777 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
778
779 /* strides */
780 WORD32 i4_src_strd = ps_proc->i4_src_strd;
781 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
782
783 /* neighbors left, top, top right, top left */
784 UWORD8 *pu1_mb_a;
785 UWORD8 *pu1_mb_b;
786 UWORD8 *pu1_mb_c;
787 UWORD8 *pu1_mb_d;
788
789 /* neighbor availability */
790 WORD32 i4_ngbr_avbl;
791 block_neighbors_t s_ngbr_avbl;
792
793 /* temp vars */
794 UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
795
796 /* scan order inside 4x4 block */
797 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
798
799 /* ngbr sub mb modes */
800 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
801 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
802
803 /* valid intra modes map */
804 UWORD32 u4_valid_intra_modes;
805 UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
806
807 i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_c << 3);
808 memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
809
810 for (b8 = 0; b8 < 4; b8++)
811 {
812 u4_blk_x = (b8 & 0x01) << 3;
813 u4_blk_y = (b8 >> 1) << 3;
814 for (b4 = 0; b4 < 4; b4++)
815 {
816 u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
817 u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
818
819 pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
820 /* when rdopt is off, we use the input as reference for constructing prediction buffer */
821 /* as opposed to using the recon pels. (open loop intra prediction) */
822 pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */
823 pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
824 pu1_mb_c = pu1_mb_b + 4; /* pointer to top macro block */
825 pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */
826
827 /* locating neighbors that are available for prediction */
828 /* TODO : update the neighbor availability information basing on constrained intra pred information */
829 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
830 /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
831
832 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
833 s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
834 s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
835 s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
836 s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
837 /* set valid intra modes for evaluation */
838 u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
839
840 /* if top partition is available and top right is not available for intra prediction, then */
841 /* padd top right samples using top sample and make top right also available */
842 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
843
844 /* gather prediction pels from the neighbors */
845 if (s_ngbr_avbl.u1_mb_a)
846 {
847 for(i = 0; i < 4; i++)
848 pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_src_strd];
849 }
850 else
851 {
852 memset(pu1_ngbr_pels_i4, 0, 4);
853 }
854
855 if (s_ngbr_avbl.u1_mb_b)
856 {
857 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
858 }
859 else
860 {
861 memset(pu1_ngbr_pels_i4 + 5, 0, 4);
862 }
863
864 if (s_ngbr_avbl.u1_mb_d)
865 pu1_ngbr_pels_i4[4] = *pu1_mb_d;
866 else
867 pu1_ngbr_pels_i4[4] = 0;
868
869 if (s_ngbr_avbl.u1_mb_c)
870 {
871 memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
872 }
873 else if (s_ngbr_avbl.u1_mb_b)
874 {
875 memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
876 s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
877 }
878
879 i4_partition_cost_least = INT_MAX;
880
881 /* predict the intra 4x4 mode for the current partition (for evaluating cost) */
882 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
883 {
884 u4_estimated_intra_4x4_mode = DC_I4x4;
885 }
886 else
887 {
888 UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
889 UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
890
891 if (u4_pix_x == 0)
892 {
893 if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
894 {
895 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]];
896 }
897 else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
898 {
899 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1];
900 }
901 }
902 else
903 {
904 u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]];
905 }
906
907 if (u4_pix_y == 0)
908 {
909 if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
910 {
911 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]];
912 }
913 else if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
914 {
915 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
916 }
917 }
918 else
919 {
920 u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]];
921 }
922
923 u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
924 }
925
926 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode;
927
928 /* mode evaluation and prediction */
929 ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr,
930 pu1_ngbr_pels_i4,
931 pu1_pred_mb, i4_src_strd,
932 i4_pred_strd, i4_ngbr_avbl,
933 &u4_best_intra_4x4_mode,
934 &i4_partition_cost_least,
935 u4_valid_intra_modes,
936 u4_lambda,
937 u4_estimated_intra_4x4_mode);
938
939
940 i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode) ? u4_cost_one_bit : u4_cost_four_bits);
941
942 DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode);
943 /* macroblock distortion */
944 i4_total_distortion += i4_partition_distortion_least;
945 i4_total_cost += i4_partition_cost_least;
946 /* mb partition mode */
947 ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
948 }
949 }
950
951 /* update the type of the mb if necessary */
952 if (i4_total_cost < ps_proc->i4_mb_cost)
953 {
954 ps_proc->i4_mb_cost = i4_total_cost;
955 ps_proc->i4_mb_distortion = i4_total_distortion;
956 ps_proc->u4_mb_type = I4x4;
957 }
958
959 return ;
960 }
961
962 /**
963 ******************************************************************************
964 *
965 * @brief evaluate best intra 4x4 mode (rate distortion opt on)
966 *
967 * @par Description
968 * This function evaluates all the possible intra 4x4 modes and finds the mode
969 * that best represents the macro-block (least distortion) and occupies fewer
970 * bits in the bit-stream.
971 *
972 * @param[in] ps_proc_ctxt
973 * pointer to proc ctxt
974 *
975 * @remarks
976 * Ideally the cost of encoding a macroblock is calculated as
977 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
978 * input block and the reconstructed block and rate is the number of bits taken
979 * to place the macroblock in the bit-stream. In this routine the rate does not
980 * exactly point to the total number of bits it takes, rather it points to header
981 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
982 * and residual bits fall in to texture bits the number of bits taken to encoding
983 * mbtype is considered as rate, we compute cost. Further we will approximate
984 * the distortion as the deviation b/w input and the predicted block as opposed
985 * to input and reconstructed block.
986 *
987 * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
988 * 24*lambda is added to the SAD before comparison with the best SAD for
989 * inter prediction. This is an empirical value to prevent using too many intra
990 * blocks.
991 *
992 * @return none
993 *
994 ******************************************************************************
995 */
ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(process_ctxt_t * ps_proc)996 void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(process_ctxt_t *ps_proc)
997 {
998 /* Codec Context */
999 codec_t *ps_codec = ps_proc->ps_codec;
1000
1001 /* SAD(distortion metric) of an 4x4 block */
1002 WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
1003
1004 /* lambda */
1005 UWORD32 u4_lambda = ps_proc->u4_lambda;
1006
1007 /* cost = distortion + lambda*rate */
1008 WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
1009
1010 /* cost due to mbtype */
1011 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
1012
1013 /* intra mode */
1014 UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
1015
1016 /* neighbor pels for intra prediction */
1017 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
1018
1019 /* pointer to curr partition */
1020 UWORD8 *pu1_mb_curr;
1021 UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top;
1022 UWORD8 *pu1_ref_mb_intra_4x4;
1023
1024 /* pointer to residual macro block */
1025 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
1026
1027 /* pointer to prediction macro block */
1028 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
1029
1030 /* strides */
1031 WORD32 i4_src_strd = ps_proc->i4_src_strd;
1032 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1033 WORD32 i4_ref_strd_left, i4_ref_strd_top;
1034
1035 /* neighbors left, top, top right, top left */
1036 UWORD8 *pu1_mb_a;
1037 UWORD8 *pu1_mb_b;
1038 UWORD8 *pu1_mb_c;
1039 UWORD8 *pu1_mb_d;
1040
1041 /* number of non zero coeffs*/
1042 UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
1043
1044 /* quantization parameters */
1045 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1046
1047 /* neighbor availability */
1048 WORD32 i4_ngbr_avbl;
1049 block_neighbors_t s_ngbr_avbl;
1050
1051 /* temp vars */
1052 UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
1053
1054 /* scan order inside 4x4 block */
1055 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
1056
1057 /* ngbr sub mb modes */
1058 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
1059 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1060
1061 /* valid intra modes map */
1062 UWORD32 u4_valid_intra_modes;
1063 UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
1064
1065 /* Dummy variable for 4x4 trans function */
1066 WORD16 i2_dc_dummy;
1067
1068 /* compute ngbr availability for sub blks */
1069 i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_c << 3);
1070 memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
1071
1072 for(b8 = 0; b8 < 4; b8++)
1073 {
1074 u4_blk_x = (b8 & 0x01) << 3;
1075 u4_blk_y = (b8 >> 1) << 3;
1076 for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE)
1077 {
1078 u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
1079 u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
1080
1081 pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd);
1082 pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
1083 if (u4_pix_x == 0)
1084 {
1085 i4_ref_strd_left = ps_proc->i4_rec_strd;
1086 pu1_mb_ref_left = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_left);
1087 }
1088 else
1089 {
1090 i4_ref_strd_left = i4_pred_strd;
1091 pu1_mb_ref_left = pu1_ref_mb_intra_4x4;
1092 }
1093 if (u4_pix_y == 0)
1094 {
1095 i4_ref_strd_top = ps_proc->i4_rec_strd;
1096 pu1_mb_ref_top = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_top);
1097 }
1098 else
1099 {
1100 i4_ref_strd_top = i4_pred_strd;
1101 pu1_mb_ref_top = pu1_ref_mb_intra_4x4;
1102 }
1103
1104 pu1_mb_a = pu1_mb_ref_left - 1; /* pointer to left macro block */
1105 pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* pointer to top macro block */
1106 pu1_mb_c = pu1_mb_b + 4; /* pointer to top right macro block */
1107 if (u4_pix_y == 0)
1108 pu1_mb_d = pu1_mb_b - 1;
1109 else
1110 pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* pointer to top left macro block */
1111
1112 /* locating neighbors that are available for prediction */
1113 /* TODO : update the neighbor availability information basing on constrained intra pred information */
1114 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
1115 /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
1116
1117 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
1118 s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
1119 s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
1120 s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
1121 s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
1122 /* set valid intra modes for evaluation */
1123 u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
1124
1125 /* if top partition is available and top right is not available for intra prediction, then */
1126 /* padd top right samples using top sample and make top right also available */
1127 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
1128
1129 /* gather prediction pels from the neighbors */
1130 if (s_ngbr_avbl.u1_mb_a)
1131 {
1132 for(i = 0; i < 4; i++)
1133 pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_ref_strd_left];
1134 }
1135 else
1136 {
1137 memset(pu1_ngbr_pels_i4,0,4);
1138 }
1139 if(s_ngbr_avbl.u1_mb_b)
1140 {
1141 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
1142 }
1143 else
1144 {
1145 memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4);
1146 }
1147 if (s_ngbr_avbl.u1_mb_d)
1148 pu1_ngbr_pels_i4[4] = *pu1_mb_d;
1149 else
1150 pu1_ngbr_pels_i4[4] = 0;
1151 if (s_ngbr_avbl.u1_mb_c)
1152 {
1153 memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
1154 }
1155 else if (s_ngbr_avbl.u1_mb_b)
1156 {
1157 memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
1158 s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
1159 }
1160
1161 i4_partition_cost_least = INT_MAX;
1162
1163 /* predict the intra 4x4 mode for the current partition (for evaluating cost) */
1164 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
1165 {
1166 u4_estimated_intra_4x4_mode = DC_I4x4;
1167 }
1168 else
1169 {
1170 UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
1171 UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
1172
1173 if (u4_pix_x == 0)
1174 {
1175 if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
1176 {
1177 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]];
1178 }
1179 else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
1180 {
1181 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1];
1182 }
1183 }
1184 else
1185 {
1186 u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]];
1187 }
1188
1189 if (u4_pix_y == 0)
1190 {
1191 if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
1192 {
1193 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]];
1194 }
1195 else if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
1196 {
1197 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
1198 }
1199 }
1200 else
1201 {
1202 u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]];
1203 }
1204
1205 u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
1206 }
1207
1208 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode;
1209
1210 /*mode evaluation and prediction*/
1211 ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr,
1212 pu1_ngbr_pels_i4,
1213 pu1_pred_mb, i4_src_strd,
1214 i4_pred_strd, i4_ngbr_avbl,
1215 &u4_best_intra_4x4_mode,
1216 &i4_partition_cost_least,
1217 u4_valid_intra_modes,
1218 u4_lambda,
1219 u4_estimated_intra_4x4_mode);
1220
1221
1222 i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)?u4_cost_one_bit:u4_cost_four_bits);
1223
1224 DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode);
1225
1226 /* macroblock distortion */
1227 i4_total_distortion += i4_partition_distortion_least;
1228 i4_total_cost += i4_partition_cost_least;
1229
1230 /* mb partition mode */
1231 ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
1232
1233
1234 /********************************************************/
1235 /* error estimation, */
1236 /* transform */
1237 /* quantization */
1238 /********************************************************/
1239 ps_codec->pf_resi_trans_quant_4x4(pu1_mb_curr, pu1_pred_mb,
1240 pi2_res_mb, i4_src_strd,
1241 i4_pred_strd,
1242 /* No op stride, this implies a buff of lenght 1x16 */
1243 ps_qp_params->pu2_scale_mat,
1244 ps_qp_params->pu2_thres_mat,
1245 ps_qp_params->u1_qbits,
1246 ps_qp_params->u4_dead_zone,
1247 pu1_nnz, &i2_dc_dummy);
1248
1249 /********************************************************/
1250 /* ierror estimation, */
1251 /* itransform */
1252 /* iquantization */
1253 /********************************************************/
1254 ps_codec->pf_iquant_itrans_recon_4x4(pi2_res_mb, pu1_pred_mb,
1255 pu1_ref_mb_intra_4x4,
1256 i4_pred_strd, i4_pred_strd,
1257 ps_qp_params->pu2_iscale_mat,
1258 ps_qp_params->pu2_weigh_mat,
1259 ps_qp_params->u1_qp_div,
1260 ps_proc->pv_scratch_buff, 0,
1261 NULL);
1262 }
1263 }
1264
1265 /* update the type of the mb if necessary */
1266 if (i4_total_cost < ps_proc->i4_mb_cost)
1267 {
1268 ps_proc->i4_mb_cost = i4_total_cost;
1269 ps_proc->i4_mb_distortion = i4_total_distortion;
1270 ps_proc->u4_mb_type = I4x4;
1271 }
1272
1273 return ;
1274 }
1275
1276 /**
1277 ******************************************************************************
1278 *
1279 * @brief
1280 * evaluate best chroma intra 8x8 mode (rate distortion opt off)
1281 *
1282 * @par Description
1283 * This function evaluates all the possible chroma intra 8x8 modes and finds
1284 * the mode that best represents the macroblock (least distortion) and occupies
1285 * fewer bits in the bitstream.
1286 *
1287 * @param[in] ps_proc_ctxt
1288 * pointer to macroblock context (handle)
1289 *
1290 * @remarks
1291 * For chroma best intra pred mode is calculated based only on SAD
1292 *
1293 * @returns none
1294 *
1295 ******************************************************************************
1296 */
1297
ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t * ps_proc)1298 void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
1299 {
1300 /* Codec Context */
1301 codec_t *ps_codec = ps_proc->ps_codec;
1302
1303 /* SAD(distortion metric) of an 8x8 block */
1304 WORD32 i4_mb_distortion, i4_chroma_mb_distortion;
1305
1306 /* intra mode */
1307 UWORD32 u4_best_chroma_intra_8x8_mode = DC_CH_I8x8;
1308
1309 /* neighbor pels for intra prediction */
1310 UWORD8 *pu1_ngbr_pels_c_i8x8 = ps_proc->au1_ngbr_pels;
1311
1312 /* pointer to curr macro block */
1313 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
1314 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma;
1315
1316 /* pointer to prediction macro block */
1317 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
1318 UWORD8 *pu1_pred_mb_plane = ps_proc->pu1_pred_mb_intra_chroma_plane;
1319
1320 /* strides */
1321 WORD32 i4_src_strd_c = ps_proc->i4_src_chroma_strd;
1322 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1323 WORD32 i4_rec_strd_c = ps_proc->i4_rec_strd;
1324
1325 /* neighbors left, top, top left */
1326 UWORD8 *pu1_mb_a = pu1_ref_mb - 2;
1327 UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd_c;
1328 UWORD8 *pu1_mb_d = pu1_mb_b - 2;
1329
1330 /* neighbor availability */
1331 const UWORD8 u1_valid_intra_modes[8] = {1, 3, 9, 11, 5, 7, 13, 15,};
1332 WORD32 i4_ngbr_avbl;
1333
1334 /* valid intra modes map */
1335 UWORD32 u4_valid_intra_modes;
1336
1337 /* temp var */
1338 UWORD8 i;
1339
1340 /* locating neighbors that are available for prediction */
1341 /* TODO : update the neighbor availability information basing on constrained intra pred information */
1342 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines
1343 * basing on neighbors available and hence evade the computation of neighbor availability totally. */
1344 /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */
1345 i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1);
1346 ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_ngbr_avbl;
1347
1348 /* gather prediction pels from the neighbors */
1349 /* left pels */
1350 if (ps_proc->ps_ngbr_avbl->u1_mb_a)
1351 {
1352 for (i = 0; i < 16; i += 2)
1353 {
1354 pu1_ngbr_pels_c_i8x8[16 - 2 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c];
1355 pu1_ngbr_pels_c_i8x8[16 - 1 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c + 1];
1356 }
1357 }
1358 else
1359 {
1360 ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_c_i8x8, 0, MB_SIZE);
1361 }
1362
1363 /* top pels */
1364 if (ps_proc->ps_ngbr_avbl->u1_mb_b)
1365 {
1366 ps_codec->pf_mem_cpy_mul8(&pu1_ngbr_pels_c_i8x8[18], pu1_mb_b, 16);
1367 }
1368 else
1369 {
1370 ps_codec->pf_mem_set_mul8((pu1_ngbr_pels_c_i8x8 + 18), 0, MB_SIZE);
1371 }
1372
1373 /* top left pels */
1374 if (ps_proc->ps_ngbr_avbl->u1_mb_d)
1375 {
1376 pu1_ngbr_pels_c_i8x8[16] = *pu1_mb_d;
1377 pu1_ngbr_pels_c_i8x8[17] = *(pu1_mb_d + 1);
1378 }
1379
1380 u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
1381
1382 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST)
1383 u4_valid_intra_modes &= ~(1 << PLANE_CH_I8x8);
1384
1385 i4_chroma_mb_distortion = INT_MAX;
1386
1387 /* perform intra mode chroma 8x8 evaluation */
1388 /* intra prediction */
1389 ps_codec->pf_ih264e_evaluate_intra_chroma_modes(pu1_curr_mb,
1390 pu1_ngbr_pels_c_i8x8,
1391 pu1_pred_mb,
1392 i4_src_strd_c,
1393 i4_pred_strd,
1394 i4_ngbr_avbl,
1395 &u4_best_chroma_intra_8x8_mode,
1396 &i4_chroma_mb_distortion,
1397 u4_valid_intra_modes);
1398
1399 if (u4_valid_intra_modes & 8)/* if Chroma PLANE is valid*/
1400 {
1401 (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr_pels_c_i8x8, pu1_pred_mb_plane, 0, i4_pred_strd, i4_ngbr_avbl);
1402
1403 /* evaluate distortion(sad) */
1404 ps_codec->pf_compute_sad_16x8(pu1_curr_mb, pu1_pred_mb_plane, i4_src_strd_c, i4_pred_strd, i4_chroma_mb_distortion, &i4_mb_distortion);
1405
1406 /* update the least distortion information if necessary */
1407 if(i4_mb_distortion < i4_chroma_mb_distortion)
1408 {
1409 i4_chroma_mb_distortion = i4_mb_distortion;
1410 u4_best_chroma_intra_8x8_mode = PLANE_CH_I8x8;
1411 }
1412 }
1413
1414 DEBUG("%d partition cost, %d intra mode\n", i4_chroma_mb_distortion, u4_best_chroma_intra_8x8_mode);
1415
1416 ps_proc->u1_c_i8_mode = u4_best_chroma_intra_8x8_mode;
1417
1418 return ;
1419 }
1420
1421
1422 /**
1423 ******************************************************************************
1424 *
1425 * @brief
1426 * Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
1427 * prediction.
1428 *
1429 * @par Description
1430 * This function evaluates first three 16x16 modes and compute corresponding sad
1431 * and return the buffer predicted with best mode.
1432 *
1433 * @param[in] pu1_src
1434 * UWORD8 pointer to the source
1435 *
1436 * @param[in] pu1_ngbr_pels_i16
1437 * UWORD8 pointer to neighbouring pels
1438 *
1439 * @param[out] pu1_dst
1440 * UWORD8 pointer to the destination
1441 *
1442 * @param[in] src_strd
1443 * integer source stride
1444 *
1445 * @param[in] dst_strd
1446 * integer destination stride
1447 *
1448 * @param[in] u4_n_avblty
1449 * availability of neighbouring pixels
1450 *
1451 * @param[in] u4_intra_mode
1452 * Pointer to the variable in which best mode is returned
1453 *
1454 * @param[in] pu4_sadmin
1455 * Pointer to the variable in which minimum sad is returned
1456 *
1457 * @param[in] u4_valid_intra_modes
1458 * Says what all modes are valid
1459 *
1460 * @returns none
1461 *
1462 ******************************************************************************
1463 */
ih264e_evaluate_intra16x16_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels_i16,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes)1464 void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
1465 UWORD8 *pu1_ngbr_pels_i16,
1466 UWORD8 *pu1_dst,
1467 UWORD32 src_strd,
1468 UWORD32 dst_strd,
1469 WORD32 u4_n_avblty,
1470 UWORD32 *u4_intra_mode,
1471 WORD32 *pu4_sadmin,
1472 UWORD32 u4_valid_intra_modes)
1473 {
1474 UWORD8 *pu1_neighbour;
1475 UWORD8 *pu1_src_temp = pu1_src;
1476 UWORD8 left = 0, top = 0;
1477 WORD32 u4_dcval = 0;
1478 WORD32 i, j;
1479 WORD32 i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX,
1480 i4_min_sad = INT_MAX;
1481 UWORD8 val;
1482
1483 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
1484 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
1485
1486 /* left available */
1487 if (left)
1488 {
1489 i4_sad_horz = 0;
1490
1491 for (i = 0; i < 16; i++)
1492 {
1493 val = pu1_ngbr_pels_i16[15 - i];
1494
1495 u4_dcval += val;
1496
1497 for (j = 0; j < 16; j++)
1498 {
1499 i4_sad_horz += ABS(val - pu1_src_temp[j]);
1500 }
1501
1502 pu1_src_temp += src_strd;
1503 }
1504 u4_dcval += 8;
1505 }
1506
1507 pu1_src_temp = pu1_src;
1508 /* top available */
1509 if (top)
1510 {
1511 i4_sad_vert = 0;
1512
1513 for (i = 0; i < 16; i++)
1514 {
1515 u4_dcval += pu1_ngbr_pels_i16[17 + i];
1516
1517 for (j = 0; j < 16; j++)
1518 {
1519 i4_sad_vert += ABS(pu1_ngbr_pels_i16[17 + j] - pu1_src_temp[j]);
1520 }
1521 pu1_src_temp += src_strd;
1522
1523 }
1524 u4_dcval += 8;
1525 }
1526
1527 u4_dcval = (u4_dcval) >> (3 + left + top);
1528
1529 pu1_src_temp = pu1_src;
1530
1531 /* none available */
1532 u4_dcval += (left == 0) * (top == 0) * 128;
1533
1534 i4_sad_dc = 0;
1535
1536 for (i = 0; i < 16; i++)
1537 {
1538 for (j = 0; j < 16; j++)
1539 {
1540 i4_sad_dc += ABS(u4_dcval - pu1_src_temp[j]);
1541 }
1542 pu1_src_temp += src_strd;
1543 }
1544
1545 if ((u4_valid_intra_modes & 04) == 0)/* If DC is disabled */
1546 i4_sad_dc = INT_MAX;
1547
1548 if ((u4_valid_intra_modes & 01) == 0)/* If VERT is disabled */
1549 i4_sad_vert = INT_MAX;
1550
1551 if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled */
1552 i4_sad_horz = INT_MAX;
1553
1554 i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
1555
1556 /* Finding Minimum sad and doing corresponding prediction */
1557 if (i4_min_sad < *pu4_sadmin)
1558 {
1559 *pu4_sadmin = i4_min_sad;
1560 if (i4_min_sad == i4_sad_vert)
1561 {
1562 *u4_intra_mode = VERT_I16x16;
1563 pu1_neighbour = pu1_ngbr_pels_i16 + 17;
1564 for (j = 0; j < 16; j++)
1565 {
1566 memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
1567 pu1_dst += dst_strd;
1568 }
1569 }
1570 else if (i4_min_sad == i4_sad_horz)
1571 {
1572 *u4_intra_mode = HORZ_I16x16;
1573 for (j = 0; j < 16; j++)
1574 {
1575 val = pu1_ngbr_pels_i16[15 - j];
1576 memset(pu1_dst, val, MB_SIZE);
1577 pu1_dst += dst_strd;
1578 }
1579 }
1580 else
1581 {
1582 *u4_intra_mode = DC_I16x16;
1583 for (j = 0; j < 16; j++)
1584 {
1585 memset(pu1_dst, u4_dcval, MB_SIZE);
1586 pu1_dst += dst_strd;
1587 }
1588 }
1589 }
1590 return;
1591 }
1592
1593 /**
1594 ******************************************************************************
1595 *
1596 * @brief
1597 * Evaluate best intra 4x4 mode and perform prediction.
1598 *
1599 * @par Description
1600 * This function evaluates 4x4 modes and compute corresponding sad
1601 * and return the buffer predicted with best mode.
1602 *
1603 * @param[in] pu1_src
1604 * UWORD8 pointer to the source
1605 *
1606 * @param[in] pu1_ngbr_pels
1607 * UWORD8 pointer to neighbouring pels
1608 *
1609 * @param[out] pu1_dst
1610 * UWORD8 pointer to the destination
1611 *
1612 * @param[in] src_strd
1613 * integer source stride
1614 *
1615 * @param[in] dst_strd
1616 * integer destination stride
1617 *
1618 * @param[in] u4_n_avblty
1619 * availability of neighbouring pixels
1620 *
1621 * @param[out] u4_intra_mode
1622 * Pointer to the variable in which best mode is returned
1623 *
1624 * @param[in] pu4_sadmin
1625 * Pointer to the variable in which minimum cost is returned
1626 *
1627 * @param[in] u4_valid_intra_modes
1628 * Says what all modes are valid
1629 *
1630 * @param[in] u4_lambda
1631 * Lambda value for computing cost from SAD
1632 *
1633 * @param[in] u4_predictd_mode
1634 * Predicted mode for cost computation
1635 *
1636 * @returns none
1637 *
1638 ******************************************************************************
1639 */
ih264e_evaluate_intra_4x4_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes,UWORD32 u4_lambda,UWORD32 u4_predictd_mode)1640 void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src,
1641 UWORD8 *pu1_ngbr_pels,
1642 UWORD8 *pu1_dst,
1643 UWORD32 src_strd,
1644 UWORD32 dst_strd,
1645 WORD32 u4_n_avblty,
1646 UWORD32 *u4_intra_mode,
1647 WORD32 *pu4_sadmin,
1648 UWORD32 u4_valid_intra_modes,
1649 UWORD32 u4_lambda,
1650 UWORD32 u4_predictd_mode)
1651 {
1652 UWORD8 *pu1_src_temp = pu1_src;
1653 UWORD8 *pu1_pred = pu1_ngbr_pels;
1654 UWORD8 left = 0, top = 0;
1655 UWORD8 u1_pred_val = 0;
1656 UWORD8 u1_pred_vals[4] = {0};
1657 UWORD8 *pu1_pred_val = NULL;
1658 /* To store FILT121 operated values*/
1659 UWORD8 u1_pred_vals_diag_121[15] = {0};
1660 /* To store FILT11 operated values*/
1661 UWORD8 u1_pred_vals_diag_11[15] = {0};
1662 UWORD8 u1_pred_vals_vert_r[8] = {0};
1663 UWORD8 u1_pred_vals_horz_d[10] = {0};
1664 UWORD8 u1_pred_vals_horz_u[10] = {0};
1665 WORD32 u4_dcval = 0;
1666 WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1667 INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1668
1669 WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1670 INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1671 WORD32 i, i4_min_cost = INT_MAX;
1672
1673 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
1674 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
1675
1676 /* Computing SAD */
1677
1678 /* VERT mode valid */
1679 if (u4_valid_intra_modes & 1)
1680 {
1681 pu1_pred = pu1_ngbr_pels + 5;
1682 i4_sad[VERT_I4x4] = 0;
1683 i4_cost[VERT_I4x4] = 0;
1684
1685 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1686 pu1_src_temp += src_strd;
1687 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1688 pu1_src_temp += src_strd;
1689 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1690 pu1_src_temp += src_strd;
1691 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1692
1693 i4_cost[VERT_I4x4] = i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ?
1694 u4_lambda : 4 * u4_lambda);
1695 }
1696
1697 /* HORZ mode valid */
1698 if (u4_valid_intra_modes & 2)
1699 {
1700 i4_sad[HORZ_I4x4] = 0;
1701 i4_cost[HORZ_I4x4] =0;
1702 pu1_src_temp = pu1_src;
1703
1704 u1_pred_val = pu1_ngbr_pels[3];
1705
1706 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1707 + ABS(pu1_src_temp[1] - u1_pred_val)
1708 + ABS(pu1_src_temp[2] - u1_pred_val)
1709 + ABS(pu1_src_temp[3] - u1_pred_val);
1710 pu1_src_temp += src_strd;
1711
1712 u1_pred_val = pu1_ngbr_pels[2];
1713
1714 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1715 + ABS(pu1_src_temp[1] - u1_pred_val)
1716 + ABS(pu1_src_temp[2] - u1_pred_val)
1717 + ABS(pu1_src_temp[3] - u1_pred_val);
1718 pu1_src_temp += src_strd;
1719
1720 u1_pred_val = pu1_ngbr_pels[1];
1721
1722 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1723 + ABS(pu1_src_temp[1] - u1_pred_val)
1724 + ABS(pu1_src_temp[2] - u1_pred_val)
1725 + ABS(pu1_src_temp[3] - u1_pred_val);
1726 pu1_src_temp += src_strd;
1727
1728 u1_pred_val = pu1_ngbr_pels[0];
1729
1730 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1731 + ABS(pu1_src_temp[1] - u1_pred_val)
1732 + ABS(pu1_src_temp[2] - u1_pred_val)
1733 + ABS(pu1_src_temp[3] - u1_pred_val);
1734
1735 i4_cost[HORZ_I4x4] = i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ?
1736 u4_lambda : 4 * u4_lambda);
1737 }
1738
1739 /* DC mode valid */
1740 if (u4_valid_intra_modes & 4)
1741 {
1742 i4_sad[DC_I4x4] = 0;
1743 i4_cost[DC_I4x4] = 0;
1744 pu1_src_temp = pu1_src;
1745
1746 if (left)
1747 u4_dcval = pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2]
1748 + pu1_ngbr_pels[3] + 2;
1749 if (top)
1750 u4_dcval += pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7]
1751 + pu1_ngbr_pels[8] + 2;
1752
1753 u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128;
1754
1755 /* none available */
1756 memset(u1_pred_vals, u4_dcval, 4);
1757 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1758 pu1_src_temp += src_strd;
1759 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1760 pu1_src_temp += src_strd;
1761 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1762 pu1_src_temp += src_strd;
1763 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1764 pu1_src_temp += src_strd;
1765
1766 i4_cost[DC_I4x4] = i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ?
1767 u4_lambda : 4 * u4_lambda);
1768 }
1769
1770 /* if modes other than VERT, HORZ and DC are valid */
1771 if (u4_valid_intra_modes > 7)
1772 {
1773 pu1_pred = pu1_ngbr_pels;
1774 pu1_pred[13] = pu1_pred[14] = pu1_pred[12];
1775
1776 /* Performing FILT121 and FILT11 operation for all neighbour values*/
1777 for (i = 0; i < 13; i++)
1778 {
1779 u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]);
1780 u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]);
1781
1782 pu1_pred++;
1783 }
1784
1785 if (u4_valid_intra_modes & 8)/* DIAG_DL */
1786 {
1787 i4_sad[DIAG_DL_I4x4] = 0;
1788 i4_cost[DIAG_DL_I4x4] = 0;
1789 pu1_src_temp = pu1_src;
1790 pu1_pred_val = u1_pred_vals_diag_121 + 5;
1791
1792 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]);
1793 pu1_src_temp += src_strd;
1794 USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]);
1795 pu1_src_temp += src_strd;
1796 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]);
1797 pu1_src_temp += src_strd;
1798 USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]);
1799 pu1_src_temp += src_strd;
1800 i4_cost[DIAG_DL_I4x4] = i4_sad[DIAG_DL_I4x4] + ((u4_predictd_mode == DIAG_DL_I4x4) ?
1801 u4_lambda : 4 * u4_lambda);
1802 }
1803
1804 if (u4_valid_intra_modes & 16)/* DIAG_DR */
1805 {
1806 i4_sad[DIAG_DR_I4x4] = 0;
1807 i4_cost[DIAG_DR_I4x4] = 0;
1808 pu1_src_temp = pu1_src;
1809 pu1_pred_val = u1_pred_vals_diag_121 + 3;
1810
1811 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]);
1812 pu1_src_temp += src_strd;
1813 USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]);
1814 pu1_src_temp += src_strd;
1815 USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]);
1816 pu1_src_temp += src_strd;
1817 USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]);
1818 pu1_src_temp += src_strd;
1819 i4_cost[DIAG_DR_I4x4] = i4_sad[DIAG_DR_I4x4] + ((u4_predictd_mode == DIAG_DR_I4x4) ?
1820 u4_lambda : 4 * u4_lambda);
1821
1822 }
1823
1824 if (u4_valid_intra_modes & 32)/* VERT_R mode valid ????*/
1825 {
1826 i4_sad[VERT_R_I4x4] = 0;
1827
1828 pu1_src_temp = pu1_src;
1829 u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2];
1830 memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3);
1831 u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1];
1832 memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3);
1833
1834 pu1_pred_val = u1_pred_vals_diag_11 + 4;
1835 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1836 pu1_pred_val = u1_pred_vals_diag_121 + 3;
1837 pu1_src_temp += src_strd;
1838 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1839 pu1_src_temp += src_strd;
1840 USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]);
1841 pu1_src_temp += src_strd;
1842 USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4),
1843 i4_sad[VERT_R_I4x4]);
1844
1845 i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] + ((u4_predictd_mode == VERT_R_I4x4) ?
1846 u4_lambda : 4 * u4_lambda);
1847 }
1848
1849 if (u4_valid_intra_modes & 64)/* HORZ_D mode valid ????*/
1850 {
1851 i4_sad[HORZ_D_I4x4] = 0;
1852
1853 pu1_src_temp = pu1_src;
1854 u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3];
1855 memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3);
1856 u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0];
1857 u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0];
1858 u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1];
1859 u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1];
1860 u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2];
1861 u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2];
1862
1863 pu1_pred_val = u1_pred_vals_horz_d;
1864 USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]);
1865 pu1_src_temp += src_strd;
1866 USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]);
1867 pu1_src_temp += src_strd;
1868 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]);
1869 pu1_src_temp += src_strd;
1870 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]);
1871
1872 i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] + ((u4_predictd_mode == HORZ_D_I4x4) ?
1873 u4_lambda : 4 * u4_lambda);
1874 }
1875
1876 if (u4_valid_intra_modes & 128)/* VERT_L mode valid ????*/
1877 {
1878 i4_sad[VERT_L_I4x4] = 0;
1879 pu1_src_temp = pu1_src;
1880 pu1_pred_val = u1_pred_vals_diag_11 + 5;
1881 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1882 pu1_src_temp += src_strd;
1883 pu1_pred_val = u1_pred_vals_diag_121 + 5;
1884 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1885 pu1_src_temp += src_strd;
1886 pu1_pred_val = u1_pred_vals_diag_11 + 6;
1887 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1888 pu1_src_temp += src_strd;
1889 pu1_pred_val = u1_pred_vals_diag_121 + 6;
1890 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1891
1892 i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] + ((u4_predictd_mode == VERT_L_I4x4) ?
1893 u4_lambda : 4 * u4_lambda);
1894 }
1895
1896 if (u4_valid_intra_modes & 256)/* HORZ_U mode valid ????*/
1897 {
1898 i4_sad[HORZ_U_I4x4] = 0;
1899 pu1_src_temp = pu1_src;
1900 u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2];
1901 u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1];
1902 u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1];
1903 u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0];
1904 u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0];
1905 u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]);
1906
1907 memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4);
1908
1909 pu1_pred_val = u1_pred_vals_horz_u;
1910 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]);
1911 pu1_src_temp += src_strd;
1912 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]);
1913 pu1_src_temp += src_strd;
1914 USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]);
1915 pu1_src_temp += src_strd;
1916 USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]);
1917
1918 i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] + ((u4_predictd_mode == HORZ_U_I4x4) ?
1919 u4_lambda : 4 * u4_lambda);
1920 }
1921
1922 i4_min_cost = MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]),
1923 MIN3(i4_cost[3], i4_cost[4], i4_cost[5]),
1924 MIN3(i4_cost[6], i4_cost[7], i4_cost[8]));
1925
1926 }
1927 else
1928 {
1929 /* Only first three modes valid */
1930 i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]);
1931 }
1932
1933 *pu4_sadmin = i4_min_cost;
1934
1935 if (i4_min_cost == i4_cost[0])
1936 {
1937 *u4_intra_mode = VERT_I4x4;
1938 pu1_pred_val = pu1_ngbr_pels + 5;
1939 memcpy(pu1_dst, (pu1_pred_val), 4);
1940 pu1_dst += dst_strd;
1941 memcpy(pu1_dst, (pu1_pred_val), 4);
1942 pu1_dst += dst_strd;
1943 memcpy(pu1_dst, (pu1_pred_val), 4);
1944 pu1_dst += dst_strd;
1945 memcpy(pu1_dst, (pu1_pred_val), 4);
1946 }
1947 else if (i4_min_cost == i4_cost[1])
1948 {
1949 *u4_intra_mode = HORZ_I4x4;
1950 memset(pu1_dst, pu1_ngbr_pels[3], 4);
1951 pu1_dst += dst_strd;
1952 memset(pu1_dst, pu1_ngbr_pels[2], 4);
1953 pu1_dst += dst_strd;
1954 memset(pu1_dst, pu1_ngbr_pels[1], 4);
1955 pu1_dst += dst_strd;
1956 memset(pu1_dst, pu1_ngbr_pels[0], 4);
1957 }
1958 else if (i4_min_cost == i4_cost[2])
1959 {
1960 *u4_intra_mode = DC_I4x4;
1961 memset(pu1_dst, u4_dcval, 4);
1962 pu1_dst += dst_strd;
1963 memset(pu1_dst, u4_dcval, 4);
1964 pu1_dst += dst_strd;
1965 memset(pu1_dst, u4_dcval, 4);
1966 pu1_dst += dst_strd;
1967 memset(pu1_dst, u4_dcval, 4);
1968 }
1969
1970 else if (i4_min_cost == i4_cost[3])
1971 {
1972 *u4_intra_mode = DIAG_DL_I4x4;
1973 pu1_pred_val = u1_pred_vals_diag_121 + 5;
1974 memcpy(pu1_dst, (pu1_pred_val), 4);
1975 pu1_dst += dst_strd;
1976 memcpy(pu1_dst, (pu1_pred_val + 1), 4);
1977 pu1_dst += dst_strd;
1978 memcpy(pu1_dst, (pu1_pred_val + 2), 4);
1979 pu1_dst += dst_strd;
1980 memcpy(pu1_dst, (pu1_pred_val + 3), 4);
1981 }
1982 else if (i4_min_cost == i4_cost[4])
1983 {
1984 *u4_intra_mode = DIAG_DR_I4x4;
1985 pu1_pred_val = u1_pred_vals_diag_121 + 3;
1986
1987 memcpy(pu1_dst, (pu1_pred_val), 4);
1988 pu1_dst += dst_strd;
1989 memcpy(pu1_dst, (pu1_pred_val - 1), 4);
1990 pu1_dst += dst_strd;
1991 memcpy(pu1_dst, (pu1_pred_val - 2), 4);
1992 pu1_dst += dst_strd;
1993 memcpy(pu1_dst, (pu1_pred_val - 3), 4);
1994 }
1995
1996 else if (i4_min_cost == i4_cost[5])
1997 {
1998 *u4_intra_mode = VERT_R_I4x4;
1999 pu1_pred_val = u1_pred_vals_diag_11 + 4;
2000 memcpy(pu1_dst, (pu1_pred_val), 4);
2001 pu1_dst += dst_strd;
2002 pu1_pred_val = u1_pred_vals_diag_121 + 3;
2003 memcpy(pu1_dst, (pu1_pred_val), 4);
2004 pu1_dst += dst_strd;
2005 memcpy(pu1_dst, (u1_pred_vals_vert_r), 4);
2006 pu1_dst += dst_strd;
2007 memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4);
2008 }
2009 else if (i4_min_cost == i4_cost[6])
2010 {
2011 *u4_intra_mode = HORZ_D_I4x4;
2012 pu1_pred_val = u1_pred_vals_horz_d;
2013 memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2014 pu1_dst += dst_strd;
2015 memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2016 pu1_dst += dst_strd;
2017 memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2018 pu1_dst += dst_strd;
2019 memcpy(pu1_dst, (pu1_pred_val), 4);
2020 pu1_dst += dst_strd;
2021 }
2022 else if (i4_min_cost == i4_cost[7])
2023 {
2024 *u4_intra_mode = VERT_L_I4x4;
2025 pu1_pred_val = u1_pred_vals_diag_11 + 5;
2026 memcpy(pu1_dst, (pu1_pred_val), 4);
2027 pu1_dst += dst_strd;
2028 pu1_pred_val = u1_pred_vals_diag_121 + 5;
2029 memcpy(pu1_dst, (pu1_pred_val), 4);
2030 pu1_dst += dst_strd;
2031 pu1_pred_val = u1_pred_vals_diag_11 + 6;
2032 memcpy(pu1_dst, (pu1_pred_val), 4);
2033 pu1_dst += dst_strd;
2034 pu1_pred_val = u1_pred_vals_diag_121 + 6;
2035 memcpy(pu1_dst, (pu1_pred_val), 4);
2036 }
2037 else if (i4_min_cost == i4_cost[8])
2038 {
2039 *u4_intra_mode = HORZ_U_I4x4;
2040 pu1_pred_val = u1_pred_vals_horz_u;
2041 memcpy(pu1_dst, (pu1_pred_val), 4);
2042 pu1_dst += dst_strd;
2043 memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2044 pu1_dst += dst_strd;
2045 memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2046 pu1_dst += dst_strd;
2047 memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2048 pu1_dst += dst_strd;
2049 }
2050
2051 return;
2052 }
2053
2054 /**
2055 ******************************************************************************
2056 *
2057 * @brief:
2058 * Evaluate best intr chroma mode (among VERT, HORZ and DC ) and do the prediction.
2059 *
2060 * @par Description
2061 * This function evaluates first three intra chroma modes and compute corresponding sad
2062 * and return the buffer predicted with best mode.
2063 *
2064 * @param[in] pu1_src
2065 * UWORD8 pointer to the source
2066 *
2067 * @param[in] pu1_ngbr_pels
2068 * UWORD8 pointer to neighbouring pels
2069 *
2070 * @param[out] pu1_dst
2071 * UWORD8 pointer to the destination
2072 *
2073 * @param[in] src_strd
2074 * integer source stride
2075 *
2076 * @param[in] dst_strd
2077 * integer destination stride
2078 *
2079 * @param[in] u4_n_avblty
2080 * availability of neighbouring pixels
2081 *
2082 * @param[in] u4_intra_mode
2083 * Pointer to the variable in which best mode is returned
2084 *
2085 * @param[in] pu4_sadmin
2086 * Pointer to the variable in which minimum sad is returned
2087 *
2088 * @param[in] u4_valid_intra_modes
2089 * Says what all modes are valid
2090 *
2091 * @return none
2092 *
2093 ******************************************************************************
2094 */
ih264e_evaluate_intra_chroma_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes)2095 void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
2096 UWORD8 *pu1_ngbr_pels,
2097 UWORD8 *pu1_dst,
2098 UWORD32 src_strd,
2099 UWORD32 dst_strd,
2100 WORD32 u4_n_avblty,
2101 UWORD32 *u4_intra_mode,
2102 WORD32 *pu4_sadmin,
2103 UWORD32 u4_valid_intra_modes)
2104 {
2105 UWORD8 *pu1_neighbour;
2106 UWORD8 *pu1_src_temp = pu1_src;
2107 UWORD8 left = 0, top = 0;
2108 WORD32 u4_dcval_u_l[2] = { 0, 0 }, /*sum left neighbours for 'U' ,two separate sets - sum of first four from top,and sum of four values from bottom */
2109 u4_dcval_u_t[2] = { 0, 0 }; /*sum top neighbours for 'U'*/
2110
2111 WORD32 u4_dcval_v_l[2] = { 0, 0 }, /*sum left neighbours for 'V'*/
2112 u4_dcval_v_t[2] = { 0, 0 }; /*sum top neighbours for 'V'*/
2113
2114 WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX,
2115 i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX;
2116 UWORD8 val_u, val_v;
2117
2118 WORD32 u4_dc_val[2][2][2];/* -----------
2119 | | | Chroma can have four
2120 | 00 | 01 | separate dc value...
2121 ----------- u4_dc_val corresponds to this dc values
2122 | | | with u4_dc_val[2][2][U] and u4_dc_val[2][2][V]
2123 | 10 | 11 |
2124 ----------- */
2125 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
2126 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
2127
2128 /*Evaluating HORZ*/
2129 if (left)/* Ifleft available*/
2130 {
2131 i4_sad_horz = 0;
2132
2133 for (i = 0; i < 8; i++)
2134 {
2135 val_v = pu1_ngbr_pels[15 - 2 * i];
2136 val_u = pu1_ngbr_pels[15 - 2 * i - 1];
2137 row = i / 4;
2138 u4_dcval_u_l[row] += val_u;
2139 u4_dcval_v_l[row] += val_v;
2140 for (j = 0; j < 8; j++)
2141 {
2142 i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for HORZ mode*/
2143 i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]);
2144 }
2145
2146 pu1_src_temp += src_strd;
2147 }
2148 u4_dcval_u_l[0] += 2;
2149 u4_dcval_u_l[1] += 2;
2150 u4_dcval_v_l[0] += 2;
2151 u4_dcval_v_l[1] += 2;
2152 }
2153
2154 /*Evaluating VERT**/
2155 pu1_src_temp = pu1_src;
2156 if (top) /* top available*/
2157 {
2158 i4_sad_vert = 0;
2159
2160 for (i = 0; i < 8; i++)
2161 {
2162 col = i / 4;
2163
2164 val_u = pu1_ngbr_pels[18 + i * 2];
2165 val_v = pu1_ngbr_pels[18 + i * 2 + 1];
2166 u4_dcval_u_t[col] += val_u;
2167 u4_dcval_v_t[col] += val_v;
2168
2169 for (j = 0; j < 16; j++)
2170 {
2171 i4_sad_vert += ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]);/* Finding SAD for VERT mode*/
2172 }
2173 pu1_src_temp += src_strd;
2174
2175 }
2176 u4_dcval_u_t[0] += 2;
2177 u4_dcval_u_t[1] += 2;
2178 u4_dcval_v_t[0] += 2;
2179 u4_dcval_v_t[1] += 2;
2180 }
2181
2182 /* computing DC value*/
2183 /* Equation 8-128 in spec*/
2184 u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top);
2185 u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top);
2186 u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top);
2187 u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top);
2188
2189 if (top)
2190 {
2191 /* Equation 8-132 in spec*/
2192 u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top);
2193 u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top);
2194 }
2195 else
2196 {
2197 u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left);
2198 u4_dc_val[0][1][1] = (u4_dcval_v_l[0]) >> (1 + left);
2199 }
2200
2201 if (left)
2202 {
2203 u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left);
2204 u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left);
2205 }
2206 else
2207 {
2208 u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top);
2209 u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top);
2210 }
2211
2212 if (!(left || top))
2213 {
2214 /*none available*/
2215 u4_dc_val[0][0][0] = u4_dc_val[0][0][1] =
2216 u4_dc_val[0][1][0] = u4_dc_val[0][1][1] =
2217 u4_dc_val[1][0][0] = u4_dc_val[1][0][1] =
2218 u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128;
2219 }
2220
2221 /* Evaluating DC */
2222 pu1_src_temp = pu1_src;
2223 i4_sad_dc = 0;
2224 for (i = 0; i < 8; i++)
2225 {
2226 for (j = 0; j < 8; j++)
2227 {
2228 col = j / 4;
2229 row = i / 4;
2230 val_u = u4_dc_val[row][col][0];
2231 val_v = u4_dc_val[row][col][1];
2232
2233 i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for DC mode*/
2234 i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]);
2235 }
2236 pu1_src_temp += src_strd;
2237 }
2238
2239 if ((u4_valid_intra_modes & 01) == 0)/* If DC is disabled*/
2240 i4_sad_dc = INT_MAX;
2241 if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled*/
2242 i4_sad_horz = INT_MAX;
2243 if ((u4_valid_intra_modes & 04) == 0)/* If VERT is disabled*/
2244 i4_sad_vert = INT_MAX;
2245
2246 i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
2247
2248 /* Finding Minimum sad and doing corresponding prediction*/
2249 if (i4_min_sad < *pu4_sadmin)
2250 {
2251 *pu4_sadmin = i4_min_sad;
2252
2253 if (i4_min_sad == i4_sad_dc)
2254 {
2255 *u4_intra_mode = DC_CH_I8x8;
2256 for (i = 0; i < 8; i++)
2257 {
2258 for (j = 0; j < 8; j++)
2259 {
2260 col = j / 4;
2261 row = i / 4;
2262
2263 pu1_dst[2 * j] = u4_dc_val[row][col][0];
2264 pu1_dst[2 * j + 1] = u4_dc_val[row][col][1];
2265 }
2266 pu1_dst += dst_strd;
2267 }
2268 }
2269 else if (i4_min_sad == i4_sad_horz)
2270 {
2271 *u4_intra_mode = HORZ_CH_I8x8;
2272 for (j = 0; j < 8; j++)
2273 {
2274 val_v = pu1_ngbr_pels[15 - 2 * j];
2275 val_u = pu1_ngbr_pels[15 - 2 * j - 1];
2276
2277 for (i = 0; i < 8; i++)
2278 {
2279 pu1_dst[2 * i] = val_u;
2280 pu1_dst[2 * i + 1] = val_v;
2281
2282 }
2283 pu1_dst += dst_strd;
2284 }
2285 }
2286 else
2287 {
2288 *u4_intra_mode = VERT_CH_I8x8;
2289 pu1_neighbour = pu1_ngbr_pels + 18;
2290 for (j = 0; j < 8; j++)
2291 {
2292 memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
2293 pu1_dst += dst_strd;
2294 }
2295 }
2296 }
2297
2298 return;
2299 }
2300