1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 *  ihevcd_intra_ref_substitution.c
22 *
23 * @brief
24 *  Contains ref substitution functions
25 *
26 * @author
27 *  Naveen
28 *
29 * @par List of Functions:
30 * @remarks
31 *  None
32 *
33 *******************************************************************************
34 */
35 /*****************************************************************************/
36 /* File Includes                                                             */
37 /*****************************************************************************/
38 #include <stdio.h>
39 #include <stddef.h>
40 #include <stdlib.h>
41 #include <string.h>
42 
43 #include "ihevc_typedefs.h"
44 #include "ihevc_platform_macros.h"
45 #include "ihevc_intra_pred.h"
46 #include "ihevc_mem_fns.h"
47 #include "ihevc_chroma_intra_pred.h"
48 #include "ihevc_common_tables.h"
49 #include "ihevc_defs.h"
50 #include "ihevc_mem_fns.h"
51 #include "ihevc_macros.h"
52 
/* Largest coding-unit dimension and the sample bit depth assumed by this file */
#define MAX_CU_SIZE 64
#define BIT_DEPTH 8

/* Number of reference positions, excluding the single top-left one (4 * nt), */
/* for the block sizes that are handled with per-bit availability loops:       */
/* luma 32x32, luma 16x16, chroma 16x16 and chroma 8x8                         */
#define T32_4NT 128
#define T16_4NT 64
#define T16C_4NT 64
#define T8C_4NT 32
/****************************************************************************/
/* Function Macros                                                          */
/****************************************************************************/

/* Evaluate to 1 when bit number x of y is set and to 0 otherwise.           */
/* Both arguments and the whole expansion are parenthesised so that the      */
/* macros can be used inside larger expressions (comparisons, negation) and  */
/* with compound arguments without operator-precedence surprises.            */
#define GET_BIT(y,x) ((((y) & (1 << (x))) != 0))
#define GET_BITS(y,x) ((((y) & (1 << (x))) != 0))
65 /**
66 *******************************************************************************
67 *
68 * @brief
69 *  Reference substitution process for samples unavailable  for prediction
70 * Refer to section 8.4.4.2.2
71 *
72 * @par Description:
73 *
74 *
75 * @param[in] pu1_top_left
76 *  UWORD8 pointer to the top-left
77 *
78 * @param[in] pu1_top
79 *  UWORD8 pointer to the top
80 *
81 * @param[in] pu1_left
82 *  UWORD8 pointer to the left
83 *
84 * @param[in] src_strd
85 *  WORD32 Source stride
86 *
87 * @param[in] nbr_flags
88 *  WORD32 neighbor availability flags
89 *
90 * @param[in] nt
91 *  WORD32 transform Block size
92 *
93 * @param[in] dst_strd
94 *  WORD32 Destination stride
95 *
96 * @returns
97 *
98 * @remarks
99 *  None
100 *
101 *******************************************************************************
102 */
103 
ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 * pu1_top_left,UWORD8 * pu1_top,UWORD8 * pu1_left,WORD32 src_strd,WORD32 nt,WORD32 nbr_flags,UWORD8 * pu1_dst,WORD32 dst_strd)104 void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left,
105                                                   UWORD8 *pu1_top,
106                                                   UWORD8 *pu1_left,
107                                                   WORD32 src_strd,
108                                                   WORD32 nt,
109                                                   WORD32 nbr_flags,
110                                                   UWORD8 *pu1_dst,
111                                                   WORD32 dst_strd)
112 {
113     UWORD8 pu1_ref_u, pu1_ref_v;
114     WORD32 dc_val, i, j;
115     WORD32 total_samples = (4 * nt) + 1;
116     WORD32 get_bits;
117     WORD32 next;
118     WORD32 bot_left, left, top, tp_right, tp_left;
119     WORD32 idx, nbr_id_from_bl, frwd_nbr_flag;
120     WORD32 a_nbr_flag[5];
121     UNUSED(dst_strd);
122     /* Neighbor Flag Structure*/
123     /* WORD32 nbr_flags MSB-->LSB   TOP LEFT | TOP-RIGHT |  TOP   | LEFT    | BOTTOM LEFT*/
124     /*                              (1 bit)     (4 bits)  (4 bits) (4 bits)  (4 bits)  */
125 
126     if(nbr_flags == 0)
127     {
128 /* If no neighbor flags are present, fill the neighbor samples with DC value */
129         /*dc_val = 1 << (BIT_DEPTH - 1);*/
130         dc_val = 1 << (8 - 1);
131         for(i = 0; i < (2 * total_samples); i++)
132         {
133             pu1_dst[i] = dc_val;
134         }
135     }
136     else
137     {
138         /* Else fill the corresponding samples */
139 
140         /* Check for the neighbors availibility */
141         tp_left     = (nbr_flags & 0x10000);
142         tp_right    = (nbr_flags & 0x0f000);
143         top         = (nbr_flags & 0x00f00);
144         left        = (nbr_flags & 0x000f0);
145         bot_left    = (nbr_flags & 0x0000f);
146 
147         /* Fill nbrs depending on avalibility */
148         /* Top -Left nbrs  */
149         if(0 != tp_left)
150         {
151             pu1_dst[(4 * nt)] = *pu1_top_left; // U top-left sample
152             pu1_dst[(4 * nt) + 1] = *(pu1_top_left + 1); // V top-left sample
153         }
154         /* Left nbrs  */
155         if(0 != left)
156         {
157             for(i = 0, j = 0; i < (2 * nt); i += 2)
158             {
159                 pu1_dst[(4 * nt) - 2 - i] = pu1_left[j * src_strd]; // U left samples
160                 pu1_dst[(4 * nt) - 1 - i] = pu1_left[(j * src_strd) + 1]; // V left samples
161                 j++;
162             }
163         }
164         /* Bottom - Left nbrs  */
165         if(0 != bot_left)
166         {
167             for(i = (2 * nt), j = nt; i < (4 * nt); i += 2)
168             {
169                 pu1_dst[(4 * nt) - 2 - i] = pu1_left[j * src_strd]; // U left samples
170                 pu1_dst[(4 * nt) - 1 - i] = pu1_left[(j * src_strd) + 1]; // V left samples
171                 j++;
172             }
173         }
174         /* Top nbrs  */
175         if(0 != top)
176         {
177             ihevc_memcpy_mul_8_a9q(&pu1_dst[(4 * nt) + 2], pu1_top, 2 * nt);
178             // U-V interleaved Top-top right samples
179         }
180 
181         /* Top - Right nbrs  */
182         if(0 != tp_right)
183         {
184             ihevc_memcpy_mul_8_a9q(&pu1_dst[(4 * nt) + 2 + 2 * nt], pu1_top + 2 * nt, 2 * nt);
185             // U-V interleaved Top-top right samples
186         }
187 
188         if(nt == 4)
189         {
190             /* 1 bit extraction for all the neighboring blocks */
191             tp_left = (nbr_flags & 0x10000) >> 16;
192             bot_left = (nbr_flags & 0x8) >> 3;
193             left = (nbr_flags & 0x80) >> 7;
194             top = (nbr_flags & 0x100) >> 8;
195             tp_right = (nbr_flags & 0x1000) >> 12;
196 
197             next = 1;
198             a_nbr_flag[0] = bot_left;
199             a_nbr_flag[1] = left;
200             a_nbr_flag[2] = tp_left;
201             a_nbr_flag[3] = top;
202             a_nbr_flag[4] = tp_right;
203 
204             /* If bottom -left is not available, reverse substitution process*/
205             if(bot_left == 0)
206             {
207                 /* Check for the 1st available sample from bottom-left*/
208                 while(!a_nbr_flag[next])
209                     next++;
210 
211                 /* If Left, top-left are available*/
212                 if(next <= 2)
213                 {
214                     UWORD16 *pu2_dst;
215                     idx = (nt * next);
216                     pu2_dst = (UWORD16 *)&pu1_dst[2 * idx];
217                     ihevc_memset_16bit_a9q((UWORD16 *)pu1_dst, pu2_dst[0], idx);
218                 }
219                 else /* If top, top-right are available */
220                 {
221                     UWORD16 *pu2_dst;
222                     /* Idx is changed to copy 1 pixel value for top-left ,if top-left is not available*/
223                     idx = (nt * (next - 1)) + 1;
224                     pu2_dst = (UWORD16 *)&pu1_dst[2 * idx];
225                     ihevc_memset_16bit_a9q((UWORD16 *)pu1_dst, pu2_dst[0], idx);
226                 }
227             }
228 
229             if(left == 0)
230             {
231                 UWORD16 *pu2_dst = (UWORD16 *)&pu1_dst[(2 * nt) - 2];
232                 ihevc_memset_16bit_a9q((UWORD16 *)&pu1_dst[(2 * nt)], pu2_dst[0], nt);
233 
234 
235             }
236             if(tp_left == 0)
237             {
238                 pu1_dst[4 * nt] = pu1_dst[(4 * nt) - 2];
239                 pu1_dst[(4 * nt) + 1] = pu1_dst[(4 * nt) - 1];
240             }
241             if(top == 0)
242             {
243                 UWORD16 *pu2_dst = (UWORD16 *)&pu1_dst[(4 * nt)];
244                 ihevc_memset_16bit_a9q((UWORD16 *)&pu1_dst[(4 * nt) + 2], pu2_dst[0], nt);
245 
246 
247             }
248             if(tp_right == 0)
249             {
250                 UWORD16 *pu2_dst = (UWORD16 *)&pu1_dst[(6 * nt)];
251                 ihevc_memset_16bit_a9q((UWORD16 *)&pu1_dst[(6 * nt) + 2], pu2_dst[0], nt);
252 
253 
254             }
255         }
256         else if(nt == 8)
257         {
258             WORD32 nbr_flags_temp = 0;
259             nbr_flags_temp = ((nbr_flags & 0xC) >> 2) + ((nbr_flags & 0xC0) >> 4)
260                             + ((nbr_flags & 0x300) >> 4)
261                             + ((nbr_flags & 0x3000) >> 6)
262                             + ((nbr_flags & 0x10000) >> 8);
263 
264             /* compute trailing zeors based on nbr_flag for substitution process of below left see section .*/
265             /* as each bit in nbr flags corresponds to 8 pels for bot_left, left, top and topright but 1 pel for topleft */
266             {
267                 nbr_id_from_bl = look_up_trailing_zeros(nbr_flags_temp & 0XF) * 4; /* for bottom left and left */
268                 if(nbr_id_from_bl == 32)
269                     nbr_id_from_bl = 16;
270                 if(nbr_id_from_bl == 16)
271                 {
272                     /* for top left : 1 pel per nbr bit */
273                     if(!((nbr_flags_temp >> 8) & 0x1))
274                     {
275                         nbr_id_from_bl++;
276                         nbr_id_from_bl += look_up_trailing_zeros((nbr_flags_temp >> 4) & 0xF) * 4; /* top and top right;  8 pels per nbr bit */
277 
278                     }
279                 }
280                 /* Reverse Substitution Process*/
281                 if(nbr_id_from_bl)
282                 {
283                     /* Replicate the bottom-left and subsequent unavailable pixels with the 1st available pixel above */
284                     pu1_ref_u = pu1_dst[2 * nbr_id_from_bl];
285                     pu1_ref_v = pu1_dst[(2 * nbr_id_from_bl) + 1];
286                     for(i = 2 * (nbr_id_from_bl - 1); i >= 0; i -= 2)
287                     {
288                         pu1_dst[i] = pu1_ref_u;
289                         pu1_dst[i + 1] = pu1_ref_v;
290                     }
291                 }
292             }
293 
294             /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
295             while(nbr_id_from_bl < ((T8C_4NT)+1))
296             {
297                 /* To Obtain the next unavailable idx flag after reverse neighbor substitution  */
298                 /* Divide by 8 to obtain the original index */
299                 frwd_nbr_flag = (nbr_id_from_bl >> 2); /*+ (nbr_id_from_bl & 0x1);*/
300 
301                 /* The Top-left flag is at the last bit location of nbr_flags*/
302                 if(nbr_id_from_bl == (T8C_4NT / 2))
303                 {
304                     get_bits = GET_BIT(nbr_flags_temp, 8);
305 
306                     /* only pel substitution for TL */
307                     if(!get_bits)
308                     {
309                         pu1_dst[2 * nbr_id_from_bl] = pu1_dst[(2 * nbr_id_from_bl) - 2];
310                         pu1_dst[(2 * nbr_id_from_bl) + 1] = pu1_dst[(2 * nbr_id_from_bl) - 1];
311                     }
312                 }
313                 else
314                 {
315                     get_bits = GET_BIT(nbr_flags_temp, frwd_nbr_flag);
316                     if(!get_bits)
317                     {
318                         UWORD16 *pu2_dst;
319                         /* 8 pel substitution (other than TL) */
320                         pu2_dst = (UWORD16 *)&pu1_dst[(2 * nbr_id_from_bl) - 2];
321                         ihevc_memset_16bit_a9q((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4);
322                     }
323 
324                 }
325                 nbr_id_from_bl += (nbr_id_from_bl == (T8C_4NT / 2)) ? 1 : 4;
326             }
327 
328         }
329         else if(nt == 16)
330         {
331             /* compute trailing ones based on mbr_flag for substitution process of below left see section .*/
332             /* as each bit in nbr flags corresponds to 4 pels for bot_left, left, top and topright but 1 pel for topleft */
333             {
334                 nbr_id_from_bl = look_up_trailing_zeros((nbr_flags & 0XFF)) * 4; /* for bottom left and left */
335 
336                 if(nbr_id_from_bl == 32)
337                 {
338                     /* for top left : 1 pel per nbr bit */
339                     if(!((nbr_flags >> 16) & 0x1))
340                     {
341                         /* top left not available */
342                         nbr_id_from_bl++;
343                         /* top and top right;  4 pels per nbr bit */
344                         nbr_id_from_bl += look_up_trailing_zeros((nbr_flags >> 8) & 0xFF) * 4;
345                     }
346                 }
347                 /* Reverse Substitution Process*/
348                 if(nbr_id_from_bl)
349                 {
350                     /* Replicate the bottom-left and subsequent unavailable pixels with the 1st available pixel above */
351                     pu1_ref_u = pu1_dst[2 * nbr_id_from_bl];
352                     pu1_ref_v = pu1_dst[2 * nbr_id_from_bl + 1];
353                     for(i = (2 * (nbr_id_from_bl - 1)); i >= 0; i -= 2)
354                     {
355                         pu1_dst[i] = pu1_ref_u;
356                         pu1_dst[i + 1] = pu1_ref_v;
357                     }
358                 }
359             }
360 
361             /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
362             while(nbr_id_from_bl < ((T16C_4NT)+1))
363             {
364                 /* To Obtain the next unavailable idx flag after reverse neighbor substitution  */
365                 /* Devide by 4 to obtain the original index */
366                 frwd_nbr_flag = (nbr_id_from_bl >> 2); /*+ (nbr_id_from_bl & 0x1);*/
367 
368                 /* The Top-left flag is at the last bit location of nbr_flags*/
369                 if(nbr_id_from_bl == (T16C_4NT / 2))
370                 {
371                     get_bits = GET_BIT(nbr_flags, 16);
372                     /* only pel substitution for TL */
373                     if(!get_bits)
374                     {
375                         pu1_dst[2 * nbr_id_from_bl] = pu1_dst[(2 * nbr_id_from_bl) - 2];
376                         pu1_dst[(2 * nbr_id_from_bl) + 1] = pu1_dst[(2 * nbr_id_from_bl) - 1];
377                     }
378                 }
379                 else
380                 {
381                     get_bits = GET_BIT(nbr_flags, frwd_nbr_flag);
382                     if(!get_bits)
383                     {
384                         UWORD16 *pu2_dst;
385                         /* 4 pel substitution (other than TL) */
386                         pu2_dst = (UWORD16 *)&pu1_dst[(2 * nbr_id_from_bl) - 2];
387                         ihevc_memset_16bit_a9q((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4);
388                     }
389 
390                 }
391                 nbr_id_from_bl += (nbr_id_from_bl == (T16C_4NT / 2)) ? 1 : 4;
392             }
393         }
394     }
395 }
396 
397 
/**
*******************************************************************************
*
* @brief
*  Reference substitution process for luma samples unavailable for
*  prediction. Refer to section 8.4.4.2.2
*
* @par Description:
*  Builds the intra reference array for an nt x nt luma block. pu1_dst
*  receives (4 * nt) + 1 samples ordered from the lowest bottom-left sample
*  up the left edge, then the top-left sample, then top and top-right from
*  left to right:
*      [0, nt)            bottom-left
*      [nt, 2*nt)         left
*      2*nt               top-left
*      (2*nt, 3*nt]       top
*      (3*nt, 4*nt]       top-right
*  Available neighbors are copied from the source; unavailable ones are
*  replaced by the nearest available sample (reverse substitution for the run
*  starting at index 0, forward substitution afterwards). When no neighbor is
*  available every sample is set to 128.
*  Sizes nt <= 8, nt == 16 and nt == 32 are handled; for any other nt and
*  non-zero nbr_flags nothing is written.
*
* @param[in] pu1_top_left
*  UWORD8 pointer to the top-left sample
*
* @param[in] pu1_top
*  UWORD8 pointer to the top row (top-right follows contiguously)
*
* @param[in] pu1_left
*  UWORD8 pointer to the left column (bottom-left follows at further rows)
*
* @param[in] src_strd
*  WORD32 Source stride; distance in bytes between successive rows of pu1_left
*
* @param[in] nt
*  WORD32 transform Block size
*
* @param[in] nbr_flags
*  WORD32 neighbor availability flags
*
* @param[out] pu1_dst
*  UWORD8 pointer to the reference array that is filled
*
* @param[in] dst_strd
*  WORD32 Destination stride (unused)
*
* @returns
*  None
*
* @remarks
*  None
*
*******************************************************************************
*/

void ihevc_intra_pred_luma_ref_substitution_a9q(UWORD8 *pu1_top_left,
                                                UWORD8 *pu1_top,
                                                UWORD8 *pu1_left,
                                                WORD32 src_strd,
                                                WORD32 nt,
                                                WORD32 nbr_flags,
                                                UWORD8 *pu1_dst,
                                                WORD32 dst_strd)
{
    UWORD8 u1_ref;
    WORD32 dc_val, i;
    WORD32 total_samples = (4 * nt) + 1;
    WORD32 two_nt = 2 * nt;
    WORD32 three_nt = 3 * nt;
    WORD32 get_bits;
    WORD32 next;
    WORD32 bot_left, left, top, tp_right, tp_left;
    WORD32 idx, nbr_id_from_bl, frwd_nbr_flag;
    WORD32 used_flags;
    UNUSED(dst_strd);

    /*dc_val = 1 << (BIT_DEPTH - 1);*/
    dc_val = 1 << (8 - 1);

    /* Neighbor Flag Structure*/
    /* MSB ---> LSB */
    /*    Top-Left | Top-Right | Top | Left | Bottom-Left
              1         4         4     4         4
     */

    /* Bits of nbr_flags that each block size actually inspects. Anything     */
    /* outside this mask cannot make a neighbor "available".                  */
    if(nt <= 8)
    {
        /* one bit per region: 0x8, 0x80, 0x100, 0x1000 and top-left 0x10000 */
        used_flags = 0x11188;
    }
    else if(nt == 16)
    {
        /* two bits per region: 0xC, 0xC0, 0x300, 0x3000 and top-left 0x10000 */
        used_flags = 0x133CC;
    }
    else if(nt == 32)
    {
        /* all four bits per region and top-left */
        used_flags = 0x1FFFF;
    }
    else
    {
        /* size not handled below: keep the plain zero test */
        used_flags = ~0;
    }

    /* If no neighbor flags are present, fill the neighbor samples with DC value. */
    /* Testing the masked flags (not just nbr_flags == 0) keeps the searches      */
    /* below from running past a_nbr_flag[] / pu1_dst when only bits that are     */
    /* not inspected for this block size are set.                                 */
    if(0 == (nbr_flags & used_flags))
    {
        for(i = 0; i < total_samples; i++)
        {
            pu1_dst[i] = dc_val;
        }
        return;
    }

    if(nt <= 8)
    {
        /* 1 bit extraction for all the neighboring blocks */
        tp_left = (nbr_flags & 0x10000) >> 16;
        bot_left = (nbr_flags & 0x8) >> 3;
        left = (nbr_flags & 0x80) >> 7;
        top = (nbr_flags & 0x100) >> 8;
        tp_right = (nbr_flags & 0x1000) >> 12;

        /* Copy every available region, zero the unavailable ones */
        if(tp_left)
            pu1_dst[two_nt] = *pu1_top_left;
        else
            pu1_dst[two_nt] = 0;

        if(left)
        {
            /* source row i goes to index two_nt - 1 - i */
            for(i = 0; i < nt; i++)
                pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
        }
        else
        {
            ihevc_memset_a9q(&pu1_dst[nt], 0, nt);
        }

        if(bot_left)
        {
            for(i = nt; i < two_nt; i++)
                pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
        }
        else
        {
            ihevc_memset_a9q(&pu1_dst[0], 0, nt);
        }

        if(top)
        {
            ihevc_memcpy_a9q(&pu1_dst[two_nt + 1], pu1_top, nt);
        }
        else
        {
            ihevc_memset_a9q(&pu1_dst[two_nt + 1], 0, nt);
        }

        if(tp_right)
        {
            ihevc_memcpy_a9q(&pu1_dst[two_nt + 1 + nt], pu1_top + nt, nt);
        }
        else
        {
            ihevc_memset_a9q(&pu1_dst[two_nt + 1 + nt], 0, nt);
        }
        next = 1;

        /* If bottom -left is not available, reverse substitution process*/
        if(bot_left == 0)
        {
            /* Availability in reference-array order, starting at bottom-left */
            WORD32 a_nbr_flag[5];
            a_nbr_flag[0] = bot_left;
            a_nbr_flag[1] = left;
            a_nbr_flag[2] = tp_left;
            a_nbr_flag[3] = top;
            a_nbr_flag[4] = tp_right;

            /* Check for the 1st available region from bottom-left.            */
            /* The early return above guarantees one of the five flags is set; */
            /* the bound keeps the scan inside a_nbr_flag[] regardless.        */
            while((next < 4) && (0 == a_nbr_flag[next]))
                next++;

            /* If Left, top-left are available*/
            if(next <= 2)
            {
                idx = nt * next;
                u1_ref = pu1_dst[idx];
                for(i = 0; i < idx; i++)
                    pu1_dst[i] = u1_ref;
            }
            else /* If top, top-right are available */
            {
                /* Idx is changed to copy 1 pixel value for top-left ,if top-left is not available*/
                idx = (nt * (next - 1)) + 1;
                u1_ref = pu1_dst[idx];
                for(i = 0; i < idx; i++)
                    pu1_dst[i] = u1_ref;
            }
        }

        /* Forward Substitution Process */
        /* If left is Unavailable, copy the last bottom-left value */
        if(left == 0)
        {
            ihevc_memset_a9q(&pu1_dst[nt], pu1_dst[nt - 1], nt);
        }
        /* If top-left is Unavailable, copy the last left value */
        if(tp_left == 0)
            pu1_dst[two_nt] = pu1_dst[two_nt - 1];
        /* If top is Unavailable, copy the last top-left value */
        if(top == 0)
        {
            ihevc_memset_a9q(&pu1_dst[two_nt + 1], pu1_dst[two_nt], nt);
        }
        /* If top right is Unavailable, copy the last top value */
        if(tp_right == 0)
        {
            ihevc_memset_a9q(&pu1_dst[three_nt + 1], pu1_dst[three_nt], nt);
        }
    }

    if(nt == 16)
    {
        /* Condensed flags, 2 bits per region, in reference-array order:       */
        /* bits 0-1 bottom-left, 2-3 left, 4-5 top, 6-7 top-right, bit 8 TL    */
        WORD32 nbr_flags_temp = 0;
        nbr_flags_temp = ((nbr_flags & 0xC) >> 2) + ((nbr_flags & 0xC0) >> 4)
                        + ((nbr_flags & 0x300) >> 4)
                        + ((nbr_flags & 0x3000) >> 6)
                        + ((nbr_flags & 0x10000) >> 8);

        /* Copy every available region, zero the unavailable ones */
        if(nbr_flags & 0x10000)
            pu1_dst[two_nt] = *pu1_top_left;
        else
            pu1_dst[two_nt] = 0;

        if(nbr_flags & 0xC0)
        {
            for(i = 0; i < nt; i++)
                pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
        }
        else
        {
            ihevc_memset_mul_8_a9q(&pu1_dst[nt], 0, nt);
        }

        /* Bottom - left availability is checked for every 8x8 TU position and set accordingly */
        {
            if(nbr_flags & 0x8)
            {
                for(i = nt; i < (nt + 8); i++)
                    pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
            }
            else
            {
                ihevc_memset_mul_8_a9q(&pu1_dst[nt - 8], 0, 8);
            }

            if(nbr_flags & 0x4)
            {
                for(i = (nt + 8); i < two_nt; i++)
                    pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
            }
            else
            {
                ihevc_memset_mul_8_a9q(&pu1_dst[0], 0, 8);
            }
        }

        if(nbr_flags & 0x300)
        {
            ihevc_memcpy_mul_8_a9q(&pu1_dst[two_nt + 1], pu1_top, nt);
        }
        else
        {
            ihevc_memset_mul_8_a9q(&pu1_dst[two_nt + 1], 0, nt);
        }

        if(nbr_flags & 0x3000)
        {
            ihevc_memcpy_mul_8_a9q(&pu1_dst[two_nt + 1 + nt], pu1_top + nt, nt);
        }
        else
        {
            ihevc_memset_mul_8_a9q(&pu1_dst[two_nt + 1 + nt], 0, nt);
        }

        /* compute trailing zeros based on nbr_flag for substitution process of below left */
        /* as each bit in nbr flags corresponds to 8 pels for bot_left, left, top and topright but 1 pel for topleft */
        {
            /* NOTE(review): look_up_trailing_zeros is assumed to return 8 for a */
            /* zero argument (the == 64 clamp relies on it) - confirm            */
            nbr_id_from_bl = look_up_trailing_zeros(nbr_flags_temp & 0XF) * 8; /* for below left and left */

            if(nbr_id_from_bl == 64)
                nbr_id_from_bl = 32;

            if(nbr_id_from_bl == 32)
            {
                /* for top left : 1 pel per nbr bit */
                if(!((nbr_flags_temp >> 8) & 0x1))
                {
                    nbr_id_from_bl++;
                    nbr_id_from_bl += look_up_trailing_zeros((nbr_flags_temp >> 4) & 0xF) * 8; /* top and top right;  8 pels per nbr bit */
                }
            }
            /* Reverse Substitution Process*/
            if(nbr_id_from_bl)
            {
                /* Replicate the bottom-left and subsequent unavailable pixels with the 1st available pixel above */
                u1_ref = pu1_dst[nbr_id_from_bl];
                for(i = (nbr_id_from_bl - 1); i >= 0; i--)
                {
                    pu1_dst[i] = u1_ref;
                }
            }
        }

        /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
        while(nbr_id_from_bl < ((T16_4NT) + 1))
        {
            /* To Obtain the next unavailable idx flag after reverse neighbor substitution  */
            /* Divide by 8 to obtain the flag bit index */
            frwd_nbr_flag = (nbr_id_from_bl >> 3); /*+ (nbr_id_from_bl & 0x1);*/

            /* The Top-left flag is at the last bit location of nbr_flags*/
            if(nbr_id_from_bl == (T16_4NT / 2))
            {
                get_bits = GET_BITS(nbr_flags_temp, 8);

                /* only pel substitution for TL */
                if(!get_bits)
                    pu1_dst[nbr_id_from_bl] = pu1_dst[nbr_id_from_bl - 1];
            }
            else
            {
                get_bits = GET_BITS(nbr_flags_temp, frwd_nbr_flag);
                if(!get_bits)
                {
                    /* 8 pel substitution (other than TL) from the preceding sample */
                    u1_ref = pu1_dst[nbr_id_from_bl - 1];
                    ihevc_memset_mul_8_a9q(pu1_dst + nbr_id_from_bl, u1_ref, 8);
                }
            }
            /* step over the single top-left sample, otherwise over a group of 8 */
            nbr_id_from_bl += (nbr_id_from_bl == (T16_4NT / 2)) ? 1 : 8;
        }
    }

    if(nt == 32)
    {
        /* Copy every available region, zero the unavailable ones */
        if(nbr_flags & 0x10000)
            pu1_dst[two_nt] = *pu1_top_left;
        else
            pu1_dst[two_nt] = 0;

        if(nbr_flags & 0xF0)
        {
            for(i = 0; i < nt; i++)
                pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
        }
        else
        {
            ihevc_memset_mul_8_a9q(&pu1_dst[nt], 0, nt);
        }

        /* Bottom - left availability is checked for every 8x8 TU position and set accordingly */
        {
            if(nbr_flags & 0x8)
            {
                for(i = nt; i < (nt + 8); i++)
                    pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
            }
            else
            {
                ihevc_memset_mul_8_a9q(&pu1_dst[24], 0, 8);
            }

            if(nbr_flags & 0x4)
            {
                for(i = (nt + 8); i < (nt + 16); i++)
                    pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
            }
            else
            {
                ihevc_memset_mul_8_a9q(&pu1_dst[16], 0, 8);
            }

            if(nbr_flags & 0x2)
            {
                for(i = (nt + 16); i < (nt + 24); i++)
                    pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
            }
            else
            {
                ihevc_memset_mul_8_a9q(&pu1_dst[8], 0, 8);
            }

            if(nbr_flags & 0x1)
            {
                for(i = (nt + 24); i < (two_nt); i++)
                    pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
            }
            else
            {
                ihevc_memset_mul_8_a9q(&pu1_dst[0], 0, 8);
            }
        }

        if(nbr_flags & 0xF00)
        {
            ihevc_memcpy_mul_8_a9q(&pu1_dst[two_nt + 1], pu1_top, nt);
        }
        else
        {
            ihevc_memset_mul_8_a9q(&pu1_dst[two_nt + 1], 0, nt);
        }

        if(nbr_flags & 0xF000)
        {
            ihevc_memcpy_mul_8_a9q(&pu1_dst[two_nt + 1 + nt], pu1_top + nt, nt);
        }
        else
        {
            ihevc_memset_mul_8_a9q(&pu1_dst[two_nt + 1 + nt], 0, nt);
        }

        /* compute trailing zeros based on nbr_flag for substitution process of below left */
        /* as each bit in nbr flags corresponds to 8 pels for bot_left, left, top and topright but 1 pel for topleft */
        {
            nbr_id_from_bl = look_up_trailing_zeros((nbr_flags & 0XFF)) * 8; /* for below left and left */

            if(nbr_id_from_bl == 64)
            {
                /* for top left : 1 pel per nbr bit */
                if(!((nbr_flags >> 16) & 0x1))
                {
                    /* top left not available */
                    nbr_id_from_bl++;
                    /* top and top right;  8 pels per nbr bit */
                    nbr_id_from_bl += look_up_trailing_zeros((nbr_flags >> 8) & 0xFF) * 8;
                }
            }
            /* Reverse Substitution Process*/
            if(nbr_id_from_bl)
            {
                /* Replicate the bottom-left and subsequent unavailable pixels with the 1st available pixel above */
                u1_ref = pu1_dst[nbr_id_from_bl];
                for(i = (nbr_id_from_bl - 1); i >= 0; i--)
                    pu1_dst[i] = u1_ref;
            }
        }

        /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
        while(nbr_id_from_bl < ((T32_4NT) + 1))
        {
            /* To Obtain the next unavailable idx flag after reverse neighbor substitution  */
            /* Divide by 8 to obtain the flag bit index */
            frwd_nbr_flag = (nbr_id_from_bl >> 3); /*+ (nbr_id_from_bl & 0x1);*/

            /* The Top-left flag is at the last bit location of nbr_flags*/
            if(nbr_id_from_bl == (T32_4NT / 2))
            {
                get_bits = GET_BITS(nbr_flags, 16);
                /* only pel substitution for TL */
                if(!get_bits)
                    pu1_dst[nbr_id_from_bl] = pu1_dst[nbr_id_from_bl - 1];
            }
            else
            {
                get_bits = GET_BITS(nbr_flags, frwd_nbr_flag);
                if(!get_bits)
                {
                    /* 8 pel substitution (other than TL) from the preceding sample */
                    u1_ref = pu1_dst[nbr_id_from_bl - 1];
                    ihevc_memset_mul_8_a9q(&pu1_dst[nbr_id_from_bl], u1_ref, 8);
                }
            }
            /* step over the single top-left sample, otherwise over a group of 8 */
            nbr_id_from_bl += (nbr_id_from_bl == (T32_4NT / 2)) ? 1 : 8;
        }
    }
}
817