1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22 *******************************************************************************
23 * @file
24 *  ih264e_half_pel.c
25 *
26 * @brief
27 *  This file contains functions that are used for computing subpixel planes
28 *
29 * @author
30 *  ittiam
31 *
32 * @par List of Functions:
33 *  - ih264e_sixtapfilter_horz
34 *  - ih264e_sixtap_filter_2dvh_vert
35 *
36 * @remarks
37 *  None
38 *
39 *******************************************************************************
40 */
41 
42 /*****************************************************************************/
43 /* File Includes                                                             */
44 /*****************************************************************************/
45 
46 /* System include files */
47 #include <stdio.h>
48 #include <assert.h>
49 #include <limits.h>
50 
51 /* User include files */
52 #include "ih264_typedefs.h"
53 #include "ithread.h"
54 #include "ih264_platform_macros.h"
55 #include "ih264_defs.h"
56 #include "ih264e_half_pel.h"
57 #include "ih264_macros.h"
58 #include "ih264e_debug.h"
59 #include "ih264_inter_pred_filters.h"
60 #include "ih264_mem_fns.h"
61 #include "ih264_padding.h"
62 #include "ih264_intra_pred_filters.h"
63 #include "ih264_deblk_edge_filters.h"
64 
65 
66 /*****************************************************************************/
67 /* Function Definitions                                                      */
68 /*****************************************************************************/
69 
70 /**
71 *******************************************************************************
72 *
73 * @brief
74 *  Interprediction luma filter for horizontal input (Filter run for width = 17
75 *  and height =16)
76 *
77 * @par Description:
78 *  Applies a 6 tap horizontal filter .The output is  clipped to 8 bits
79 *  sec 8.4.2.2.1 titled "Luma sample interpolation process"
80 *
81 * @param[in] pu1_src
82 *  UWORD8 pointer to the source
83 *
84 * @param[out] pu1_dst
85 *  UWORD8 pointer to the destination
86 *
87 * @param[in] src_strd
88 *  integer source stride
89 *
90 * @param[in] dst_strd
91 *  integer destination stride
92 *
93 * @returns
94 *
95 * @remarks
96 *  None
97 *
98 *******************************************************************************
99 */
100 void ih264e_sixtapfilter_horz(UWORD8 *pu1_src,
101                               UWORD8 *pu1_dst,
102                               WORD32 src_strd,
103                               WORD32 dst_strd)
104 {
105     UWORD32  u4_i, u4_j;
106     UWORD32  u4_w, u4_h;
107 
108     /* width and height of interpolation */
109     u4_w = HP_PL_WD;
110     u4_h = MB_SIZE;
111 
112     pu1_src -= 2;
113 
114     for (u4_i = 0; u4_i < u4_h; u4_i++)
115     {
116         for (u4_j = 0; u4_j < u4_w; u4_j++, pu1_dst++, pu1_src++)
117         {
118             WORD16 i16_temp;
119 
120             i16_temp = ih264_g_six_tap[0] * (*pu1_src + pu1_src[5])
121                             + ih264_g_six_tap[1] * (pu1_src[1] + pu1_src[4])
122                             + ih264_g_six_tap[2] * (pu1_src[2] + pu1_src[3]);
123 
124             i16_temp = (i16_temp + 16) >> 5;
125 
126             *pu1_dst = CLIP_U8(i16_temp);
127         }
128         pu1_src += src_strd - u4_w;
129         pu1_dst += dst_strd - u4_w;
130     }
131 }
132 
133 /**
134 *******************************************************************************
135 *
136 * @brief
137 *  This function implements a two stage cascaded six tap filter. It applies
138 *  the six tap filter in the vertical direction on the predictor values,
139 *  followed by applying the same filter in the horizontal direction on the
140 *  output of the first stage. The six tap filtering operation is described in
141 *  sec 8.4.2.2.1 titled "Luma sample interpolation process" (Filter run for
142 *  width = 17 and height = 17)
143 *
144 * @par Description:
145 *  The function interpolates the predictors first in the vertical direction and
146 *  then in the horizontal direction to output the (1/2,1/2). The output of the
147 *  first stage of the filter is stored in the buffer pointed to by
148 *  pi16_pred1(only in C) in 16 bit precision.
149 *
150 * @param[in] pu1_src
151 *  UWORD8 pointer to the source
152 *
153 * @param[out] pu1_dst1
154 *  UWORD8 pointer to the destination (Horizontal filtered output)
155 *
156 * @param[out] pu1_dst2
157 *  UWORD8 pointer to the destination (output after applying vertical filter to
158 *  the intermediate horizontal output)
159 *
160 * @param[in] src_strd
161 *  integer source stride
162 
163 * @param[in] dst_strd
164 *  integer destination stride of pu1_dst
165 *
166 * @param[in] pi4_pred
167 *  Pointer to 16bit intermediate buffer (used only in c)
168 *
169 * @param[in] i4_pred_strd
170 *  integer destination stride of pi16_pred1
171 *
172 * @returns
173 *
174 * @remarks
175 *  None
176 *
177 *******************************************************************************
178 */
179 void ih264e_sixtap_filter_2dvh_vert(UWORD8 *pu1_src,
180                                     UWORD8 *pu1_dst1,
181                                     UWORD8 *pu1_dst2,
182                                     WORD32 src_strd,
183                                     WORD32 dst_strd,
184                                     WORD32 *pi4_pred,
185                                     WORD32 i4_pred_strd)
186 {
187     WORD32 row, col;
188     WORD32 tmp;
189     WORD32 *pi4_pred_temp = pi4_pred;
190     WORD32 ht = HP_PL_HT, wd = HP_PL_WD;
191 
192     for (row = 0; row < ht; row++)
193     {
194         for (col = -2; col < wd + 3; col++)
195         {
196             tmp = ih264_g_six_tap[0] * (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd]) +
197                             ih264_g_six_tap[1] * (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd]) +
198                             ih264_g_six_tap[2] * (pu1_src[col] + pu1_src[col + 1 * src_strd]);
199 
200             pi4_pred_temp[col] = tmp;
201         }
202 
203         pu1_src += src_strd;
204         pi4_pred_temp += i4_pred_strd;
205     }
206 
207     for (row = 0; row < ht; row++)
208     {
209         for (col = 0; col < wd; col++)
210         {
211             tmp = (pi4_pred[col - 2] + pi4_pred[col + 3]) +
212                             ih264_g_six_tap[1] * (pi4_pred[col - 1] + pi4_pred[col + 2]) +
213                             ih264_g_six_tap[2] * (pi4_pred[col] + pi4_pred[col + 1]);
214 
215             tmp = (tmp + 512) >> 10;
216 
217             pu1_dst2[col] = CLIP_U8(tmp);
218             pu1_dst1[col] = CLIP_U8((pi4_pred[col] + 16) >> 5);
219         }
220         pi4_pred += i4_pred_strd;
221         pu1_dst2 += dst_strd;
222         pu1_dst1 += dst_strd;
223     }
224 }
225 
226