1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 *******************************************************************************
22 * @file
23 *  ih264_padding_atom_intr.c
24 *
25 * @brief
26 *  Contains function definitions for Padding
27 *
28 * @author
29 *  Srinivas T
30 *
31 * @par List of Functions:
32 *   - ih264_pad_left_luma_ssse3()
33 *   - ih264_pad_left_chroma_ssse3()
34 *   - ih264_pad_right_luma_ssse3()
35 *   - ih264_pad_right_chroma_ssse3()
36 *
37 * @remarks
38 *  None
39 *
40 *******************************************************************************
41 */
42 
43 #include <string.h>
44 #include <assert.h>
45 #include "ih264_typedefs.h"
46 #include "ih264_platform_macros.h"
47 #include "ih264_mem_fns.h"
48 #include "ih264_debug.h"
49 
50 #include <immintrin.h>
51 
52 
53 /**
54 *******************************************************************************
55 *
56 * @brief
57 *   Padding (luma block) at the left of a 2d array
58 *
59 * @par Description:
60 *   The left column of a 2d array is replicated for pad_size times at the left
61 *
62 *
63 * @param[in] pu1_src
64 *  UWORD8 pointer to the source
65 *
66 * @param[in] src_strd
67 *  integer source stride
68 *
69 * @param[in] ht
70 *  integer height of the array
71 *
* @param[in] pad_size
*  integer number of bytes to pad on the left of each row (must be a
*  multiple of 8)
*
84 * @returns
85 *
86 * @remarks
87 *  None
88 *
89 *******************************************************************************
90 */
91 
void ih264_pad_left_luma_ssse3(UWORD8 *pu1_src,
                               WORD32 src_strd,
                               WORD32 ht,
                               WORD32 pad_size)
{
    /* Cursor on the first byte of the row currently being padded */
    UWORD8 *pu1_row = pu1_src;
    WORD32 rows_left;

    ASSERT(pad_size % 8 == 0);

    for(rows_left = ht; rows_left > 0; rows_left--)
    {
        /* First byte of the row replicated into every byte lane */
        const __m128i edge_16x8b = _mm_set1_epi8(*pu1_row);
        /* Padding region occupies the pad_size bytes just before the row */
        UWORD8 *pu1_pad = pu1_row - pad_size;
        WORD32 offset;

        /* Each store writes the low 8 bytes of the register, hence the */
        /* step of 8 and the multiple-of-8 requirement on pad_size      */
        for(offset = 0; offset < pad_size; offset += 8)
        {
            _mm_storel_epi64((__m128i *)(pu1_pad + offset), edge_16x8b);
        }

        pu1_row += src_strd;
    }
}
117 
118 
119 
120 /**
121 *******************************************************************************
122 *
123 * @brief
124 *   Padding (chroma block) at the left of a 2d array
125 *
126 * @par Description:
127 *   The left column of a 2d array is replicated for pad_size times at the left
128 *
129 *
130 * @param[in] pu1_src
131 *  UWORD8 pointer to the source
132 *
133 * @param[in] src_strd
134 *  integer source stride
135 *
136 * @param[in] ht
137 *  integer height of the array
138 *
* @param[in] pad_size
*  integer number of bytes to pad on the left of each row (must be a
*  multiple of 8)
*
151 * @returns
152 *
153 * @remarks
154 *  None
155 *
156 *******************************************************************************
157 */
158 
void ih264_pad_left_chroma_ssse3(UWORD8 *pu1_src,
                                 WORD32 src_strd,
                                 WORD32 ht,
                                 WORD32 pad_size)
{
    WORD32 row;
    WORD32 col;
    UWORD8 *pu1_dst;

    ASSERT(pad_size % 8 == 0);

    for(row = 0; row < ht; row++)
    {
        __m128i src_temp0_16x8b;
        UWORD16 u2_edge_pair;

        /* Fetch the first two bytes of the row as one 16-bit unit.      */
        /* memcpy is used instead of dereferencing a casted UWORD16      */
        /* pointer: pu1_src is a byte pointer with no alignment          */
        /* guarantee, so the cast would be a misaligned, type-punned     */
        /* access.                                                       */
        memcpy(&u2_edge_pair, pu1_src, sizeof(u2_edge_pair));

        /* Padding region occupies the pad_size bytes just before the row */
        pu1_dst = pu1_src - pad_size;

        /* Replicate the 2-byte pair across the whole register */
        src_temp0_16x8b = _mm_set1_epi16((short)u2_edge_pair);

        /* Each store writes the low 8 bytes (four pairs) of the register */
        for(col = 0; col < pad_size; col += 8)
        {
            _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
        }
        pu1_src += src_strd;
    }

}
183 
184 
185 
186 /**
187 *******************************************************************************
188 *
189 * @brief
190 * Padding (luma block) at the right of a 2d array
191 *
192 * @par Description:
193 * The right column of a 2d array is replicated for pad_size times at the right
194 *
195 *
196 * @param[in] pu1_src
197 *  UWORD8 pointer to the source
198 *
199 * @param[in] src_strd
200 *  integer source stride
201 *
202 * @param[in] ht
203 *  integer height of the array
204 *
* @param[in] pad_size
*  integer number of bytes to pad on the right of each row (must be a
*  multiple of 8)
*
217 * @returns
218 *
219 * @remarks
220 *  None
221 *
222 *******************************************************************************
223 */
224 
void ih264_pad_right_luma_ssse3(UWORD8 *pu1_src,
                                WORD32 src_strd,
                                WORD32 ht,
                                WORD32 pad_size)
{
    /* Cursor on the first byte to be written in the current row; the */
    /* byte immediately before it is the one that gets replicated     */
    UWORD8 *pu1_pad = pu1_src;
    WORD32 rows_left;

    ASSERT(pad_size % 8 == 0);

    for(rows_left = ht; rows_left > 0; rows_left--)
    {
        /* Byte preceding the padding region, copied into every lane */
        const __m128i edge_16x8b = _mm_set1_epi8(pu1_pad[-1]);
        WORD32 offset;

        /* Each store writes the low 8 bytes of the register, hence the */
        /* step of 8 and the multiple-of-8 requirement on pad_size      */
        for(offset = 0; offset < pad_size; offset += 8)
        {
            _mm_storel_epi64((__m128i *)(pu1_pad + offset), edge_16x8b);
        }

        pu1_pad += src_strd;
    }
}
250 
251 
252 
253 /**
254 *******************************************************************************
255 *
256 * @brief
257 * Padding (chroma block) at the right of a 2d array
258 *
259 * @par Description:
260 * The right column of a 2d array is replicated for pad_size times at the right
261 *
262 *
263 * @param[in] pu1_src
264 *  UWORD8 pointer to the source
265 *
266 * @param[in] src_strd
267 *  integer source stride
268 *
269 * @param[in] ht
270 *  integer height of the array
271 *
* @param[in] pad_size
*  integer number of bytes to pad on the right of each row (must be a
*  multiple of 8)
*
284 * @returns
285 *
286 * @remarks
287 *  None
288 *
289 *******************************************************************************
290 */
291 
void ih264_pad_right_chroma_ssse3(UWORD8 *pu1_src,
                                  WORD32 src_strd,
                                  WORD32 ht,
                                  WORD32 pad_size)
{
    WORD32 row;
    WORD32 col;
    UWORD8 *pu1_dst;

    ASSERT(pad_size % 8 == 0);

    for(row = 0; row < ht; row++)
    {
        __m128i src_temp0_16x8b;
        UWORD16 u2_edge_pair;

        /* Fetch the two bytes immediately before pu1_src as one 16-bit  */
        /* unit. memcpy is used instead of dereferencing a casted        */
        /* UWORD16 pointer: pu1_src is a byte pointer with no alignment  */
        /* guarantee, so the cast would be a misaligned, type-punned     */
        /* access.                                                       */
        memcpy(&u2_edge_pair, pu1_src - 2, sizeof(u2_edge_pair));

        /* Padding is written starting at pu1_src itself */
        pu1_dst = pu1_src;

        /* Replicate the 2-byte pair across the whole register */
        src_temp0_16x8b = _mm_set1_epi16((short)u2_edge_pair);

        /* Each store writes the low 8 bytes (four pairs) of the register */
        for(col = 0; col < pad_size; col += 8)
        {
            _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
        }

        pu1_src += src_strd;
    }
}
317 
318