1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 * ihevc_padding_atom_intr.c
22 *
23 * @brief
24 * Contains function definitions for Padding
25 *
26 * @author
27 * Srinivas T
28 *
29 * @par List of Functions:
30 * - ihevc_pad_left_luma_ssse3()
31 * - ihevc_pad_left_chroma_ssse3()
32 * - ihevc_pad_right_luma_ssse3()
33 * - ihevc_pad_right_chroma_ssse3()
34 *
35 * @remarks
36 * None
37 *
38 *******************************************************************************
39 */
40
41 #include <string.h>
42 #include <assert.h>
43 #include "ihevc_typedefs.h"
44 #include "ihevc_func_selector.h"
45 #include "ihevc_platform_macros.h"
46 #include "ihevc_mem_fns.h"
47 #include "ihevc_debug.h"
48
49 #include <immintrin.h>
50
51
52 /**
53 *******************************************************************************
54 *
55 * @brief
56 * Padding (luma block) at the left of a 2d array
57 *
58 * @par Description:
59 * The left column of a 2d array is replicated for pad_size times at the left
60 *
61 *
62 * @param[in] pu1_src
63 * UWORD8 pointer to the source
64 *
65 * @param[in] src_strd
66 * integer source stride
67 *
68 * @param[in] ht
69 * integer height of the array
70 *
71 * @param[in] wd
72 * integer width of the array
73 *
74 * @param[in] pad_size
75 * integer -padding size of the array
76 *
77 * @param[in] ht
78 * integer height of the array
79 *
80 * @param[in] wd
81 * integer width of the array
82 *
83 * @returns
84 *
85 * @remarks
86 * None
87 *
88 *******************************************************************************
89 */
90
ihevc_pad_left_luma_ssse3(UWORD8 * pu1_src,WORD32 src_strd,WORD32 ht,WORD32 pad_size)91 void ihevc_pad_left_luma_ssse3(UWORD8 *pu1_src,
92 WORD32 src_strd,
93 WORD32 ht,
94 WORD32 pad_size)
95 {
96 WORD32 row;
97 WORD32 i;
98 UWORD8 *pu1_dst;
99 __m128i const0_16x8b;
100
101 const0_16x8b = _mm_setzero_si128();
102
103 ASSERT(pad_size % 8 == 0);
104
105 for(row = 0; row < ht; row++)
106 {
107 __m128i src_temp0_16x8b;
108
109 src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
110 pu1_dst = pu1_src - pad_size;
111 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
112 for(i = 0; i < pad_size; i += 8)
113 {
114 _mm_storel_epi64((__m128i *)(pu1_dst + i), src_temp0_16x8b);
115 }
116 pu1_src += src_strd;
117 }
118
119 }
120
121
122
123 /**
124 *******************************************************************************
125 *
126 * @brief
127 * Padding (chroma block) at the left of a 2d array
128 *
129 * @par Description:
130 * The left column of a 2d array is replicated for pad_size times at the left
131 *
132 *
133 * @param[in] pu1_src
134 * UWORD8 pointer to the source
135 *
136 * @param[in] src_strd
137 * integer source stride
138 *
139 * @param[in] ht
140 * integer height of the array
141 *
142 * @param[in] wd
143 * integer width of the array (each colour component)
144 *
145 * @param[in] pad_size
146 * integer -padding size of the array
147 *
148 * @param[in] ht
149 * integer height of the array
150 *
151 * @param[in] wd
152 * integer width of the array
153 *
154 * @returns
155 *
156 * @remarks
157 * None
158 *
159 *******************************************************************************
160 */
161
ihevc_pad_left_chroma_ssse3(UWORD8 * pu1_src,WORD32 src_strd,WORD32 ht,WORD32 pad_size)162 void ihevc_pad_left_chroma_ssse3(UWORD8 *pu1_src,
163 WORD32 src_strd,
164 WORD32 ht,
165 WORD32 pad_size)
166 {
167 WORD32 row;
168 WORD32 col;
169 UWORD8 *pu1_dst;
170 __m128i const0_16x8b, const1_16x8b;
171 const0_16x8b = _mm_setzero_si128();
172 const1_16x8b = _mm_set1_epi8(1);
173 const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b);
174
175 ASSERT(pad_size % 8 == 0);
176 for(row = 0; row < ht; row++)
177 {
178 __m128i src_temp0_16x8b;
179
180 src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
181 pu1_dst = pu1_src - pad_size;
182 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
183
184 for(col = 0; col < pad_size; col += 8)
185 {
186 _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
187 }
188 pu1_src += src_strd;
189 }
190
191 }
192
193
194
195 /**
196 *******************************************************************************
197 *
198 * @brief
199 * Padding (luma block) at the right of a 2d array
200 *
201 * @par Description:
202 * The right column of a 2d array is replicated for pad_size times at the right
203 *
204 *
205 * @param[in] pu1_src
206 * UWORD8 pointer to the source
207 *
208 * @param[in] src_strd
209 * integer source stride
210 *
211 * @param[in] ht
212 * integer height of the array
213 *
214 * @param[in] wd
215 * integer width of the array
216 *
217 * @param[in] pad_size
218 * integer -padding size of the array
219 *
220 * @param[in] ht
221 * integer height of the array
222 *
223 * @param[in] wd
224 * integer width of the array
225 *
226 * @returns
227 *
228 * @remarks
229 * None
230 *
231 *******************************************************************************
232 */
233
ihevc_pad_right_luma_ssse3(UWORD8 * pu1_src,WORD32 src_strd,WORD32 ht,WORD32 pad_size)234 void ihevc_pad_right_luma_ssse3(UWORD8 *pu1_src,
235 WORD32 src_strd,
236 WORD32 ht,
237 WORD32 pad_size)
238 {
239 WORD32 row;
240 WORD32 col;
241 UWORD8 *pu1_dst;
242 __m128i const0_16x8b;
243
244 ASSERT(pad_size % 8 == 0);
245
246 for(row = 0; row < ht; row++)
247 {
248 __m128i src_temp0_16x8b;
249
250 src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 1));
251 const0_16x8b = _mm_setzero_si128();
252 pu1_dst = pu1_src;
253 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
254 for(col = 0; col < pad_size; col += 8)
255 {
256 _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
257 }
258 pu1_src += src_strd;
259 }
260
261 }
262
263
264
265 /**
266 *******************************************************************************
267 *
268 * @brief
269 * Padding (chroma block) at the right of a 2d array
270 *
271 * @par Description:
272 * The right column of a 2d array is replicated for pad_size times at the right
273 *
274 *
275 * @param[in] pu1_src
276 * UWORD8 pointer to the source
277 *
278 * @param[in] src_strd
279 * integer source stride
280 *
281 * @param[in] ht
282 * integer height of the array
283 *
284 * @param[in] wd
285 * integer width of the array (each colour component)
286 *
287 * @param[in] pad_size
288 * integer -padding size of the array
289 *
290 * @param[in] ht
291 * integer height of the array
292 *
293 * @param[in] wd
294 * integer width of the array
295 *
296 * @returns
297 *
298 * @remarks
299 * None
300 *
301 *******************************************************************************
302 */
303
ihevc_pad_right_chroma_ssse3(UWORD8 * pu1_src,WORD32 src_strd,WORD32 ht,WORD32 pad_size)304 void ihevc_pad_right_chroma_ssse3(UWORD8 *pu1_src,
305 WORD32 src_strd,
306 WORD32 ht,
307 WORD32 pad_size)
308 {
309 WORD32 row;
310 WORD32 col;
311 UWORD8 *pu1_dst;
312 __m128i const0_16x8b, const1_16x8b;
313 const0_16x8b = _mm_setzero_si128();
314 const1_16x8b = _mm_set1_epi8(1);
315 const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b);
316
317 ASSERT(pad_size % 8 == 0);
318
319 for(row = 0; row < ht; row++)
320 {
321 __m128i src_temp0_16x8b;
322
323 src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 2));
324 pu1_dst = pu1_src;
325 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
326 for(col = 0; col < pad_size; col += 8)
327 {
328 _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
329 }
330
331 pu1_src += src_strd;
332 }
333 }
334
335