1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 *  ihevc_padding_atom_intr.c
22 *
23 * @brief
24 *  Contains function definitions for Padding
25 *
26 * @author
27 *  Srinivas T
28 *
29 * @par List of Functions:
30 *   - ihevc_pad_left_luma_ssse3()
31 *   - ihevc_pad_left_chroma_ssse3()
32 *   - ihevc_pad_right_luma_ssse3()
33 *   - ihevc_pad_right_chroma_ssse3()
34 *
35 * @remarks
36 *  None
37 *
38 *******************************************************************************
39 */
40 
41 #include <string.h>
42 #include <assert.h>
43 #include "ihevc_typedefs.h"
44 #include "ihevc_func_selector.h"
45 #include "ihevc_platform_macros.h"
46 #include "ihevc_mem_fns.h"
47 #include "ihevc_debug.h"
48 
49 #include <immintrin.h>
50 
51 
52 /**
53 *******************************************************************************
54 *
55 * @brief
56 *   Padding (luma block) at the left of a 2d array
57 *
58 * @par Description:
59 *   The left column of a 2d array is replicated for pad_size times at the left
60 *
61 *
62 * @param[in] pu1_src
63 *  UWORD8 pointer to the source
64 *
65 * @param[in] src_strd
66 *  integer source stride
67 *
68 * @param[in] ht
69 *  integer height of the array
70 *
71 * @param[in] wd
72 *  integer width of the array
73 *
74 * @param[in] pad_size
75 *  integer -padding size of the array
76 *
77 * @param[in] ht
78 *  integer height of the array
79 *
80 * @param[in] wd
81 *  integer width of the array
82 *
83 * @returns
84 *
85 * @remarks
86 *  None
87 *
88 *******************************************************************************
89 */
90 
ihevc_pad_left_luma_ssse3(UWORD8 * pu1_src,WORD32 src_strd,WORD32 ht,WORD32 pad_size)91 void ihevc_pad_left_luma_ssse3(UWORD8 *pu1_src,
92                                WORD32 src_strd,
93                                WORD32 ht,
94                                WORD32 pad_size)
95 {
96     WORD32 row;
97     WORD32 i;
98     UWORD8 *pu1_dst;
99     __m128i const0_16x8b;
100 
101     const0_16x8b = _mm_setzero_si128();
102 
103     ASSERT(pad_size % 8 == 0);
104 
105     for(row = 0; row < ht; row++)
106     {
107         __m128i src_temp0_16x8b;
108 
109         src_temp0_16x8b =  _mm_loadu_si128((__m128i *)pu1_src);
110         pu1_dst = pu1_src - pad_size;
111         src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
112         for(i = 0; i < pad_size; i += 8)
113         {
114             _mm_storel_epi64((__m128i *)(pu1_dst + i), src_temp0_16x8b);
115         }
116         pu1_src += src_strd;
117     }
118 
119 }
120 
121 
122 
123 /**
124 *******************************************************************************
125 *
126 * @brief
127 *   Padding (chroma block) at the left of a 2d array
128 *
129 * @par Description:
130 *   The left column of a 2d array is replicated for pad_size times at the left
131 *
132 *
133 * @param[in] pu1_src
134 *  UWORD8 pointer to the source
135 *
136 * @param[in] src_strd
137 *  integer source stride
138 *
139 * @param[in] ht
140 *  integer height of the array
141 *
142 * @param[in] wd
143 *  integer width of the array (each colour component)
144 *
145 * @param[in] pad_size
146 *  integer -padding size of the array
147 *
148 * @param[in] ht
149 *  integer height of the array
150 *
151 * @param[in] wd
152 *  integer width of the array
153 *
154 * @returns
155 *
156 * @remarks
157 *  None
158 *
159 *******************************************************************************
160 */
161 
ihevc_pad_left_chroma_ssse3(UWORD8 * pu1_src,WORD32 src_strd,WORD32 ht,WORD32 pad_size)162 void ihevc_pad_left_chroma_ssse3(UWORD8 *pu1_src,
163                                  WORD32 src_strd,
164                                  WORD32 ht,
165                                  WORD32 pad_size)
166 {
167     WORD32 row;
168     WORD32 col;
169     UWORD8 *pu1_dst;
170     __m128i const0_16x8b, const1_16x8b;
171     const0_16x8b = _mm_setzero_si128();
172     const1_16x8b = _mm_set1_epi8(1);
173     const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b);
174 
175     ASSERT(pad_size % 8 == 0);
176     for(row = 0; row < ht; row++)
177     {
178         __m128i src_temp0_16x8b;
179 
180         src_temp0_16x8b =  _mm_loadu_si128((__m128i *)pu1_src);
181         pu1_dst = pu1_src - pad_size;
182         src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
183 
184         for(col = 0; col < pad_size; col += 8)
185         {
186             _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
187         }
188         pu1_src += src_strd;
189     }
190 
191 }
192 
193 
194 
195 /**
196 *******************************************************************************
197 *
198 * @brief
199 * Padding (luma block) at the right of a 2d array
200 *
201 * @par Description:
202 * The right column of a 2d array is replicated for pad_size times at the right
203 *
204 *
205 * @param[in] pu1_src
206 *  UWORD8 pointer to the source
207 *
208 * @param[in] src_strd
209 *  integer source stride
210 *
211 * @param[in] ht
212 *  integer height of the array
213 *
214 * @param[in] wd
215 *  integer width of the array
216 *
217 * @param[in] pad_size
218 *  integer -padding size of the array
219 *
220 * @param[in] ht
221 *  integer height of the array
222 *
223 * @param[in] wd
224 *  integer width of the array
225 *
226 * @returns
227 *
228 * @remarks
229 *  None
230 *
231 *******************************************************************************
232 */
233 
ihevc_pad_right_luma_ssse3(UWORD8 * pu1_src,WORD32 src_strd,WORD32 ht,WORD32 pad_size)234 void ihevc_pad_right_luma_ssse3(UWORD8 *pu1_src,
235                                 WORD32 src_strd,
236                                 WORD32 ht,
237                                 WORD32 pad_size)
238 {
239     WORD32 row;
240     WORD32 col;
241     UWORD8 *pu1_dst;
242     __m128i const0_16x8b;
243 
244     ASSERT(pad_size % 8 == 0);
245 
246     for(row = 0; row < ht; row++)
247     {
248         __m128i src_temp0_16x8b;
249 
250         src_temp0_16x8b =  _mm_loadu_si128((__m128i *)(pu1_src - 1));
251         const0_16x8b = _mm_setzero_si128();
252         pu1_dst = pu1_src;
253         src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
254         for(col = 0; col < pad_size; col += 8)
255         {
256             _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
257         }
258         pu1_src += src_strd;
259     }
260 
261 }
262 
263 
264 
265 /**
266 *******************************************************************************
267 *
268 * @brief
269 * Padding (chroma block) at the right of a 2d array
270 *
271 * @par Description:
272 * The right column of a 2d array is replicated for pad_size times at the right
273 *
274 *
275 * @param[in] pu1_src
276 *  UWORD8 pointer to the source
277 *
278 * @param[in] src_strd
279 *  integer source stride
280 *
281 * @param[in] ht
282 *  integer height of the array
283 *
284 * @param[in] wd
285 *  integer width of the array (each colour component)
286 *
287 * @param[in] pad_size
288 *  integer -padding size of the array
289 *
290 * @param[in] ht
291 *  integer height of the array
292 *
293 * @param[in] wd
294 *  integer width of the array
295 *
296 * @returns
297 *
298 * @remarks
299 *  None
300 *
301 *******************************************************************************
302 */
303 
ihevc_pad_right_chroma_ssse3(UWORD8 * pu1_src,WORD32 src_strd,WORD32 ht,WORD32 pad_size)304 void ihevc_pad_right_chroma_ssse3(UWORD8 *pu1_src,
305                                   WORD32 src_strd,
306                                   WORD32 ht,
307                                   WORD32 pad_size)
308 {
309     WORD32 row;
310     WORD32 col;
311     UWORD8 *pu1_dst;
312     __m128i const0_16x8b, const1_16x8b;
313     const0_16x8b = _mm_setzero_si128();
314     const1_16x8b = _mm_set1_epi8(1);
315     const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b);
316 
317     ASSERT(pad_size % 8 == 0);
318 
319     for(row = 0; row < ht; row++)
320     {
321         __m128i src_temp0_16x8b;
322 
323         src_temp0_16x8b =  _mm_loadu_si128((__m128i *)(pu1_src - 2));
324         pu1_dst = pu1_src;
325         src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
326         for(col = 0; col < pad_size; col += 8)
327         {
328             _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
329         }
330 
331         pu1_src += src_strd;
332     }
333 }
334 
335