1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21  *******************************************************************************
22  * @file
23  *  ih264_mem_fns_atom_intr.c
24  *
25  * @brief
26  *  Functions used for memory operations
27  *
28  * @author
29  *  Ittiam
30  *
 * @par List of Functions:
 *  - ih264_memcpy_mul_8_ssse3
 *  - ih264_memset_mul_8_ssse3
 *  - ih264_memset_16bit_mul_8_ssse3
 *
33  * @remarks
34  *  None
35  *
36  *******************************************************************************
37  */
38 
39 /*****************************************************************************/
40 /* File Includes                                                             */
41 /*****************************************************************************/
42 #include <stdio.h>
43 #include <stddef.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <assert.h>
47 
48 #include "ih264_typedefs.h"
49 #include "ih264_mem_fns.h"
50 
51 #include <immintrin.h>
52 
53 /**
54  *******************************************************************************
55  *
56  * @brief
57  *   memcpy of a 8,16 or 32 bytes
58  *
59  * @par Description:
60  *   Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes
61  *
62  * @param[in] pu1_dst
63  *  UWORD8 pointer to the destination
64  *
65  * @param[in] pu1_src
66  *  UWORD8 pointer to the source
67  *
68  * @param[in] num_bytes
69  *  number of bytes to copy
70  * @returns
71  *
72  * @remarks
73  *  None
74  *
75  *******************************************************************************
76  */
77 
78 
79 
80 
ih264_memcpy_mul_8_ssse3(UWORD8 * pu1_dst,UWORD8 * pu1_src,UWORD32 num_bytes)81 void ih264_memcpy_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes)
82 {
83     int col;
84     for(col = num_bytes; col >= 8; col -= 8)
85     {
86         __m128i src_temp16x8b;
87         src_temp16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
88         pu1_src += 8;
89         _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b);
90         pu1_dst += 8;
91     }
92 }
93 
94 /**
95  *******************************************************************************
96  *
97  * @brief
98  *   memset of a 8,16 or 32 bytes
99  *
100  * @par Description:
101  *   Does memset of 8bit data for 8,16 or 32 number of bytes
102  *
103  * @param[in] pu1_dst
104  *  UWORD8 pointer to the destination
105  *
106  * @param[in] value
107  *  UWORD8 value used for memset
108  *
109  * @param[in] num_bytes
110  *  number of bytes to set
111  * @returns
112  *
113  * @remarks
114  *  None
115  *
116  *******************************************************************************
117  */
118 
119 
ih264_memset_mul_8_ssse3(UWORD8 * pu1_dst,UWORD8 value,UWORD32 num_bytes)120 void ih264_memset_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes)
121 {
122     int col;
123     __m128i src_temp16x8b;
124     src_temp16x8b = _mm_set1_epi8(value);
125     for(col = num_bytes; col >= 8; col -= 8)
126     {
127         _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b);
128         pu1_dst += 8;
129     }
130 }
131 
132 /**
133  *******************************************************************************
134  *
135  * @brief
136  *   memset of 16bit data of a 8,16 or 32 bytes
137  *
138  * @par Description:
139  *   Does memset of 16bit data for 8,16 or 32 number of bytes
140  *
141  * @param[in] pu2_dst
142  *  UWORD8 pointer to the destination
143  *
144  * @param[in] value
145  *  UWORD16 value used for memset
146  *
147  * @param[in] num_words
148  *  number of words to set
149  * @returns
150  *
151  * @remarks
152  *  None
153  *
154  *******************************************************************************
155  */
156 
157 
ih264_memset_16bit_mul_8_ssse3(UWORD16 * pu2_dst,UWORD16 value,UWORD32 num_words)158 void ih264_memset_16bit_mul_8_ssse3(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words)
159 {
160     int col;
161     __m128i src_temp16x8b;
162     src_temp16x8b = _mm_set1_epi16(value);
163     for(col = num_words; col >= 8; col -= 8)
164     {
165         _mm_storeu_si128((__m128i *)(pu2_dst), src_temp16x8b);
166         pu2_dst += 8;
167     }
168 }
169 
170