1@/*****************************************************************************
2@*
3@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4@*
5@* Licensed under the Apache License, Version 2.0 (the "License");
6@* you may not use this file except in compliance with the License.
7@* You may obtain a copy of the License at:
8@*
9@* http://www.apache.org/licenses/LICENSE-2.0
10@*
11@* Unless required by applicable law or agreed to in writing, software
12@* distributed under the License is distributed on an "AS IS" BASIS,
13@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@* See the License for the specific language governing permissions and
15@* limitations under the License.
16@*
17@*****************************************************************************/
18@/**
19@ *******************************************************************************
20@ * ,:file
21@ *  ihevc_mem_fns_neon.s
22@ *
23@ * ,:brief
24@ *  Contains function definitions for memory manipulation
25@ *
26@ * ,:author
27@ *  Naveen SR
28@ *
@ * ,:par List of Functions:
@ *  - ihevc_memcpy_mul_8()
@ *  - ihevc_memcpy()
@ *  - ihevc_memset_mul_8()
@ *  - ihevc_memset()
@ *  - ihevc_memset_16bit_mul_8()
@ *  - ihevc_memset_16bit()
33@ *
34@ * ,:remarks
35@ *  None
36@ *
37@ *******************************************************************************
38@*/
39
@/**
@*******************************************************************************
@*
@* ,:brief
@*   memcpy of a 1d array, 8 bytes per iteration
@*
@* ,:par Description:
@*   Copies 8bit data from source to destination through NEON register d0,
@*   8 bytes at a time. The 8 bytes are copied before the count is tested and
@*   the loop exits only when the count reaches exactly zero, so num_bytes
@*   must be a non-zero multiple of 8 (intended sizes: 8, 16 or 32 bytes).
@*
@* ,:param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* ,:param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* ,:param[in] num_bytes
@*  number of bytes to copy; non-zero multiple of 8
@*
@* ,:returns
@*  None
@*
@* ,:remarks
@*  Modifies r0, r1, r2, d0 and the condition flags. Does not use the stack.
@*
@*******************************************************************************
@*/
@void ihevc_memcpy_mul_8(UWORD8 *pu1_dst,
@                        UWORD8 *pu1_src,
@                        UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => *pu1_src
@   r2 => num_bytes

.text
.p2align 2

    .global ihevc_memcpy_mul_8_a9q
.type ihevc_memcpy_mul_8_a9q, %function

ihevc_memcpy_mul_8_a9q:

LOOP_NEON_MEMCPY_MUL_8:
    @ Memcpy 8 bytes
    VLD1.8      d0,[r1]!                    @ d0 = next 8 source bytes, pu1_src += 8
    VST1.8      d0,[r0]!                    @ store them, pu1_dst += 8

    SUBS        r2,r2,#8                    @ 8 fewer bytes left to copy
    BNE         LOOP_NEON_MEMCPY_MUL_8      @ repeat until the count is exactly zero
    BX          LR                          @ same return form as every other routine in this file
91
92
93
@/**
@*******************************************************************************
@*
@* ,:brief
@*   memcpy of a 1d array of any length
@*
@* ,:par Description:
@*   Copies num_bytes bytes of 8bit data from source to destination. Whole
@*   groups of 8 bytes go through NEON register d0; the remaining 0 to 7 bytes
@*   are copied one at a time through r3. A num_bytes of 0 copies nothing.
@*
@* ,:param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* ,:param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* ,:param[in] num_bytes
@*  number of bytes to copy
@*
@* ,:returns
@*  None
@*
@* ,:remarks
@*  Modifies r0, r1, r2, r3, d0 and the condition flags. Does not use the stack.
@*
@*******************************************************************************
@*/
@void ihevc_memcpy(UWORD8 *pu1_dst,
@                  UWORD8 *pu1_src,
@                  UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => *pu1_src
@   r2 => num_bytes



    .global ihevc_memcpy_a9q
.type ihevc_memcpy_a9q, %function

ihevc_memcpy_a9q:
    SUBS        r2,r2,#8                    @ r2 = bytes left - 8 (kept biased through the NEON loop)
    BLT         ARM_MEMCPY                  @ fewer than 8 bytes: skip the NEON loop
LOOP_NEON_MEMCPY:
    @ Memcpy 8 bytes
    VLD1.8      d0,[r1]!
    VST1.8      d0,[r0]!

    SUBS        r2,r2,#8
    BGE         LOOP_NEON_MEMCPY            @ at least 8 more bytes remain

ARM_MEMCPY:
    @ Remove the bias: r2 = bytes left (0..7). Returning on zero here covers
    @ both an exact multiple of 8 and a zero-length request, so the byte loop
    @ below is never entered with a zero count.
    ADDS        r2,r2,#8
    BXEQ        LR

LOOP_ARM_MEMCPY:
    LDRB        r3,[r1],#1                  @ copy one byte, post-incrementing both pointers
    STRB        r3,[r0],#1
    SUBS        r2,r2,#1
    BNE         LOOP_ARM_MEMCPY
    BX          LR
131
132
133
134
@/**
@*******************************************************************************
@*
@* ,:brief
@*   memset of a 1d array, 8 bytes per iteration
@*
@* ,:par Description:
@*   Fills the destination with the low byte of value, storing 8 bytes per
@*   iteration from NEON register d0. The loop stores before it tests and
@*   exits only when the count reaches exactly zero, so num_bytes must be a
@*   non-zero multiple of 8.
@*
@* ,:param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* ,:param[in] value
@*  byte value to store
@*
@* ,:param[in] num_bytes
@*  number of bytes to set; non-zero multiple of 8
@*
@* ,:returns
@*  None
@*
@* ,:remarks
@*  Modifies r0, r2, d0 and the condition flags. Does not use the stack.
@*
@*******************************************************************************
@*/
@void ihevc_memset_mul_8(UWORD8 *pu1_dst,
@                        UWORD8 value,
@                        UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => value
@   r2 => num_bytes

.text
.p2align 2

    .global ihevc_memset_mul_8_a9q
.type ihevc_memset_mul_8_a9q, %function

ihevc_memset_mul_8_a9q:

@ Assumptions: num_bytes is either 8, 16 or 32
    VDUP.8      d0,r1                       @ d0 = low byte of value in all 8 byte lanes

.Lmemset_mul_8_next:
    SUBS        r2,r2,#8                    @ count the 8 bytes stored just below
    VST1.8      {d0},[r0]!                  @ store 8 bytes, pu1_dst += 8 (flags are not affected)
    BNE         .Lmemset_mul_8_next         @ repeat until the count is exactly zero

    BX          LR
163
164
165
166
@/**
@*******************************************************************************
@*
@* ,:brief
@*   memset of a 1d array of any length
@*
@* ,:par Description:
@*   Fills num_bytes bytes of the destination with the low byte of value.
@*   Whole groups of 8 bytes are stored from NEON register d0; the remaining
@*   0 to 7 bytes are stored one at a time from r1. A num_bytes of 0 stores
@*   nothing.
@*
@* ,:param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* ,:param[in] value
@*  byte value to store
@*
@* ,:param[in] num_bytes
@*  number of bytes to set
@*
@* ,:returns
@*  None
@*
@* ,:remarks
@*  Modifies r0, r2, the condition flags and, when num_bytes >= 8, d0.
@*  Does not use the stack.
@*
@*******************************************************************************
@*/
@void ihevc_memset(UWORD8 *pu1_dst,
@                  UWORD8 value,
@                  UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => value
@   r2 => num_bytes



    .global ihevc_memset_a9q
.type ihevc_memset_a9q, %function

ihevc_memset_a9q:
    SUBS        r2,r2,#8                    @ r2 = bytes left - 8 (kept biased through the NEON loop)
    BLT         ARM_MEMSET                  @ fewer than 8 bytes: skip the NEON loop
    VDUP.8      d0,r1                       @ d0 = low byte of value in all 8 byte lanes
LOOP_NEON_MEMSET:
    @ Memset 8 bytes
    VST1.8      d0,[r0]!

    SUBS        r2,r2,#8
    BGE         LOOP_NEON_MEMSET            @ at least 8 more bytes remain

ARM_MEMSET:
    @ Remove the bias: r2 = bytes left (0..7). Returning on zero here covers
    @ both an exact multiple of 8 and a zero-length request, so the byte loop
    @ below is never entered with a zero count.
    ADDS        r2,r2,#8
    BXEQ        LR

LOOP_ARM_MEMSET:
    STRB        r1,[r0],#1                  @ store one byte, pu1_dst += 1
    SUBS        r2,r2,#1
    BNE         LOOP_ARM_MEMSET
    BX          LR
201
202
203
204
@/**
@*******************************************************************************
@*
@* ,:brief
@*   memset of a 1d array of 16bit words, 8 words per iteration
@*
@* ,:par Description:
@*   Fills the destination with the low 16 bits of value. Each iteration
@*   stores d0 (4 halfwords) twice, i.e. 8 words / 16 bytes, and subtracts 8
@*   from the count. The loop stores before it tests and exits only when the
@*   count reaches exactly zero, so num_words must be a non-zero multiple of 8.
@*
@* ,:param[out] pu2_dst
@*  UWORD16 pointer to the destination
@*
@* ,:param[in] value
@*  16bit value to store
@*
@* ,:param[in] num_words
@*  number of 16bit words to set; non-zero multiple of 8
@*
@* ,:returns
@*  None
@*
@* ,:remarks
@*  Modifies r0, r2, d0 and the condition flags. Does not use the stack.
@*
@*******************************************************************************
@*/
@void ihevc_memset_16bit_mul_8(UWORD16 *pu2_dst,
@                              UWORD16 value,
@                              UWORD8 num_words)
@**************Variables Vs Registers*************************
@   r0 => *pu2_dst
@   r1 => value
@   r2 => num_words

.text
.p2align 2

    .global ihevc_memset_16bit_mul_8_a9q
.type ihevc_memset_16bit_mul_8_a9q, %function

ihevc_memset_16bit_mul_8_a9q:

@ Assumptions: num_words is either 8, 16 or 32
    VDUP.16     d0,r1                       @ d0 = low halfword of value in all 4 lanes

.Lmemset_16bit_mul_8_next:
    SUBS        r2,r2,#8                    @ count the 8 words stored just below
    VST1.16     {d0},[r0]!                  @ words 0..3, pu2_dst advances 8 bytes (flags are not affected)
    VST1.16     {d0},[r0]!                  @ words 4..7, pu2_dst advances 8 bytes
    BNE         .Lmemset_16bit_mul_8_next   @ repeat until the count is exactly zero

    BX          LR
235
236
237
238
@/**
@*******************************************************************************
@*
@* ,:brief
@*   memset of a 1d array of 16bit words, any length
@*
@* ,:par Description:
@*   Fills num_words 16bit words of the destination with the low 16 bits of
@*   value. Whole groups of 8 words are stored as two 4-halfword stores of
@*   NEON register d0; the remaining 0 to 7 words are stored one at a time
@*   from r1. A num_words of 0 stores nothing.
@*
@* ,:param[out] pu2_dst
@*  UWORD16 pointer to the destination
@*
@* ,:param[in] value
@*  16bit value to store
@*
@* ,:param[in] num_words
@*  number of 16bit words to set
@*
@* ,:returns
@*  None
@*
@* ,:remarks
@*  Modifies r0, r2, the condition flags and, when num_words >= 8, d0.
@*  Does not use the stack.
@*
@*******************************************************************************
@*/
@void ihevc_memset_16bit(UWORD16 *pu2_dst,
@                        UWORD16 value,
@                        UWORD8 num_words)
@**************Variables Vs Registers*************************
@   r0 => *pu2_dst
@   r1 => value
@   r2 => num_words



    .global ihevc_memset_16bit_a9q
.type ihevc_memset_16bit_a9q, %function

ihevc_memset_16bit_a9q:
    SUBS        r2,r2,#8                    @ r2 = words left - 8 (kept biased through the NEON loop)
    BLT         ARM_MEMSET_16BIT            @ fewer than 8 words: skip the NEON loop
    VDUP.16     d0,r1                       @ d0 = low halfword of value in all 4 lanes
LOOP_NEON_MEMSET_16BIT:
    @ Memset 8 words
    VST1.16     d0,[r0]!
    VST1.16     d0,[r0]!

    SUBS        r2,r2,#8
    BGE         LOOP_NEON_MEMSET_16BIT      @ at least 8 more words remain

ARM_MEMSET_16BIT:
    @ Remove the bias: r2 = words left (0..7). Returning on zero here covers
    @ both an exact multiple of 8 and a zero-length request, so the halfword
    @ loop below is never entered with a zero count.
    ADDS        r2,r2,#8
    BXEQ        LR

LOOP_ARM_MEMSET_16BIT:
    STRH        r1,[r0],#2                  @ store one halfword, pu2_dst += 2 bytes
    SUBS        r2,r2,#1
    BNE         LOOP_ARM_MEMSET_16BIT
    BX          LR
274
275
276
277
278    .section .note.GNU-stack,"",%progbits
279
280