/*
 * Copyright (C) 2013 The Android Open Source Project
 * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/cpu-features.h>
#include <private/bionic_asm.h>
#include <private/libc_events.h>

        /*
         * Optimized memset() for ARM.
         *
         * memset() returns its first argument.
         *
         * Syntax: GNU as, ARM unified syntax, run through the C preprocessor.
         * Target: 32-bit ARM with NEON (see the .cpu/.fpu directives below).
         * ABI:    arguments arrive in r0-r3 and the result is returned in r0.
         */

        .cpu        cortex-a15
        .fpu        neon
        .syntax     unified

/*
 * __memset_chk: bounds-checked entry point in front of memset().
 *
 * In:   r0 = dst, r1 = value, r2 = count,
 *       r3 = limit that count is checked against (going by the error
 *            message, the size of the destination buffer)
 * Out:  as memset() when count <= limit (unsigned compare); otherwise
 *       __fortify_chk_fail is called and control is not expected back.
 *
 * On the passing path no register is modified: the code branches to
 * .L_done, a label at the end of bzero from which execution falls
 * through into memset.
 */
ENTRY(__memset_chk)
        cmp         r2, r3
        bls         .L_done             // count <= limit: plain memset

        // Preserve lr for backtrace.  r3 is pushed alongside it purely as
        // padding: the procedure call standard requires sp to be 8-byte
        // aligned at a call to a public function, and pushing lr alone
        // would leave sp at 4 mod 8 for __fortify_chk_fail and everything
        // it calls.
        push        {r3, lr}
        .cfi_def_cfa_offset 8
        .cfi_rel_offset lr, 4

        // r0 = address of error_string, formed position-independently: the
        // literal at error_message holds the distance from (1b + 8) to the
        // string, and pc reads as "address of the add" + 8 when the add
        // executes.  NOTE(review): the +8 bias is the ARM-state value; this
        // assumes the file is assembled as ARM, not Thumb -- confirm.
        // r1 = event code.
        ldr         r0, error_message
        ldr         r1, error_code
1:
        add         r0, pc
        bl          __fortify_chk_fail
        // No return is expected: the words below are data, not code.
error_code:
        .word       BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
        .word       error_string-(1b+8)
END(__memset_chk)

/*
 * bzero: zero-fill a buffer by rearranging the arguments for memset.
 *
 * In:   r0 = dst, r1 = count
 * Out:  whatever memset leaves in r0 (dst).
 *
 * There is no branch or return here.  The function ends by falling
 * through into memset, so memset must remain the very next code in this
 * file.  .L_done is also the target of the passing path of __memset_chk.
 */
ENTRY(bzero)
        mov         r2, r1              // count -> third argument of memset
        mov         r1, #0              // fill value = 0
.L_done:
        // Fall through to memset...
END(bzero)

/*
 * void* memset(void* dst, int value, size_t count)
 *
 * In:    r0 = dst, r1 = value (only the low 8 bits are used), r2 = count
 * Out:   r0 = dst; r0 is never written, r3 serves as the working pointer
 * Uses:  r1, r2, r3, ip, q0-q3, condition flags; no stack
 *
 * Strategy:
 *   count < 16   -> .L_less_than_16: core-register stores only.
 *   count >= 16  -> store 1..15 head bytes to reach a 16-byte boundary,
 *                   then 128 bytes per loop iteration, then a tail of
 *                   64/32/16/8/4/2/1-byte stores selected by the bits of
 *                   the remaining count.
 *
 * Many stores below are steered by flags produced by an earlier
 * "movs ip, rX, lsl #n": C receives the last bit shifted out, N the bit
 * that lands in bit 31, and Z is set when no lower bit remains.  Stores
 * do not alter the flags, so a single movs steers several stores and
 * branches.  Instruction order matters; do not reorder casually.
 */
ENTRY(memset)
        pldw        [r0]
        mov         r3, r0

        // Duplicate the low byte of r1 into all four byte lanes:
        //   0x??????bb -> 0xbb000000 -> 0xbbbb0000 -> 0xbbbbbbbb
        mov         r1, r1, lsl #24
        orr         r1, r1, r1, lsr #8
        orr         r1, r1, r1, lsr #16

        cmp         r2, #16
        blo         .L_less_than_16

        // This section handles regions 16 bytes or larger
        //
        // Use aligned vst1.8 and vstm when possible.  Register values will be:
        //   ip is scratch
        //   q0, q1, and r1 contain the memset value
        //   r2 is the number of bytes to set
        //   r3 is the advancing destination pointer
        vdup.32     q0, r1

        // ip = dst & 15; zero means the pointer is already 16-byte aligned.
        ands        ip, r3, #0xF
        beq         .L_memset_aligned

        // Align dest pointer to 16-byte boundary.
        // ip = 16 - (dst & 15): the number of head bytes, in the range 1..15.
        pldw        [r0, #64]
        rsb         ip, ip, #16

        // Pre-adjust the byte count to reflect post-alignment value.  Expecting
        // 8-byte alignment to be rather common so we special case that one.
        // count >= 16 and ip <= 15 here, so r2 stays >= 1.
        sub         r2, r2, ip

        // Each set bit of ip triggers one store of that size, smallest
        // first, so every store lands on an address aligned to its size.
        /* set 1 byte */
        tst         ip, #1
        it          ne
        strbne      r1, [r3], #1
        /* set 2 bytes */
        tst         ip, #2
        it          ne
        strhne      r1, [r3], #2
        /* set 4 bytes */
        // After the shift: N = bit 2 of ip (4 bytes), C = bit 3 (8 bytes).
        movs        ip, ip, lsl #29
        it          mi
        strmi       r1, [r3], #4
        /* set 8 bytes */
        itt         cs
        strcs       r1, [r3], #4
        strcs       r1, [r3], #4

.L_memset_aligned:
        // Destination is now 16-byte aligned.  Determine how to handle
        // remaining bytes.
        vmov        q1, q0
        cmp         r2, #128
        blo         .L_less_than_128

        // We need to set a larger block of memory.  Use four Q regs to
        // set a full cache line in one instruction.  Pre-decrement
        // r2 to simplify end-of-loop detection
        vmov        q2, q0
        vmov        q3, q0
        pldw        [r0, #128]
        sub         r2, r2, #128
        .align 4
.L_memset_loop_128:
        // Each vstm writes q0-q3 (64 bytes), so one iteration stores 128.
        // r2 is biased by -128: the subs leaves C set (hs) exactly when at
        // least another 128 bytes remain to be written.
        pldw        [r3, #192]
        vstm        r3!, {q0, q1, q2, q3}
        vstm        r3!, {q0, q1, q2, q3}
        subs        r2, r2, #128
        bhs         .L_memset_loop_128

        // Un-bias r2 so it contains the number of bytes left.  Early
        // exit if we are done.
        adds        r2, r2, #128
        beq         2f

        .align 4
.L_less_than_128:
        // 0 < r2 < 128 and r3 is 16-byte aligned on arrival.
        // set 64 bytes
        // After the shift: C = bit 6 of r2 (64), N = bit 5 (32), and Z is
        // set when bits 0-5 are all clear.
        movs        ip, r2, lsl #26
        bcc         1f
        vst1.8      {q0, q1}, [r3, :128]!
        vst1.8      {q0, q1}, [r3, :128]!
        beq         2f                  // nothing left below 64: done
1:
        // set 32 bytes
        bpl         1f
        vst1.8      {q0, q1}, [r3, :128]!
1:
        // set 16 bytes
        // After the shift: C = bit 4 of r2 (16), N = bit 3 (8), and Z is
        // set when bits 0-3 are all clear.
        movs        ip, r2, lsl #28
        bcc         1f
        vst1.8      {q0}, [r3, :128]!
        beq         2f                  // nothing left below 16: done
1:
        // set 8 bytes
        bpl         1f
        vst1.8      {d0}, [r3, :64]!
1:
        // set 4 bytes
        tst         r2, #4
        it          ne
        strne       r1, [r3], #4
        // The label below is not the target of any branch in this routine.
1:
        // set 2 bytes
        // After the shift: C = bit 1 of r2 (2 bytes), N = bit 0 (1 byte).
        movs        ip, r2, lsl #31
        it          cs
        strhcs      r1, [r3], #2
        // set 1 byte
        it          mi
        strbmi      r1, [r3]
2:
        bx          lr

.L_less_than_16:
        // Store up to 15 bytes without worrying about byte alignment
        // (the word stores below may therefore hit unaligned addresses).
        // After the shift: C = bit 3 of r2 (8 bytes), N = bit 2 (4 bytes),
        // and Z is set when bits 0-2 are all clear.
        movs        ip, r2, lsl #29
        bcc         1f
        str         r1, [r3], #4
        str         r1, [r3], #4
        beq         2f                  // count was exactly 8: done
1:
        it          mi
        strmi       r1, [r3], #4
        // After the shift: C = bit 1 of r2 (2 bytes), N = bit 0 (1 byte).
        movs        ip, r2, lsl #31
        it          mi
        strbmi      r1, [r3], #1
        itt         cs
        strbcs      r1, [r3], #1
        strbcs      r1, [r3]
2:
        bx          lr
END(memset)

206        .data
207error_string:
208        .string     "memset: prevented write past end of buffer"
209