/*
 * Copyright (C) 2013 The Android Open Source Project
 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/cpu-features.h>
#include <private/bionic_asm.h>
#include <private/libc_events.h>

    /*
     * Optimized memset() for ARM.
     *
     * memset() returns its first argument.
     *
     * Syntax: GNU as, unified (UAL). The PC-relative fix-up in
     * __memset_chk relies on pc reading as "instruction address + 8".
     */

    .cpu    cortex-a15
    .fpu    neon
    .syntax unified

/*
 * __memset_chk
 *   In:  r0 = destination, r1 = fill value, r2 = byte count,
 *        r3 = limit the count is checked against (presumably the known
 *             size of the destination buffer -- confirm against callers).
 *   If r2 <= r3 (unsigned) control continues into memset with r0-r2
 *   untouched. Otherwise __fortify_chk_fail is called with
 *   r0 = address of error_string and
 *   r1 = BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW.
 */
ENTRY(__memset_chk)
    cmp     r2, r3
    bls     .L_done                     // count fits: go run memset

    // Keep lr on the stack so a backtrace can be produced.
    push    {lr}
    .cfi_def_cfa_offset 4
    .cfi_rel_offset lr, 0

    ldr     r0, error_message           // r0 = error_string - (anchor + 8)
    ldr     r1, error_code
.L_chk_pc_anchor:
    add     r0, r0, pc                  // pc reads as anchor + 8 => r0 = &error_string
    bl      __fortify_chk_fail
error_code:
    .word   BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
    .word   error_string-(.L_chk_pc_anchor+8)
END(__memset_chk)

/*
 * bzero
 *   In:  r0 = destination, r1 = byte count.
 *   Rearranges the arguments into memset form (r1 = 0, r2 = count) and
 *   falls straight through into memset, which follows immediately.
 */
ENTRY(bzero)
    mov     r2, r1
    mov     r1, #0
.L_done:
    // No branch here: execution continues into memset below.
END(bzero)

/*
 * memset
 *   In:   r0 = destination, r1 = fill value (only the low byte is used),
 *         r2 = byte count.
 *   Out:  r0 = destination (never modified).
 *   Uses: r1, r2, r3, ip, q0-q3, condition flags.
 *
 * Size dispatch is done by shifting the count left so that the size bits
 * of interest land in the C and N flags (and Z reports "nothing below
 * this size is left"). None of the store instructions used here alter
 * the flags, so one shift can steer several conditional stores/branches.
 */
ENTRY(memset)
    pldw    [r0]
    mov     r3, r0                      // r3 walks the buffer; r0 stays as return value

    // Replicate the low byte of r1 into all four byte lanes.
    lsl     r1, r1, #24
    orr     r1, r1, r1, lsr #8
    orr     r1, r1, r1, lsr #16

    cmp     r2, #16
    blo     .L_less_than_16

    // ---- 16 bytes or more ------------------------------------------
    // Aligned vst1.8 / vstm are used where possible. From here on:
    //   ip      scratch
    //   q0, r1  fill pattern (q1..q3 are filled in later as needed)
    //   r2      bytes still to write
    //   r3      advancing destination pointer
    vdup.32 q0, r1

    ands    ip, r3, #15
    beq     .L_memset_aligned

    // Destination is not 16-byte aligned: ip = bytes needed to reach
    // the next 16-byte boundary (1..15).
    pldw    [r0, #64]
    rsb     ip, ip, #16

    // Charge the alignment bytes to the count up front. The 8-byte
    // case is expected to be common, hence its dedicated pair of
    // word stores at the end.
    sub     r2, r2, ip

    // 1 byte
    tst     ip, #1
    it      ne
    strbne  r1, [r3], #1
    // 2 bytes
    tst     ip, #2
    it      ne
    strhne  r1, [r3], #2
    // 4 bytes: after the shift N = bit 2 of ip, C = bit 3 of ip.
    lsls    ip, ip, #29
    it      mi
    strmi   r1, [r3], #4
    // 8 bytes
    itt     cs
    strcs   r1, [r3], #4
    strcs   r1, [r3], #4

.L_memset_aligned:
    // r3 is 16-byte aligned now; choose a path for what is left.
    vmov    q1, q0
    cmp     r2, #128
    blo     .L_less_than_128

    // Large fill. Each vstm below writes q0-q3 (64 bytes), two per
    // iteration. r2 is biased by -128 so the loop can exit on borrow.
    vmov    q2, q0
    vmov    q3, q0
    pldw    [r0, #128]
    sub     r2, r2, #128
    .p2align 4
.L_memset_loop_128:
    pldw    [r3, #192]
    vstm    r3!, {q0, q1, q2, q3}
    vstm    r3!, {q0, q1, q2, q3}
    subs    r2, r2, #128
    bhs     .L_memset_loop_128

    // Remove the bias: r2 = bytes left (0..127). Leave if none.
    adds    r2, r2, #128
    beq     .L_memset_return

    .p2align 4
.L_less_than_128:
    // C = bit 6 (64), N = bit 5 (32), Z = bits 5..0 all clear.
    lsls    ip, r2, #26
    bcc     .L_tail_try_32
    // 64 bytes
    vst1.8  {q0, q1}, [r3, :128]!
    vst1.8  {q0, q1}, [r3, :128]!
    beq     .L_memset_return
.L_tail_try_32:
    bpl     .L_tail_try_16
    // 32 bytes
    vst1.8  {q0, q1}, [r3, :128]!
.L_tail_try_16:
    // C = bit 4 (16), N = bit 3 (8), Z = bits 3..0 all clear.
    lsls    ip, r2, #28
    bcc     .L_tail_try_8
    // 16 bytes
    vst1.8  {q0}, [r3, :128]!
    beq     .L_memset_return
.L_tail_try_8:
    bpl     .L_tail_try_4
    // 8 bytes
    vst1.8  {d0}, [r3, :64]!
.L_tail_try_4:
    // 4 bytes
    tst     r2, #4
    it      ne
    strne   r1, [r3], #4
    // C = bit 1 (2), N = bit 0 (1).
    lsls    ip, r2, #31
    // 2 bytes
    it      cs
    strhcs  r1, [r3], #2
    // 1 byte
    it      mi
    strbmi  r1, [r3]
.L_memset_return:
    bx      lr

.L_less_than_16:
    // ---- 0..15 bytes, no alignment handling --------------------------
    // C = bit 3 (8), N = bit 2 (4), Z = bits 2..0 all clear.
    lsls    ip, r2, #29
    bcc     .L_small_try_4
    str     r1, [r3], #4
    str     r1, [r3], #4
    beq     .L_small_return
.L_small_try_4:
    it      mi
    strmi   r1, [r3], #4
    // C = bit 1 (2), N = bit 0 (1); the 2-byte case is two byte stores.
    lsls    ip, r2, #31
    it      mi
    strbmi  r1, [r3], #1
    itt     cs
    strbcs  r1, [r3], #1
    strbcs  r1, [r3]
.L_small_return:
    bx      lr
END(memset)

    .data
error_string:
    .string "memset: prevented write past end of buffer"