/*
Copyright (c) 2011 Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * Syntax: GNU as, AT&T operand order, passed through the C preprocessor.
 * Target: 32-bit x86 with SSE2; arguments are taken from the stack.
 */

/* Each helper macro below is only defined if the including environment
   has not already provided it.  */

#ifndef L
# define L(label)	.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef ENTRY
# define ENTRY(name)			\
	.type name, @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* Unwind-info bookkeeping for a 4-byte push / pop of REG.  These emit
   CFI directives only, no instructions.  */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push / pop REG and keep the unwind info in step.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offset of the first argument from %esp at entry (just above the
   return address).  */
#define PARMS	4

#define STR1	PARMS		/* first argument: string pointer */
#define STR2	STR1+4		/* second argument: character to find */

/*
 * wchar_t *wcschr (const wchar_t *s, wchar_t c)
 *
 * In:    STR1(%esp) = s, STR2(%esp) = c (4-byte elements).
 * Out:   %eax = address of the first element of s equal to c, or 0 if a
 *        zero element is reached first.  When c is 0 the address of the
 *        terminator is returned (a character match is tested before the
 *        terminator in every exit path).
 * Clobb: %eax, %ecx, %edx, %xmm0, %xmm1, %xmm2, flags.
 *        %edi is used on one path and is saved and restored there.
 *
 * Register roles:
 *   %ecx   address of the 16-byte block currently being examined
 *   %xmm1  c replicated into all four dwords
 *   %xmm2  zero before each compare; afterwards the per-dword "== 0" result
 *   %xmm0  block data; afterwards the per-dword "== c" result
 *   %eax   byte mask of elements equal to c   (pmovmskb of %xmm0)
 *   %edx   byte mask of elements equal to 0   (pmovmskb of %xmm2)
 *
 * pmovmskb yields one bit per byte, so each matching dword contributes
 * four adjacent mask bits: %al/%dl cover elements 0-1 of the block and
 * %ah/%dh cover elements 2-3.
 *
 * NOTE(review): the mask tests below look at bit 0 / the low nibble of
 * each mask byte, which presumes s is 4-byte aligned - confirm callers
 * never pass a misaligned pointer.
 */
	.text
ENTRY (wcschr)

	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	mov	%ecx, %eax
	/* Two punpckldq steps replicate the low dword of %xmm1 into all
	   four dwords.  */
	punpckldq %xmm1, %xmm1
	pxor	%xmm2, %xmm2
	punpckldq %xmm1, %xmm1

	/* If (s & 63) > 48, a 16-byte load starting at s would run past the
	   end of the 64-byte-aligned region containing s; take the
	   aligned-load path instead (presumably to avoid a load that
	   straddles a cache line or page - TODO confirm intent).  */
	and	$63, %eax
	cmp	$48, %eax
	ja	L(cross_cache)

	/* First 16 bytes, loaded unaligned directly from s.  */
	movdqu	(%ecx), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)
	/* Nothing found: round down so the loop (which adds 16 first)
	   continues at the next 16-byte-aligned block.  */
	and	$-16, %ecx
	jmp	L(loop)

	.p2align 4
L(cross_cache):
	PUSH	(%edi)
	mov	%ecx, %edi
	mov	%eax, %ecx
	and	$-16, %edi		/* %edi = s rounded down to 16 */
	and	$15, %ecx		/* %ecx = s & 15 */
	movdqa	(%edi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax

	/* Shift out the mask bits that belong to bytes located before s.  */
	sarl	%cl, %edx
	sarl	%cl, %eax
	test	%eax, %eax
	jz	L(unaligned_no_match)

	add	%edi, %ecx		/* %ecx = s again */
	POP	(%edi)

	/* Same decision sequence as L(match_case2), with masks relative
	   to s.  */
	test	%edx, %edx
	jz	L(match_case1)
	test	%al, %al
	jz	L(match_higth_case2)
	test	$15, %al
	jnz	L(match_case2_4)
	test	$15, %dl
	jnz	L(return_null)
	lea	4(%ecx), %eax
	ret

	/* No instruction is emitted here.  The code below is reached only
	   through the jump taken before the POP above, so the unwind info
	   must again describe %edi as pushed.  */
	CFI_PUSH (%edi)

	.p2align 4
L(unaligned_no_match):
	mov	%edi, %ecx		/* continue from the aligned block */
	POP	(%edi)

	/* No match in the remainder of this block; a terminator there ends
	   the search.  */
	test	%edx, %edx
	jnz	L(return_null)

	/* %xmm2 may hold hits from zero elements located before s; clear it
	   before falling into the loop.  */
	pxor	%xmm2, %xmm2

/* Loop start on aligned string.  Unrolled four times.  Each step is only
   reached when the previous compare found nothing, so %xmm2 is all-zero
   on entry to every step.  */
	.p2align 4
L(loop):
	add	$16, %ecx
	movdqa	(%ecx), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)
	add	$16, %ecx

	movdqa	(%ecx), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)
	add	$16, %ecx

	movdqa	(%ecx), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)
	add	$16, %ecx

	movdqa	(%ecx), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jz	L(loop)

/* The block at %ecx contains c, a zero element, or both.  */
	.p2align 4
L(matches):
	/* %edx was OR-ed with %eax above; reload the pure zero mask.  */
	pmovmskb %xmm2, %edx
	test	%eax, %eax
	jz	L(return_null)		/* terminator only */
	test	%edx, %edx
	jz	L(match_case1)		/* match only */

/* Both a match and a terminator are in the block: return the match only
   if it comes first.  */
	.p2align 4
L(match_case2):
	test	%al, %al
	jz	L(match_higth_case2)	/* no match in elements 0-1 */
	test	$15, %al
	jnz	L(match_case2_4)	/* match at element 0 */
	test	$15, %dl
	jnz	L(return_null)		/* terminator at element 0 */
	lea	4(%ecx), %eax		/* match at element 1 */
	ret

	.p2align 4
L(match_case2_4):
	mov	%ecx, %eax
	ret

/* Match is in elements 2-3; a terminator in elements 0-1 precedes it.  */
	.p2align 4
L(match_higth_case2):
	test	%dl, %dl
	jnz	L(return_null)
	test	$15, %ah
	jnz	L(match_case2_12)	/* match at element 2 */
	test	$15, %dh
	jnz	L(return_null)		/* terminator at element 2 */
	lea	12(%ecx), %eax		/* match at element 3 */
	ret

	.p2align 4
L(match_case2_12):
	lea	8(%ecx), %eax
	ret

/* A match and no terminator in the block: return the lowest match.  */
	.p2align 4
L(match_case1):
	test	%al, %al
	jz	L(match_higth_case1)

	test	$0x01, %al
	jnz	L(exit0)		/* element 0 */
	lea	4(%ecx), %eax		/* element 1 */
	ret

	.p2align 4
L(match_higth_case1):
	test	$0x01, %ah
	jnz	L(exit3)		/* element 2 */
	lea	12(%ecx), %eax		/* element 3 */
	ret

	.p2align 4
L(exit0):
	mov	%ecx, %eax
	ret

	.p2align 4
L(exit3):
	lea	8(%ecx), %eax
	ret

	.p2align 4
L(return_null):
	xor	%eax, %eax
	ret

END (wcschr)