/*
 * strchr - find a character in a string
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "../asmdefs.h"

/* Arguments and results.  */
#define srcin		x0
#define chrin		w1

#define result		x0

/* Locals and temporaries.  */
#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5

#define vrepchr		v0
#define vdata1		v1
#define vdata2		v2
#define vhas_nul1	v3
#define vhas_nul2	v4
#define vhas_chr1	v5
#define vhas_chr2	v6
#define vrepmask_0	v7
#define vrepmask_c	v16
#define vend1		v17
#define vend2		v18

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why.  */

/* char *__strchr_aarch64 (const char *s, int c)

   In:      x0 (srcin) = s, start of a NUL-terminated string.
            w1 (chrin) = c; only the low 8 bits are used (they are
                         replicated into every byte lane of vrepchr).
   Out:     x0 (result) = address of the first byte equal to c, or 0 if
                          the NUL terminator is reached first.  When the
                          low byte of c is 0 the address of the
                          terminator is returned.
   Clobbers: x2-x5, v0-v7, v16-v18, NZCV.
   Stack:   none (leaf function).

   Memory is only ever read in 32-byte hunks at 32-byte aligned
   addresses, so up to 31 bytes before s and bytes after the terminator
   (within the same aligned hunk) may be loaded; their contents never
   influence the result.  */

ENTRY (__strchr_aarch64)
	/* Magic constant 0x40100401 to allow us to identify which lane
	   matches the requested byte.  Magic constant 0x80200802 used
	   similarly for NUL termination.  */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
	dup	vrepmask_c.4s, wtmp2
	ands	tmp1, srcin, #31	/* tmp1 = misalignment of srcin (0..31).  */
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	L(loop)			/* Already aligned: no padding to mask.  */

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
	/* tmp1 = -2 * misalignment.  A register-specified shift uses the
	   amount modulo 64, so the lsr below shifts by 64 - 2 * misalignment
	   and leaves exactly the low 2 * misalignment bits set: the
	   syndrome bits that belong to the padding bytes.  */
	lsl	tmp1, tmp1, #1
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	mov	tmp3, #~0
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
	lsr	tmp1, tmp3, tmp1

	mov	tmp3, vend1.d[0]
	bic	tmp1, tmp3, tmp1	// Mask padding bits.
	cbnz	tmp1, L(tail)

	/* Align the hot loop entry to 16 bytes.  Any padding the assembler
	   inserts is executed at most once, on the fall-through from the
	   unaligned prologue above.  */
	.p2align 4
L(loop):
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Use a fast check for the termination condition.  Every byte of
	   vend1 is 0x00 or 0xff here, so each 64-bit half is a multiple
	   of 0xff.  Their sum is below 2^65 and 2^64 is not a multiple of
	   0xff, so the pairwise add cannot wrap to zero: the result is
	   zero iff no byte matched.  */
	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
	orr	vend1.16b, vend1.16b, vend2.16b
	addp	vend1.2d, vend1.2d, vend1.2d
	mov	tmp1, vend1.d[0]
	cbz	tmp1, L(loop)

	/* Termination condition found.  Now need to establish exactly why
	   we terminated.  */
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64

	mov	tmp1, vend1.d[0]
L(tail):
	/* On entry tmp1 holds a non-zero syndrome and src points just past
	   the hunk it describes.  */
	/* Count the trailing zeros, by bit reversing...  */
	rbit	tmp1, tmp1
	/* Re-bias source.  */
	sub	src, src, #32
	clz	tmp1, tmp1	/* And counting the leading zeros.  */
	/* Tmp1 is even if the target character was found first.  Otherwise
	   we've found the end of string and we weren't looking for NUL.  */
	tst	tmp1, #1
	add	result, src, tmp1, lsr #1	/* Two syndrome bits per byte.  */
	csel	result, result, xzr, eq		/* Odd bit index => return NULL.  */
	ret

END (__strchr_aarch64)