/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#ifndef ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_
#define ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_

#include "asm_support_arm64.S"

/* Parameters and result. */
#define src1        x0
#define src2        x1
#define limit       x2
#define result      x0

/* Internal variables. */
#define data1       x3
#define data1w      w3
#define data2       x4
#define data2w      w4
#define has_nul     x5
#define diff        x6
#define endloop     x7
#define tmp1        x8
#define tmp2        x9
#define tmp3        x10
#define limit_wd    x12
#define mask        x13

// WARNING: If you change this code to use x14 and x15, you must also change
// art_quick_string_compareto, which relies on these temps being unused.

/*
 * __memcmp16 -- compare two arrays of 16-bit half-words.
 *
 * In:   src1  (x0) = first buffer
 *       src2  (x1) = second buffer
 *       limit (x2) = number of HALF-WORDS to compare (converted to bytes below)
 * Out:  result (x0) = 0 if all `limit` half-words are equal; otherwise the
 *       (zero-extended 16-bit) difference src1[i] - src2[i] at the first
 *       differing half-word.
 * Clobbers: x3-x10, x12, x13, flags.  x14/x15 deliberately untouched (see
 * the WARNING above).
 *
 * Strategy: when both pointers share the same low 3 bits, compare eight
 * bytes (four half-words) per iteration; otherwise fall back to a simple
 * half-word-at-a-time loop at .Lmisaligned8.
 */
ENTRY __memcmp16
        cbz     limit, .Lret0            /* Zero-length compare: equal. */
        lsl     limit, limit, #1         /* Half-words to bytes. */
        eor     tmp1, src1, src2
        tst     tmp1, #7                 /* Same alignment mod 8? */
        b.ne    .Lmisaligned8
        ands    tmp1, src1, #7           /* tmp1 = bytes past an 8-byte boundary. */
        b.ne    .Lmutual_align
        /* limit_wd = number of 8-byte words to examine, rounding up so a
           partial final word is still loaded (excess bytes masked later). */
        add     limit_wd, limit, #7
        lsr     limit_wd, limit_wd, #3
        /* Start of performance-critical section -- one 64B cache line. */
.Lloop_aligned:
        ldr     data1, [src1], #8
        ldr     data2, [src2], #8
.Lstart_realigned:
        subs    limit_wd, limit_wd, #1
        eor     diff, data1, data2       /* Non-zero if differences found. */
        /* Fuse the two exit conditions into one register: if words remain
           (ne), endloop = diff; on the last word, endloop = ~xzr (all ones),
           forcing loop exit either way. */
        csinv   endloop, diff, xzr, ne   /* Last Dword or differences. */
        cbz     endloop, .Lloop_aligned
        /* End of performance-critical section -- one 64B cache line. */

        /* Not reached the limit, must have found a diff. */
        cbnz    limit_wd, .Lnot_limit

        /* Limit % 8 == 0 => all bytes significant. */
        ands    limit, limit, #7
        b.eq    .Lnot_limit

        /* Final word was partial: mask off the bytes past the limit so
           garbage beyond the buffer end cannot register as a difference. */
        lsl     limit, limit, #3         /* Bits -> bytes. */
        mov     mask, #~0
        lsl     mask, mask, limit        /* mask covers bytes >= limit. */
        bic     data1, data1, mask
        bic     data2, data2, mask

.Lnot_limit:

        // Swap the byte order of diff. Exact reverse is not important, as we only need to detect
        // the half-word.
        rev     diff, diff
        // The most significant bit of DIFF marks the least significant bit of change between DATA1/2
        clz     diff, diff
        // Mask off 0xF to have shift amount. Why does ARM64 not have BIC with immediate?!?!
        // (Rounds the bit index down to a multiple of 16, i.e. to the start
        // of the first differing half-word.)
        bfi     diff, xzr, #0, #4
        // Create a 16b mask
        mov     mask, #0xFFFF
        // Shift to the right half-word.
        lsr     data1, data1, diff
        lsr     data2, data2, diff
        // Mask the lowest half-word.
        and     data1, data1, mask
        and     data2, data2, mask
        // Compute difference.
        sub     result, data1, data2
        ret

.Lmutual_align:
        /* Sources are mutually aligned, but are not currently at an
           alignment boundary.  Round down the addresses and then mask off
           the bytes that precede the start point. */
        bic     src1, src1, #7
        bic     src2, src2, #7
        add     limit, limit, tmp1       /* Adjust the limit for the extra. */
        lsl     tmp1, tmp1, #3           /* Bytes beyond alignment -> bits. */
        ldr     data1, [src1], #8
        neg     tmp1, tmp1               /* Bits to alignment -64. */
        ldr     data2, [src2], #8
        mov     tmp2, #~0
        /* Little-endian.  Early bytes are at LSB.  Force the pre-start
           bytes of both words to all-ones so they compare equal. */
        lsr     tmp2, tmp2, tmp1         /* Shift (tmp1 & 63). */
        add     limit_wd, limit, #7
        orr     data1, data1, tmp2
        orr     data2, data2, tmp2
        lsr     limit_wd, limit_wd, #3
        b       .Lstart_realigned

.Lret0:
        mov     result, #0
        ret

        .p2align 6
.Lmisaligned8:
        /* Pointers differ in alignment mod 8: compare one half-word at a
           time.  Pre-decrement limit (bytes) by 1 so the `cs` check below
           fails exactly after the last half-word has been consumed. */
        sub     limit, limit, #1
1:
        /* Perhaps we can do better than this. */
        ldrh    data1w, [src1], #2
        ldrh    data2w, [src2], #2
        subs    limit, limit, #2
        /* If limit is still non-negative (cs), compare the half-words;
           otherwise force NZCV = 0 (clears Z) so the loop terminates. */
        ccmp    data1w, data2w, #0, cs   /* NZCV = 0b0000. */
        b.eq    1b
        /* ldrh zero-extended into w3/w4, so the full-register subtraction
           yields the 16-bit difference (or 0 when the limit ran out). */
        sub     result, data1, data2
        ret
END __memcmp16

#endif  // ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_