/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm64.h"

#include "arch/arm64/callee_save_frame_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_arm64.h"
#include "common_arm64.h"
#include "data_type-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsic_objects.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "mirror/var_handle.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/arm64/assembler_arm64.h"
#include "well_known_classes.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop

namespace art HIDDEN {

namespace arm64 {

using helpers::CPURegisterFrom;
using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::LocationFrom;
using helpers::InputCPURegisterAt;
using helpers::InputCPURegisterOrZeroRegAt;
using helpers::OperandFrom;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
using helpers::HRegisterFrom;
using helpers::InputRegisterAt;
using helpers::OutputRegister;

namespace {

ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
  return MemOperand(XRegisterFrom(location), offset);
}

}  // namespace

MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
  return codegen_->GetVIXLAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

using IntrinsicSlowPathARM64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM64,
                                                 SlowPathCodeARM64,
                                                 Arm64Assembler>;

#define __ codegen->GetVIXLAssembler()->

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
      : SlowPathCodeARM64(instruction), tmp_(tmp) {
  }

  void EmitNativeCode(CodeGenerator* codegen_in) override {
    DCHECK(codegen_in->EmitBakerReadBarrier());
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    const int32_t element_size = DataType::Size(DataType::Type::kReference);

    Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
    Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
    Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
    Register tmp_reg = WRegisterFrom(tmp_);

    __ Bind(GetEntryLabel());
    // The source range and destination pointer were initialized before entering the slow-path.
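    // The loop below copies one reference per iteration: it loads the reference from the source,
    // marks it through the read barrier entrypoint, and stores the (possibly re-poisoned) result
    // to the destination, advancing both pointers until src_curr_addr reaches src_stop_addr.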
    vixl::aarch64::Label slow_copy_loop;
    __ Bind(&slow_copy_loop);
    __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
    codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp_reg = ReadBarrier::Mark(tmp_reg);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(tmp_.reg(), LR);
    DCHECK_NE(tmp_.reg(), WSP);
    DCHECK_NE(tmp_.reg(), WZR);
    // IP0 is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
    DCHECK_NE(tmp_.reg(), IP0);
    DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
    // This runtime call does not require a stack map.
    codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
    __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(&slow_copy_loop, ne);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathARM64"; }

 private:
  Location tmp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
};
#undef __

bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

#define __ masm->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
          is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
          is64bit ? XRegisterFrom(input) : WRegisterFrom(input));
}

void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenerateReverseBytes(MacroAssembler* masm,
                                 DataType::Type type,
                                 CPURegister in,
                                 CPURegister out) {
  switch (type) {
    case DataType::Type::kUint16:
      __ Rev16(out.W(), in.W());
      break;
    case DataType::Type::kInt16:
      __ Rev16(out.W(), in.W());
      __ Sxth(out.W(), out.W());
      break;
    case DataType::Type::kInt32:
      __ Rev(out.W(), in.W());
      break;
    case DataType::Type::kInt64:
      __ Rev(out.X(), in.X());
      break;
    case DataType::Type::kFloat32:
      __ Rev(in.W(), in.W());  // Note: Clobbers `in`.
      __ Fmov(out.S(), in.W());
      break;
    case DataType::Type::kFloat64:
      __ Rev(in.X(), in.X());  // Note: Clobbers `in`.
      __ Fmov(out.D(), in.X());
      break;
    default:
      LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
      UNREACHABLE();
  }
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type type,
                            MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location out = locations->Out();
  GenerateReverseBytes(masm, type, CPURegisterFrom(in, type), CPURegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
}

static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    DataType::Type type,
                                    MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     DataType::Type type,
                                     MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

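  // Trailing zeros are computed as CTZ(x) == CLZ(bit-reverse(x)).
  // For example, for 32-bit x = 0b1000, Rbit produces 0x10000000 and Clz then returns 3.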
  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
  __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenReverse(LocationSummary* locations,
                       DataType::Type type,
                       MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenBitCount(HInvoke* instr, DataType::Type type, MacroAssembler* masm) {
  DCHECK(DataType::IsIntOrLongType(type)) << type;
  DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
  DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(instr, 0);
  Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
  VRegister fpr = (type == DataType::Type::kInt64) ? temps.AcquireD() : temps.AcquireS();

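  // There is no scalar population-count instruction in the baseline A64 ISA, so the value is
  // moved to a SIMD register: CNT counts the set bits in each byte and ADDV sums the per-byte
  // counts into the lowest byte, which is then moved back to the core register.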
  __ Fmov(fpr, src);
  __ Cnt(fpr.V8B(), fpr.V8B());
  __ Addv(fpr.B(), fpr.V8B());
  __ Fmov(dst, fpr);
}

void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

static void GenHighestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
  size_t high_bit = (type == DataType::Type::kInt64) ? 63u : 31u;
  size_t clz_high_bit = (type == DataType::Type::kInt64) ? 6u : 5u;

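  // Compute highestOneBit by shifting a single top-bit mask right by CLZ(src).
  // Example (Int32): for src = 0x00F0, Clz yields 24 and dst = 0x80000000 >> 24 = 0x80.
  // When src == 0, Clz yields 32 (only bit 5 set), so the Bic below clears the mask bit
  // and the result is 0, as required.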
  __ Clz(temp, src);
  __ Mov(dst, UINT64_C(1) << high_bit);  // MOV (bitmask immediate)
  __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit));  // Clear dst if src was 0.
  __ Lsr(dst, dst, temp);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenLowestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();

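  // Isolate the lowest set bit with dst = src & -src.
  // Example: src = 0b0110 gives dst = 0b0010; a zero input produces zero.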
  __ Neg(temp, src);
  __ And(dst, temp, src);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
  // Java 8 API definition for Math.round():
  // Return the closest long or int to the argument, with ties rounding to positive infinity.
  //
  // There is no single instruction in ARMv8 that can support the above definition.
  // We use FCVTAS here because it has the closest semantics.
  // FCVTAS performs rounding to nearest integer, ties away from zero.
  // For most inputs (positive values, zero or NaN), this instruction is enough.
  // We only need a little extra handling after FCVTAS if the input is a negative tie
  // (a negative value exactly halfway between two integers).
  //
  // The reason why we did not choose the FCVTPS instruction here is that although it rounds
  // toward positive infinity, it does not round to nearest.
  // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
  // If we were using that instruction, more handling code would be needed for most inputs.
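  //
  // Worked example: Math.round(-2.5) must return -2. FCVTAS(-2.5) yields -3 (ties away from
  // zero); the tie check below computes -2.5 - Frinta(-2.5) = -2.5 - (-3) = 0.5, so the result
  // is incremented back to -2. A non-tie such as -2.3 fails the 0.5 comparison and keeps the
  // FCVTAS result.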
  LocationSummary* l = invoke->GetLocations();
  VRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
  VRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
  vixl::aarch64::Label done;

  // Round to nearest integer, ties away from zero.
  __ Fcvtas(out_reg, in_reg);

  // For positive values, zero or NaN inputs, rounding is done.
  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, out_reg += 1.
  __ Frinta(tmp_fp, in_reg);
  __ Fsub(tmp_fp, in_reg, tmp_fp);
  __ Fcmp(tmp_fp, 0.5);
  __ Cinc(out_reg, out_reg, eq);

  __ Bind(&done);
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  codegen_->Load(DataType::Type::kReference, WRegisterFrom(invoke->GetLocations()->Out()),
                 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK((type == DataType::Type::kInt8) ||
         (type == DataType::Type::kInt32) ||
         (type == DataType::Type::kInt64) ||
         (type == DataType::Type::kReference));
  Location base_loc = locations->InAt(1);
  Register base = WRegisterFrom(base_loc);      // Object pointer.
  Location offset_loc = locations->InAt(2);
  Register offset = XRegisterFrom(offset_loc);  // Long offset.
  Location trg_loc = locations->Out();
  Register trg = RegisterFrom(trg_loc, type);

  if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
    Register temp = WRegisterFrom(locations->GetTemp(0));
    MacroAssembler* masm = codegen->GetVIXLAssembler();
    // Piggy-back on the field load path using introspection for the Baker read barrier.
    __ Add(temp, base, offset.W());  // Offset should not exceed 32 bits.
    codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
                                                   trg_loc,
                                                   base,
                                                   MemOperand(temp.X()),
                                                   /* needs_null_check= */ false,
                                                   is_volatile);
  } else {
    // Other cases.
    MemOperand mem_op(base.X(), offset);
    if (is_volatile) {
      codegen->LoadAcquire(invoke, type, trg, mem_op, /* needs_null_check= */ true);
    } else {
      codegen->Load(type, trg, mem_op);
    }

    if (type == DataType::Type::kReference) {
      DCHECK(trg.IsW());
      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
    }
  }
}

static void CreateUnsafeGetLocations(ArenaAllocator* allocator,
                                     HInvoke* invoke,
                                     CodeGeneratorARM64* codegen) {
  bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
    // We need a temporary register for the read barrier load in order to use
    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
    locations->AddTemp(FixedTempLocation());
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}

void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
  VisitJdkUnsafeGet(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  VisitJdkUnsafeGetLong(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetLongVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  VisitJdkUnsafeGetReference(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetReferenceVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetByte(HInvoke* invoke) {
  VisitJdkUnsafeGetByte(invoke);
}

void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGet(HInvoke* invoke) {
  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
  VisitJdkUnsafeGet(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetVolatile(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  VisitJdkUnsafeGetLong(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetLongVolatile(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  VisitJdkUnsafeGetReference(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetReferenceVolatile(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetByte(HInvoke* invoke) {
  VisitJdkUnsafeGetByte(invoke);
}

void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/ false, codegen_);
}

static void CreateUnsafePutLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) {
  VisitJdkUnsafePut(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  VisitJdkUnsafePutOrdered(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  VisitJdkUnsafePutVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
  VisitJdkUnsafePutReference(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  VisitJdkUnsafePutObjectOrdered(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  VisitJdkUnsafePutReferenceVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
  VisitJdkUnsafePutLong(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  VisitJdkUnsafePutLongOrdered(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  VisitJdkUnsafePutLongVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutByte(HInvoke* invoke) {
  VisitJdkUnsafePutByte(invoke);
}

void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePut(HInvoke* invoke) {
  CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
  CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
  CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
  CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutReference(HInvoke* invoke) {
  CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
  CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
  CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLong(HInvoke* invoke) {
  CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
  CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
  CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
  CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutByte(HInvoke* invoke) {
  CreateUnsafePutLocations(allocator_, invoke);
}

static void GenUnsafePut(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = codegen->GetVIXLAssembler();

  Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
  Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
  Register value = RegisterFrom(locations->InAt(3), type);
  Register source = value;
  MemOperand mem_op(base.X(), offset);

  {
    // We use a block to end the scratch scope before the write barrier, thus
    // freeing the temporary registers so they can be used in `MarkGCCard`.
    UseScratchRegisterScope temps(masm);

    if (kPoisonHeapReferences && type == DataType::Type::kReference) {
      DCHECK(value.IsW());
      Register temp = temps.AcquireW();
      __ Mov(temp.W(), value.W());
      codegen->GetAssembler()->PoisonHeapReference(temp.W());
      source = temp;
    }

    if (is_volatile || is_ordered) {
      codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check= */ false);
    } else {
      codegen->Store(type, source, mem_op);
    }
  }

  if (type == DataType::Type::kReference) {
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MaybeMarkGCCard(base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
  VisitJdkUnsafePut(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  VisitJdkUnsafePutOrdered(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  VisitJdkUnsafePutVolatile(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
  VisitJdkUnsafePutReference(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  VisitJdkUnsafePutObjectOrdered(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  VisitJdkUnsafePutReferenceVolatile(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
  VisitJdkUnsafePutLong(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  VisitJdkUnsafePutLongOrdered(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  VisitJdkUnsafePutLongVolatile(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutByte(HInvoke* invoke) {
  VisitJdkUnsafePutByte(invoke);
}

void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /*is_volatile=*/ false,
               /*is_ordered=*/ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /*is_volatile=*/ false,
               /*is_ordered=*/ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /*is_volatile=*/ true,
               /*is_ordered=*/ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /*is_volatile=*/ true,
               /*is_ordered=*/ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutReference(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /*is_volatile=*/ false,
               /*is_ordered=*/ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /*is_volatile=*/ false,
               /*is_ordered=*/ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /*is_volatile=*/ true,
               /*is_ordered=*/ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /*is_volatile=*/ true,
               /*is_ordered=*/ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /*is_volatile=*/ false,
               /*is_ordered=*/ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /*is_volatile=*/ false,
               /*is_ordered=*/ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /*is_volatile=*/ true,
               /*is_ordered=*/ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /*is_volatile=*/ true,
               /*is_ordered=*/ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutByte(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt8,
               /*is_volatile=*/ false,
               /*is_ordered=*/ false,
               codegen_);
}

static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
                                     HInvoke* invoke,
                                     CodeGeneratorARM64* codegen) {
  const bool can_call = codegen->EmitReadBarrier() && IsUnsafeCASReference(invoke);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void EmitLoadExclusive(CodeGeneratorARM64* codegen,
                              DataType::Type type,
                              Register ptr,
                              Register old_value,
                              bool use_load_acquire) {
  Arm64Assembler* assembler = codegen->GetAssembler();
  MacroAssembler* masm = assembler->GetVIXLAssembler();
  switch (type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (use_load_acquire) {
        __ Ldaxrb(old_value, MemOperand(ptr));
      } else {
        __ Ldxrb(old_value, MemOperand(ptr));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (use_load_acquire) {
        __ Ldaxrh(old_value, MemOperand(ptr));
      } else {
        __ Ldxrh(old_value, MemOperand(ptr));
      }
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kReference:
      if (use_load_acquire) {
        __ Ldaxr(old_value, MemOperand(ptr));
      } else {
        __ Ldxr(old_value, MemOperand(ptr));
      }
      break;
    default:
      LOG(FATAL) << "Unexpected type: " << type;
      UNREACHABLE();
  }
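  // Normalize the loaded value so it can be compared directly against the expected value:
  // sign-extend sub-word types that were loaded zero-extended, and unpoison references.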
  switch (type) {
    case DataType::Type::kInt8:
      __ Sxtb(old_value, old_value);
      break;
    case DataType::Type::kInt16:
      __ Sxth(old_value, old_value);
      break;
    case DataType::Type::kReference:
      assembler->MaybeUnpoisonHeapReference(old_value);
      break;
    default:
      break;
  }
}

static void EmitStoreExclusive(CodeGeneratorARM64* codegen,
                               DataType::Type type,
                               Register ptr,
                               Register store_result,
                               Register new_value,
                               bool use_store_release) {
  Arm64Assembler* assembler = codegen->GetAssembler();
  MacroAssembler* masm = assembler->GetVIXLAssembler();
  if (type == DataType::Type::kReference) {
    assembler->MaybePoisonHeapReference(new_value);
  }
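  // The store-exclusive instructions write 0 to `store_result` on success and 1 if the exclusive
  // monitor was lost and the store must be retried.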
1203 switch (type) {
1204 case DataType::Type::kBool:
1205 case DataType::Type::kUint8:
1206 case DataType::Type::kInt8:
1207 if (use_store_release) {
1208 __ Stlxrb(store_result, new_value, MemOperand(ptr));
1209 } else {
1210 __ Stxrb(store_result, new_value, MemOperand(ptr));
1211 }
1212 break;
1213 case DataType::Type::kUint16:
1214 case DataType::Type::kInt16:
1215 if (use_store_release) {
1216 __ Stlxrh(store_result, new_value, MemOperand(ptr));
1217 } else {
1218 __ Stxrh(store_result, new_value, MemOperand(ptr));
1219 }
1220 break;
1221 case DataType::Type::kInt32:
1222 case DataType::Type::kInt64:
1223 case DataType::Type::kReference:
1224 if (use_store_release) {
1225 __ Stlxr(store_result, new_value, MemOperand(ptr));
1226 } else {
1227 __ Stxr(store_result, new_value, MemOperand(ptr));
1228 }
1229 break;
1230 default:
1231 LOG(FATAL) << "Unexpected type: " << type;
1232 UNREACHABLE();
1233 }
1234 if (type == DataType::Type::kReference) {
1235 assembler->MaybeUnpoisonHeapReference(new_value);
1236 }
1237 }
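// Illustrative sketch (assumed shape, not verbatim output): callers pair EmitLoadExclusive()
// and EmitStoreExclusive() into a load-linked/store-conditional retry loop. For a 32-bit value
// with seq_cst ordering the pair is expected to look roughly like:
//   ldaxr w_old, [x_ptr]         // Exclusive load with acquire.
//   ...                          // Compute or compare using w_old.
//   stlxr w_res, w_new, [x_ptr]  // Exclusive store with release; w_res == 0 on success.
// The register names (w_old, w_new, w_res, x_ptr) are placeholders, not the actual allocation.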
1238
GenerateCompareAndSet(CodeGeneratorARM64 * codegen,DataType::Type type,std::memory_order order,bool strong,vixl::aarch64::Label * cmp_failure,Register ptr,Register new_value,Register old_value,Register store_result,Register expected,Register expected2=Register ())1239 static void GenerateCompareAndSet(CodeGeneratorARM64* codegen,
1240 DataType::Type type,
1241 std::memory_order order,
1242 bool strong,
1243 vixl::aarch64::Label* cmp_failure,
1244 Register ptr,
1245 Register new_value,
1246 Register old_value,
1247 Register store_result,
1248 Register expected,
1249 Register expected2 = Register()) {
1250 // The `expected2` register is valid only for the reference slow path; it holds the unmarked
1251 // old value from the main path's CAS attempt, taken when the marked old value matched `expected`.
1252 DCHECK_IMPLIES(expected2.IsValid(), type == DataType::Type::kReference);
1253
1254 DCHECK(ptr.IsX());
1255 DCHECK_EQ(new_value.IsX(), type == DataType::Type::kInt64);
1256 DCHECK_EQ(old_value.IsX(), type == DataType::Type::kInt64);
1257 DCHECK(store_result.IsW());
1258 DCHECK_EQ(expected.IsX(), type == DataType::Type::kInt64);
1259 DCHECK_IMPLIES(expected2.IsValid(), expected2.IsW());
1260
1261 Arm64Assembler* assembler = codegen->GetAssembler();
1262 MacroAssembler* masm = assembler->GetVIXLAssembler();
1263
1264 bool use_load_acquire =
1265 (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
1266 bool use_store_release =
1267 (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
1268 DCHECK(use_load_acquire || use_store_release || order == std::memory_order_relaxed);
1269
1270 // repeat: {
1271 // old_value = [ptr]; // Load exclusive.
1272 // if (old_value != expected && old_value != expected2) goto cmp_failure;
1273 // store_result = failed([ptr] <- new_value); // Store exclusive.
1274 // }
1275 // if (strong) {
1276 // if (store_result) goto repeat; // Repeat until compare fails or store exclusive succeeds.
1277 // } else {
1278 // store_result = store_result ^ 1; // Report success as 1, failure as 0.
1279 // }
1280 //
1281 // Flag Z indicates whether `old_value == expected || old_value == expected2`.
1282 // (If `expected2` is not valid, the `old_value == expected2` part is not emitted.)
1283
1284 vixl::aarch64::Label loop_head;
1285 if (strong) {
1286 __ Bind(&loop_head);
1287 }
1288 EmitLoadExclusive(codegen, type, ptr, old_value, use_load_acquire);
1289 __ Cmp(old_value, expected);
1290 if (expected2.IsValid()) {
1291 __ Ccmp(old_value, expected2, ZFlag, ne);
1292 }
1293 // If the comparison failed, the Z flag is cleared as we branch to the `cmp_failure` label.
1294 // If the comparison succeeded, the Z flag is set and remains set after the end of the
1295 // code emitted here, unless we retry the whole operation.
1296 __ B(cmp_failure, ne);
1297 EmitStoreExclusive(codegen, type, ptr, store_result, new_value, use_store_release);
1298 if (strong) {
1299 __ Cbnz(store_result, &loop_head);
1300 } else {
1301 // Flip the `store_result` register to indicate success by 1 and failure by 0.
1302 __ Eor(store_result, store_result, 1);
1303 }
1304 }
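// Illustrative expansion (assumed, not verbatim output): a strong, seq_cst CAS on kInt32 emitted
// by the code above should look roughly like:
//   loop: ldaxr w_old, [x_ptr]
//         cmp   w_old, w_expected
//         b.ne  cmp_failure            // Z is clear on failure.
//         stlxr w_res, w_new, [x_ptr]
//         cbnz  w_res, loop            // Retry if the exclusive store failed.
// with Z set on fall-through to indicate success; a weak CAS drops the retry and flips w_res.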
1305
1306 class ReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 {
1307 public:
ReadBarrierCasSlowPathARM64(HInvoke * invoke,std::memory_order order,bool strong,Register base,Register offset,Register expected,Register new_value,Register old_value,Register old_value_temp,Register store_result,bool update_old_value,CodeGeneratorARM64 * arm64_codegen)1308 ReadBarrierCasSlowPathARM64(HInvoke* invoke,
1309 std::memory_order order,
1310 bool strong,
1311 Register base,
1312 Register offset,
1313 Register expected,
1314 Register new_value,
1315 Register old_value,
1316 Register old_value_temp,
1317 Register store_result,
1318 bool update_old_value,
1319 CodeGeneratorARM64* arm64_codegen)
1320 : SlowPathCodeARM64(invoke),
1321 order_(order),
1322 strong_(strong),
1323 base_(base),
1324 offset_(offset),
1325 expected_(expected),
1326 new_value_(new_value),
1327 old_value_(old_value),
1328 old_value_temp_(old_value_temp),
1329 store_result_(store_result),
1330 update_old_value_(update_old_value),
1331 mark_old_value_slow_path_(nullptr),
1332 update_old_value_slow_path_(nullptr) {
1333 if (!kUseBakerReadBarrier) {
1334 // We need to add the slow path now; it is too late to do so when emitting the slow path code.
1335 mark_old_value_slow_path_ = arm64_codegen->AddReadBarrierSlowPath(
1336 invoke,
1337 Location::RegisterLocation(old_value_temp.GetCode()),
1338 Location::RegisterLocation(old_value.GetCode()),
1339 Location::RegisterLocation(base.GetCode()),
1340 /*offset=*/ 0u,
1341 /*index=*/ Location::RegisterLocation(offset.GetCode()));
1342 if (update_old_value_) {
1343 update_old_value_slow_path_ = arm64_codegen->AddReadBarrierSlowPath(
1344 invoke,
1345 Location::RegisterLocation(old_value.GetCode()),
1346 Location::RegisterLocation(old_value_temp.GetCode()),
1347 Location::RegisterLocation(base.GetCode()),
1348 /*offset=*/ 0u,
1349 /*index=*/ Location::RegisterLocation(offset.GetCode()));
1350 }
1351 }
1352 }
1353
GetDescription() const1354 const char* GetDescription() const override { return "ReadBarrierCasSlowPathARM64"; }
1355
EmitNativeCode(CodeGenerator * codegen)1356 void EmitNativeCode(CodeGenerator* codegen) override {
1357 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1358 Arm64Assembler* assembler = arm64_codegen->GetAssembler();
1359 MacroAssembler* masm = assembler->GetVIXLAssembler();
1360 __ Bind(GetEntryLabel());
1361
1362 // Mark the `old_value_` from the main path and compare with `expected_`.
1363 if (kUseBakerReadBarrier) {
1364 DCHECK(mark_old_value_slow_path_ == nullptr);
1365 arm64_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_temp_, old_value_);
1366 } else {
1367 DCHECK(mark_old_value_slow_path_ != nullptr);
1368 __ B(mark_old_value_slow_path_->GetEntryLabel());
1369 __ Bind(mark_old_value_slow_path_->GetExitLabel());
1370 }
1371 __ Cmp(old_value_temp_, expected_);
1372 if (update_old_value_) {
1373 // Update the old value if we're going to return from the slow path.
1374 __ Csel(old_value_, old_value_temp_, old_value_, ne);
1375 }
1376 __ B(GetExitLabel(), ne); // If taken, Z=false indicates failure.
1377
1378 // The `old_value` we have read did not match `expected` (which is always a to-space
1379 // reference) but after the read barrier the marked to-space value matched, so the
1380 // `old_value` must be a from-space reference to the same object. Do the same CAS loop
1381 // as the main path but check for both `expected` and the unmarked old value
1382 // representing the to-space and from-space references for the same object.
1383
1384 UseScratchRegisterScope temps(masm);
1385 DCHECK_IMPLIES(store_result_.IsValid(), !temps.IsAvailable(store_result_));
1386 Register tmp_ptr = temps.AcquireX();
1387 Register store_result = store_result_.IsValid() ? store_result_ : temps.AcquireW();
1388
1389 // Recalculate the `tmp_ptr` from the main path; it was clobbered by the read barrier above.
1390 __ Add(tmp_ptr, base_.X(), Operand(offset_));
1391
1392 vixl::aarch64::Label mark_old_value;
1393 GenerateCompareAndSet(arm64_codegen,
1394 DataType::Type::kReference,
1395 order_,
1396 strong_,
1397 /*cmp_failure=*/ update_old_value_ ? &mark_old_value : GetExitLabel(),
1398 tmp_ptr,
1399 new_value_,
1400 /*old_value=*/ old_value_temp_,
1401 store_result,
1402 expected_,
1403 /*expected2=*/ old_value_);
1404 if (update_old_value_) {
1405 // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
1406 // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
1407 __ Mov(old_value_, expected_);
1408 }
1409
1410 // Z=true from the CMP+CCMP in GenerateCompareAndSet() above indicates comparison success.
1411 // For strong CAS, that's the overall success. For weak CAS, the code also needs
1412 // to check the `store_result` after returning from the slow path.
1413 __ B(GetExitLabel());
1414
1415 if (update_old_value_) {
1416 __ Bind(&mark_old_value);
1417 if (kUseBakerReadBarrier) {
1418 DCHECK(update_old_value_slow_path_ == nullptr);
1419 arm64_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_, old_value_temp_);
1420 } else {
1421 // Note: We could redirect the `failure` above directly to the entry label and bind
1422 // the exit label in the main path, but the main path would need to access the
1423 // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
1424 DCHECK(update_old_value_slow_path_ != nullptr);
1425 __ B(update_old_value_slow_path_->GetEntryLabel());
1426 __ Bind(update_old_value_slow_path_->GetExitLabel());
1427 }
1428 __ B(GetExitLabel());
1429 }
1430 }
1431
1432 private:
1433 std::memory_order order_;
1434 bool strong_;
1435 Register base_;
1436 Register offset_;
1437 Register expected_;
1438 Register new_value_;
1439 Register old_value_;
1440 Register old_value_temp_;
1441 Register store_result_;
1442 bool update_old_value_;
1443 SlowPathCodeARM64* mark_old_value_slow_path_;
1444 SlowPathCodeARM64* update_old_value_slow_path_;
1445 };
1446
GenUnsafeCas(HInvoke * invoke,DataType::Type type,CodeGeneratorARM64 * codegen)1447 static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) {
1448 MacroAssembler* masm = codegen->GetVIXLAssembler();
1449 LocationSummary* locations = invoke->GetLocations();
1450
1451 Register out = WRegisterFrom(locations->Out()); // Boolean result.
1452 Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
1453 Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
1454 Register expected = RegisterFrom(locations->InAt(3), type); // Expected.
1455 Register new_value = RegisterFrom(locations->InAt(4), type); // New value.
1456
1457 // This needs to come before acquiring the temp registers, as MarkGCCard also uses VIXL temps.
1458 if (type == DataType::Type::kReference) {
1459 // Mark card for object assuming new value is stored.
1460 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
1461 codegen->MaybeMarkGCCard(base, new_value, new_value_can_be_null);
1462 }
1463
1464 UseScratchRegisterScope temps(masm);
1465 Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory.
1466 Register old_value; // Value in memory.
1467
1468 vixl::aarch64::Label exit_loop_label;
1469 vixl::aarch64::Label* exit_loop = &exit_loop_label;
1470 vixl::aarch64::Label* cmp_failure = &exit_loop_label;
1471
1472 if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
1473 // We need to store the `old_value` in a non-scratch register to make sure
1474 // the read barrier in the slow path does not clobber it.
1475 old_value = WRegisterFrom(locations->GetTemp(0)); // The old value from main path.
1476 // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
1477 // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
1478 Register old_value_temp = WRegisterFrom(locations->GetTemp(1));
1479 ReadBarrierCasSlowPathARM64* slow_path =
1480 new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARM64(
1481 invoke,
1482 std::memory_order_seq_cst,
1483 /*strong=*/ true,
1484 base,
1485 offset,
1486 expected,
1487 new_value,
1488 old_value,
1489 old_value_temp,
1490 /*store_result=*/ Register(), // Use a scratch register.
1491 /*update_old_value=*/ false,
1492 codegen);
1493 codegen->AddSlowPath(slow_path);
1494 exit_loop = slow_path->GetExitLabel();
1495 cmp_failure = slow_path->GetEntryLabel();
1496 } else {
1497 old_value = temps.AcquireSameSizeAs(new_value);
1498 }
1499
1500 __ Add(tmp_ptr, base.X(), Operand(offset));
1501
1502 GenerateCompareAndSet(codegen,
1503 type,
1504 std::memory_order_seq_cst,
1505 /*strong=*/ true,
1506 cmp_failure,
1507 tmp_ptr,
1508 new_value,
1509 old_value,
1510 /*store_result=*/ old_value.W(), // Reuse `old_value` for ST*XR* result.
1511 expected);
1512 __ Bind(exit_loop);
1513 __ Cset(out, eq);
1514 }
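// At the Java level this roughly corresponds to calls such as
// `unsafe.compareAndSwapInt(obj, offset, expected, newValue)` returning true iff the field held
// `expected` and was replaced by `newValue`; the Cset above materializes that boolean from the
// Z flag left by the CAS loop or by the read barrier slow path. (Sketch for orientation only.)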
1515
VisitUnsafeCASInt(HInvoke * invoke)1516 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
1517 VisitJdkUnsafeCASInt(invoke);
1518 }
VisitUnsafeCASLong(HInvoke * invoke)1519 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
1520 VisitJdkUnsafeCASLong(invoke);
1521 }
VisitUnsafeCASObject(HInvoke * invoke)1522 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
1523 VisitJdkUnsafeCASObject(invoke);
1524 }
1525
VisitJdkUnsafeCASInt(HInvoke * invoke)1526 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
1527 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
1528 VisitJdkUnsafeCompareAndSetInt(invoke);
1529 }
VisitJdkUnsafeCASLong(HInvoke * invoke)1530 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
1531 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
1532 VisitJdkUnsafeCompareAndSetLong(invoke);
1533 }
VisitJdkUnsafeCASObject(HInvoke * invoke)1534 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
1535 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
1536 VisitJdkUnsafeCompareAndSetReference(invoke);
1537 }
1538
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)1539 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
1540 CreateUnsafeCASLocations(allocator_, invoke, codegen_);
1541 }
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)1542 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
1543 CreateUnsafeCASLocations(allocator_, invoke, codegen_);
1544 }
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)1545 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
1546 // The only supported read barrier implementation is the Baker-style read barrier.
1547 if (codegen_->EmitNonBakerReadBarrier()) {
1548 return;
1549 }
1550
1551 CreateUnsafeCASLocations(allocator_, invoke, codegen_);
1552 if (codegen_->EmitReadBarrier()) {
1553 // We need two non-scratch temporary registers for read barrier.
1554 LocationSummary* locations = invoke->GetLocations();
1555 if (kUseBakerReadBarrier) {
1556 locations->AddTemp(Location::RequiresRegister());
1557 locations->AddTemp(Location::RequiresRegister());
1558 } else {
1559 // To preserve the old value across the non-Baker read barrier
1560 // slow path, use a fixed callee-save register.
1561 constexpr int first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
1562 locations->AddTemp(Location::RegisterLocation(first_callee_save));
1563 // To reduce the number of moves, request x0 as the second temporary.
1564 DCHECK(InvokeRuntimeCallingConvention().GetReturnLocation(DataType::Type::kReference).Equals(
1565 Location::RegisterLocation(x0.GetCode())));
1566 locations->AddTemp(Location::RegisterLocation(x0.GetCode()));
1567 }
1568 }
1569 }
1570
VisitUnsafeCASInt(HInvoke * invoke)1571 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
1572 VisitJdkUnsafeCASInt(invoke);
1573 }
VisitUnsafeCASLong(HInvoke * invoke)1574 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
1575 VisitJdkUnsafeCASLong(invoke);
1576 }
VisitUnsafeCASObject(HInvoke * invoke)1577 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
1578 VisitJdkUnsafeCASObject(invoke);
1579 }
1580
VisitJdkUnsafeCASInt(HInvoke * invoke)1581 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
1582 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
1583 VisitJdkUnsafeCompareAndSetInt(invoke);
1584 }
VisitJdkUnsafeCASLong(HInvoke * invoke)1585 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
1586 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
1587 VisitJdkUnsafeCompareAndSetLong(invoke);
1588 }
VisitJdkUnsafeCASObject(HInvoke * invoke)1589 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
1590 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
1591 VisitJdkUnsafeCompareAndSetReference(invoke);
1592 }
1593
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)1594 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
1595 GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_);
1596 }
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)1597 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
1598 GenUnsafeCas(invoke, DataType::Type::kInt64, codegen_);
1599 }
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)1600 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
1601 // The only supported read barrier implementation is the Baker-style read barrier.
1602 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
1603
1604 GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
1605 }
1606
1607 enum class GetAndUpdateOp {
1608 kSet,
1609 kAdd,
1610 kAddWithByteSwap,
1611 kAnd,
1612 kOr,
1613 kXor
1614 };
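// Assumed mapping, for orientation only: kSet backs getAndSet-style operations, kAdd backs
// getAndAdd, kAnd/kOr/kXor appear to back the getAndBitwiseAnd/Or/Xor families, and
// kAddWithByteSwap appears to serve additions on byte-swapped (byte-reversed memory order) views.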
1615
GenerateGetAndUpdate(CodeGeneratorARM64 * codegen,GetAndUpdateOp get_and_update_op,DataType::Type load_store_type,std::memory_order order,Register ptr,CPURegister arg,CPURegister old_value)1616 static void GenerateGetAndUpdate(CodeGeneratorARM64* codegen,
1617 GetAndUpdateOp get_and_update_op,
1618 DataType::Type load_store_type,
1619 std::memory_order order,
1620 Register ptr,
1621 CPURegister arg,
1622 CPURegister old_value) {
1623 MacroAssembler* masm = codegen->GetVIXLAssembler();
1624 UseScratchRegisterScope temps(masm);
1625 Register store_result = temps.AcquireW();
1626
1627 DCHECK_EQ(old_value.GetSizeInBits(), arg.GetSizeInBits());
1628 Register old_value_reg;
1629 Register new_value;
1630 switch (get_and_update_op) {
1631 case GetAndUpdateOp::kSet:
1632 old_value_reg = old_value.IsX() ? old_value.X() : old_value.W();
1633 new_value = arg.IsX() ? arg.X() : arg.W();
1634 break;
1635 case GetAndUpdateOp::kAddWithByteSwap:
1636 case GetAndUpdateOp::kAdd:
1637 if (arg.IsVRegister()) {
1638 old_value_reg = arg.IsD() ? temps.AcquireX() : temps.AcquireW();
1639 new_value = old_value_reg; // Use the same temporary.
1640 break;
1641 }
1642 FALLTHROUGH_INTENDED;
1643 case GetAndUpdateOp::kAnd:
1644 case GetAndUpdateOp::kOr:
1645 case GetAndUpdateOp::kXor:
1646 old_value_reg = old_value.IsX() ? old_value.X() : old_value.W();
1647 new_value = old_value.IsX() ? temps.AcquireX() : temps.AcquireW();
1648 break;
1649 }
1650
1651 bool use_load_acquire =
1652 (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
1653 bool use_store_release =
1654 (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
1655 DCHECK(use_load_acquire || use_store_release);
1656
1657 vixl::aarch64::Label loop_label;
1658 __ Bind(&loop_label);
1659 EmitLoadExclusive(codegen, load_store_type, ptr, old_value_reg, use_load_acquire);
1660 switch (get_and_update_op) {
1661 case GetAndUpdateOp::kSet:
1662 break;
1663 case GetAndUpdateOp::kAddWithByteSwap:
1664 // To avoid unnecessary sign extension before REV16, the caller must specify `kUint16`
1665 // instead of `kInt16` and do the sign-extension explicitly afterwards.
1666 DCHECK_NE(load_store_type, DataType::Type::kInt16);
1667 GenerateReverseBytes(masm, load_store_type, old_value_reg, old_value_reg);
1668 FALLTHROUGH_INTENDED;
1669 case GetAndUpdateOp::kAdd:
1670 if (arg.IsVRegister()) {
1671 VRegister old_value_vreg = old_value.IsD() ? old_value.D() : old_value.S();
1672 VRegister sum = temps.AcquireSameSizeAs(old_value_vreg);
1673 __ Fmov(old_value_vreg, old_value_reg);
1674 __ Fadd(sum, old_value_vreg, arg.IsD() ? arg.D() : arg.S());
1675 __ Fmov(new_value, sum);
1676 } else {
1677 __ Add(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1678 }
1679 if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) {
1680 GenerateReverseBytes(masm, load_store_type, new_value, new_value);
1681 }
1682 break;
1683 case GetAndUpdateOp::kAnd:
1684 __ And(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1685 break;
1686 case GetAndUpdateOp::kOr:
1687 __ Orr(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1688 break;
1689 case GetAndUpdateOp::kXor:
1690 __ Eor(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1691 break;
1692 }
1693 EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value, use_store_release);
1694 __ Cbnz(store_result, &loop_label);
1695 }
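// Illustrative expansion (assumed, not verbatim output): GetAndUpdateOp::kAdd on kInt32 with
// seq_cst ordering should emit roughly:
//   loop: ldaxr w_old, [x_ptr]
//         add   w_new, w_old, w_arg
//         stlxr w_res, w_new, [x_ptr]
//         cbnz  w_res, loop
// leaving the pre-update value in w_old. Register names are placeholders.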
1696
CreateUnsafeGetAndUpdateLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorARM64 * codegen)1697 static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
1698 HInvoke* invoke,
1699 CodeGeneratorARM64* codegen) {
1700 const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
1701 LocationSummary* locations =
1702 new (allocator) LocationSummary(invoke,
1703 can_call
1704 ? LocationSummary::kCallOnSlowPath
1705 : LocationSummary::kNoCall,
1706 kIntrinsified);
1707 if (can_call && kUseBakerReadBarrier) {
1708 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
1709 }
1710 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1711 locations->SetInAt(1, Location::RequiresRegister());
1712 locations->SetInAt(2, Location::RequiresRegister());
1713 locations->SetInAt(3, Location::RequiresRegister());
1714 locations->AddTemp(Location::RequiresRegister());
1715
1716 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1717 }
1718
GenUnsafeGetAndUpdate(HInvoke * invoke,DataType::Type type,CodeGeneratorARM64 * codegen,GetAndUpdateOp get_and_update_op)1719 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
1720 DataType::Type type,
1721 CodeGeneratorARM64* codegen,
1722 GetAndUpdateOp get_and_update_op) {
1723 MacroAssembler* masm = codegen->GetVIXLAssembler();
1724 LocationSummary* locations = invoke->GetLocations();
1725
1726 Register out = RegisterFrom(locations->Out(), type); // Result.
1727 Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
1728 Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
1729 Register arg = RegisterFrom(locations->InAt(3), type); // New value or addend.
1730 Register tmp_ptr = XRegisterFrom(locations->GetTemp(0)); // Pointer to actual memory.
1731
1732 // This needs to come before acquiring the temp registers, as MarkGCCard also uses VIXL temps.
1733 if (type == DataType::Type::kReference) {
1734 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
1735 // Mark card for object, as a new value is about to be stored.
1736 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
1737 codegen->MaybeMarkGCCard(base, /*value=*/arg, new_value_can_be_null);
1738 }
1739
1740 __ Add(tmp_ptr, base.X(), Operand(offset));
1741 GenerateGetAndUpdate(codegen,
1742 get_and_update_op,
1743 type,
1744 std::memory_order_seq_cst,
1745 tmp_ptr,
1746 arg,
1747 /*old_value=*/ out);
1748
1749 if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
1750 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
1751 if (kUseBakerReadBarrier) {
1752 codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out.W(), out.W());
1753 } else {
1754 codegen->GenerateReadBarrierSlow(
1755 invoke,
1756 Location::RegisterLocation(out.GetCode()),
1757 Location::RegisterLocation(out.GetCode()),
1758 Location::RegisterLocation(base.GetCode()),
1759 /*offset=*/ 0u,
1760 /*index=*/ Location::RegisterLocation(offset.GetCode()));
1761 }
1762 }
1763 }
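// Note (summary of the code above): for a reference getAndSet with read barriers enabled, the
// raw reference swapped out of the field may be a from-space pointer, so it is passed through
// the Baker read barrier (or the generic read barrier slow path) before being returned.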
1764
VisitUnsafeGetAndAddInt(HInvoke * invoke)1765 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
1766 VisitJdkUnsafeGetAndAddInt(invoke);
1767 }
VisitUnsafeGetAndAddLong(HInvoke * invoke)1768 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
1769 VisitJdkUnsafeGetAndAddLong(invoke);
1770 }
VisitUnsafeGetAndSetInt(HInvoke * invoke)1771 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
1772 VisitJdkUnsafeGetAndSetInt(invoke);
1773 }
VisitUnsafeGetAndSetLong(HInvoke * invoke)1774 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
1775 VisitJdkUnsafeGetAndSetLong(invoke);
1776 }
VisitUnsafeGetAndSetObject(HInvoke * invoke)1777 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
1778 VisitJdkUnsafeGetAndSetReference(invoke);
1779 }
1780
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)1781 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
1782 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
1783 }
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)1784 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
1785 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
1786 }
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)1787 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
1788 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
1789 }
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)1790 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
1791 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
1792 }
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)1793 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
1794 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
1795 }
1796
VisitUnsafeGetAndAddInt(HInvoke * invoke)1797 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
1798 VisitJdkUnsafeGetAndAddInt(invoke);
1799 }
VisitUnsafeGetAndAddLong(HInvoke * invoke)1800 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
1801 VisitJdkUnsafeGetAndAddLong(invoke);
1802 }
VisitUnsafeGetAndSetInt(HInvoke * invoke)1803 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
1804 VisitJdkUnsafeGetAndSetInt(invoke);
1805 }
VisitUnsafeGetAndSetLong(HInvoke * invoke)1806 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
1807 VisitJdkUnsafeGetAndSetLong(invoke);
1808 }
VisitUnsafeGetAndSetObject(HInvoke * invoke)1809 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
1810 VisitJdkUnsafeGetAndSetReference(invoke);
1811 }
1812
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)1813 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
1814 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
1815 }
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)1816 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
1817 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
1818 }
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)1819 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
1820 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
1821 }
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)1822 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
1823 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
1824 }
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)1825 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
1826 GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
1827 }
1828
VisitStringCompareTo(HInvoke * invoke)1829 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
1830 LocationSummary* locations =
1831 new (allocator_) LocationSummary(invoke,
1832 invoke->InputAt(1)->CanBeNull()
1833 ? LocationSummary::kCallOnSlowPath
1834 : LocationSummary::kNoCall,
1835 kIntrinsified);
1836 locations->SetInAt(0, Location::RequiresRegister());
1837 locations->SetInAt(1, Location::RequiresRegister());
1838 locations->AddTemp(Location::RequiresRegister());
1839 locations->AddTemp(Location::RequiresRegister());
1840 locations->AddTemp(Location::RequiresRegister());
1841 // Need a temporary register for the String compression feature.
1842 if (mirror::kUseStringCompression) {
1843 locations->AddTemp(Location::RequiresRegister());
1844 }
1845 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1846 }
1847
VisitStringCompareTo(HInvoke * invoke)1848 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
1849 MacroAssembler* masm = GetVIXLAssembler();
1850 LocationSummary* locations = invoke->GetLocations();
1851
1852 Register str = InputRegisterAt(invoke, 0);
1853 Register arg = InputRegisterAt(invoke, 1);
1854 DCHECK(str.IsW());
1855 DCHECK(arg.IsW());
1856 Register out = OutputRegister(invoke);
1857
1858 Register temp0 = WRegisterFrom(locations->GetTemp(0));
1859 Register temp1 = WRegisterFrom(locations->GetTemp(1));
1860 Register temp2 = WRegisterFrom(locations->GetTemp(2));
1861 Register temp3;
1862 if (mirror::kUseStringCompression) {
1863 temp3 = WRegisterFrom(locations->GetTemp(3));
1864 }
1865
1866 vixl::aarch64::Label loop;
1867 vixl::aarch64::Label find_char_diff;
1868 vixl::aarch64::Label end;
1869 vixl::aarch64::Label different_compression;
1870
1871 // Get offsets of count and value fields within a string object.
1872 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1873 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1874
1875 // Note that the null check must have been done earlier.
1876 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1877
1878 // Take slow path and throw if input can be and is null.
1879 SlowPathCodeARM64* slow_path = nullptr;
1880 const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1881 if (can_slow_path) {
1882 slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1883 codegen_->AddSlowPath(slow_path);
1884 __ Cbz(arg, slow_path->GetEntryLabel());
1885 }
1886
1887 // Reference equality check, return 0 if same reference.
1888 __ Subs(out, str, arg);
1889 __ B(&end, eq);
1890
1891 if (mirror::kUseStringCompression) {
1892 // Load `count` fields of this and argument strings.
1893 __ Ldr(temp3, HeapOperand(str, count_offset));
1894 __ Ldr(temp2, HeapOperand(arg, count_offset));
1895 // Clean out compression flag from lengths.
1896 __ Lsr(temp0, temp3, 1u);
1897 __ Lsr(temp1, temp2, 1u);
1898 } else {
1899 // Load lengths of this and argument strings.
1900 __ Ldr(temp0, HeapOperand(str, count_offset));
1901 __ Ldr(temp1, HeapOperand(arg, count_offset));
1902 }
1903 // out = length diff.
1904 __ Subs(out, temp0, temp1);
1905 // temp0 = min(len(str), len(arg)).
1906 __ Csel(temp0, temp1, temp0, ge);
1907 // Shorter string is empty?
1908 __ Cbz(temp0, &end);
1909
1910 if (mirror::kUseStringCompression) {
1911 // Check whether both strings use the same compression style; only then can this comparison loop be used.
1912 __ Eor(temp2, temp2, Operand(temp3));
1913 // Interleave with the compression flag extraction, which is needed for both paths,
1914 // and also set the flags, which are needed only for the different-compression path.
1915 __ Ands(temp3.W(), temp3.W(), Operand(1));
1916 __ Tbnz(temp2, 0, &different_compression); // Does not use flags.
1917 }
1918 // Store offset of string value in preparation for comparison loop.
1919 __ Mov(temp1, value_offset);
1920 if (mirror::kUseStringCompression) {
1921 // For string compression, calculate the number of bytes to compare (not chars).
1922 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1923 __ Lsl(temp0, temp0, temp3);
1924 }
1925
1926 UseScratchRegisterScope scratch_scope(masm);
1927 Register temp4 = scratch_scope.AcquireX();
1928
1929 // Assertions that must hold in order to compare strings 8 bytes at a time.
1930 DCHECK_ALIGNED(value_offset, 8);
1931 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1932
1933 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1934 DCHECK_EQ(char_size, 2u);
1935
1936 // Promote temp2 to an X reg, ready for LDR.
1937 temp2 = temp2.X();
1938
1939 // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
1940 __ Bind(&loop);
1941 __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
1942 __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
1943 __ Cmp(temp4, temp2);
1944 __ B(ne, &find_char_diff);
1945 __ Add(temp1, temp1, char_size * 4);
1946 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1947 __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
1948 __ B(&loop, hi);
1949 __ B(&end);
1950
1951 // Promote temp1 to an X reg, ready for EOR.
1952 temp1 = temp1.X();
1953
1954 // Find the single character difference.
1955 __ Bind(&find_char_diff);
1956 // Get the bit position of the first character that differs.
1957 __ Eor(temp1, temp2, temp4);
1958 __ Rbit(temp1, temp1);
1959 __ Clz(temp1, temp1);
1960
1961 // If the number of chars remaining <= the index where the difference occurs (0-3), then
1962 // the difference occurs outside the remaining string data, so just return length diff (out).
1963 // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
1964 // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
1965 // unsigned when string compression is disabled.
1966 // When it's enabled, the comparison must be unsigned.
1967 __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
1968 __ B(ls, &end);
1969
1970 // Extract the characters and calculate the difference.
1971 if (mirror::kUseStringCompression) {
1972 __ Bic(temp1, temp1, 0x7);
1973 __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
1974 } else {
1975 __ Bic(temp1, temp1, 0xf);
1976 }
1977 __ Lsr(temp2, temp2, temp1);
1978 __ Lsr(temp4, temp4, temp1);
1979 if (mirror::kUseStringCompression) {
1980 // Prioritize the case of compressed strings and calculate such result first.
1981 __ Uxtb(temp1, temp4);
1982 __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
1983 __ Tbz(temp3, 0u, &end); // If actually compressed, we're done.
1984 }
1985 __ Uxth(temp4, temp4);
1986 __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
1987
1988 if (mirror::kUseStringCompression) {
1989 __ B(&end);
1990 __ Bind(&different_compression);
1991
1992 // Comparison for different compression style.
1993 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1994 DCHECK_EQ(c_char_size, 1u);
1995 temp1 = temp1.W();
1996 temp2 = temp2.W();
1997 temp4 = temp4.W();
1998
1999 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
2000 // Note that flags have been set by the `str` compression flag extraction to `temp3`
2001 // before branching to the `different_compression` label.
2002 __ Csel(temp1, str, arg, eq); // Pointer to the compressed string.
2003 __ Csel(temp2, str, arg, ne); // Pointer to the uncompressed string.
2004
2005 // We want to free up temp3, which currently holds the `str` compression flag, for the comparison.
2006 // So we move it to the bottom bit of the iteration count `temp0`, which we then need to treat
2007 // as unsigned. Start by freeing the bit with an LSL and finish further down with a SUB, which
2008 // allows `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
2009 __ Lsl(temp0, temp0, 1u);
2010
2011 // Adjust temp1 and temp2 from string pointers to data pointers.
2012 __ Add(temp1, temp1, Operand(value_offset));
2013 __ Add(temp2, temp2, Operand(value_offset));
2014
2015 // Complete the move of the compression flag.
2016 __ Sub(temp0, temp0, Operand(temp3));
2017
2018 vixl::aarch64::Label different_compression_loop;
2019 vixl::aarch64::Label different_compression_diff;
2020
2021 __ Bind(&different_compression_loop);
2022 __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
2023 __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
2024 __ Subs(temp4, temp4, Operand(temp3));
2025 __ B(&different_compression_diff, ne);
2026 __ Subs(temp0, temp0, 2);
2027 __ B(&different_compression_loop, hi);
2028 __ B(&end);
2029
2030 // Calculate the difference.
2031 __ Bind(&different_compression_diff);
2032 __ Tst(temp0, Operand(1));
2033 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2034 "Expecting 0=compressed, 1=uncompressed");
2035 __ Cneg(out, temp4, ne);
2036 }
2037
2038 __ Bind(&end);
2039
2040 if (can_slow_path) {
2041 __ Bind(slow_path->GetExitLabel());
2042 }
2043 }
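// Summary of the comparison above, for orientation: if both strings use the same compression,
// characters are compared 8 bytes at a time and the first difference within the common length
// decides the result; if the compressions differ, a byte-vs-halfword loop compares one character
// per iteration. If no difference is found within the shorter string, the length difference
// computed up front is returned.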
2044
2045 // The cut off for unrolling the loop in String.equals() intrinsic for const strings.
2046 // The normal loop plus the pre-header is 9 instructions without string compression and 12
2047 // instructions with string compression. We can compare up to 8 bytes in 4 instructions
2048 // (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up
2049 // to 10 instructions for the unrolled loop.
2050 constexpr size_t kShortConstStringEqualsCutoffInBytes = 32;
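// Worked example under the instruction budget above: with a 32-byte cutoff, a compressed const
// string of up to 32 characters (or an uncompressed one of up to 16) is compared with at most
// two LDP+LDP+CMP+CCMP+BNE groups, i.e. 2 * 5 = 10 instructions, matching the unrolling budget;
// anything longer uses the normal comparison loop instead.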
2051
GetConstString(HInstruction * candidate,uint32_t * utf16_length)2052 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
2053 if (candidate->IsLoadString()) {
2054 HLoadString* load_string = candidate->AsLoadString();
2055 const DexFile& dex_file = load_string->GetDexFile();
2056 return dex_file.GetStringDataAndUtf16Length(load_string->GetStringIndex(), utf16_length);
2057 }
2058 return nullptr;
2059 }
2060
VisitStringEquals(HInvoke * invoke)2061 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
2062 LocationSummary* locations =
2063 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2064 locations->SetInAt(0, Location::RequiresRegister());
2065 locations->SetInAt(1, Location::RequiresRegister());
2066
2067 // For the generic implementation and for long const strings we need a temporary.
2068 // We do not need it for short const strings (up to 8 bytes); see the code generation below.
2069 uint32_t const_string_length = 0u;
2070 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
2071 if (const_string == nullptr) {
2072 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
2073 }
2074 bool is_compressed =
2075 mirror::kUseStringCompression &&
2076 const_string != nullptr &&
2077 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
2078 if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) {
2079 locations->AddTemp(Location::RequiresRegister());
2080 }
2081
2082 // TODO: If the String.equals() is used only for an immediately following HIf, we can
2083 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
2084 // Then we shall need an extra temporary register instead of the output register.
2085 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2086 }
2087
VisitStringEquals(HInvoke * invoke)2088 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
2089 MacroAssembler* masm = GetVIXLAssembler();
2090 LocationSummary* locations = invoke->GetLocations();
2091
2092 Register str = WRegisterFrom(locations->InAt(0));
2093 Register arg = WRegisterFrom(locations->InAt(1));
2094 Register out = XRegisterFrom(locations->Out());
2095
2096 UseScratchRegisterScope scratch_scope(masm);
2097 Register temp = scratch_scope.AcquireW();
2098 Register temp1 = scratch_scope.AcquireW();
2099
2100 vixl::aarch64::Label loop;
2101 vixl::aarch64::Label end;
2102 vixl::aarch64::Label return_true;
2103 vixl::aarch64::Label return_false;
2104
2105 // Get offsets of count, value, and class fields within a string object.
2106 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
2107 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
2108 const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2109
2110 // Note that the null check must have been done earlier.
2111 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
2112
2113 StringEqualsOptimizations optimizations(invoke);
2114 if (!optimizations.GetArgumentNotNull()) {
2115 // Check if input is null, return false if it is.
2116 __ Cbz(arg, &return_false);
2117 }
2118
2119 // Reference equality check, return true if same reference.
2120 __ Cmp(str, arg);
2121 __ B(&return_true, eq);
2122
2123 if (!optimizations.GetArgumentIsString()) {
2124 // Instanceof check for the argument by comparing class fields.
2125 // All string objects must have the same type since String cannot be subclassed.
2126 // Receiver must be a string object, so its class field is equal to all strings' class fields.
2127 // If the argument is a string object, its class field must be equal to receiver's class field.
2128 //
2129 // As the String class is expected to be non-movable, we can read the class
2130 // field from String.equals' arguments without read barriers.
2131 AssertNonMovableStringClass();
2132 // /* HeapReference<Class> */ temp = str->klass_
2133 __ Ldr(temp, MemOperand(str.X(), class_offset));
2134 // /* HeapReference<Class> */ temp1 = arg->klass_
2135 __ Ldr(temp1, MemOperand(arg.X(), class_offset));
2136 // Also, because we use the previously loaded class references only in the
2137 // following comparison, we don't need to unpoison them.
2138 __ Cmp(temp, temp1);
2139 __ B(&return_false, ne);
2140 }
2141
2142 // Check if one of the inputs is a const string. Do not special-case both strings
2143 // being const; such cases should be handled by constant folding if needed.
2144 uint32_t const_string_length = 0u;
2145 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
2146 if (const_string == nullptr) {
2147 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
2148 if (const_string != nullptr) {
2149 std::swap(str, arg); // Make sure the const string is in `str`.
2150 }
2151 }
2152 bool is_compressed =
2153 mirror::kUseStringCompression &&
2154 const_string != nullptr &&
2155 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
2156
2157 if (const_string != nullptr) {
2158 // Load `count` field of the argument string and check if it matches the const string.
2159 // Also compare the compression style; if it differs, return false.
2160 __ Ldr(temp, MemOperand(arg.X(), count_offset));
2161 // Temporarily release temp1 as we may not be able to embed the flagged count in CMP immediate.
2162 scratch_scope.Release(temp1);
2163 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
2164 temp1 = scratch_scope.AcquireW();
2165 __ B(&return_false, ne);
2166 } else {
2167 // Load `count` fields of this and argument strings.
2168 __ Ldr(temp, MemOperand(str.X(), count_offset));
2169 __ Ldr(temp1, MemOperand(arg.X(), count_offset));
2170 // Check if the `count` fields are equal; return false if they are not.
2171 // Also compare the compression style; if it differs, return false.
2172 __ Cmp(temp, temp1);
2173 __ B(&return_false, ne);
2174 }
2175
2176 // Assertions that must hold in order to compare strings 8 bytes at a time.
2177 // Ok to do this because strings are zero-padded to kObjectAlignment.
2178 DCHECK_ALIGNED(value_offset, 8);
2179 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
2180
2181 if (const_string != nullptr &&
2182 const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
2183 : kShortConstStringEqualsCutoffInBytes / 2u)) {
2184 // Load and compare the contents. Though we know the contents of the short const string
2185 // at compile time, materializing constants may be more code than loading from memory.
2186 int32_t offset = value_offset;
2187 size_t remaining_bytes =
2188 RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u);
2189 temp = temp.X();
2190 temp1 = temp1.X();
2191 while (remaining_bytes > sizeof(uint64_t)) {
2192 Register temp2 = XRegisterFrom(locations->GetTemp(0));
2193 __ Ldp(temp, temp1, MemOperand(str.X(), offset));
2194 __ Ldp(temp2, out, MemOperand(arg.X(), offset));
2195 __ Cmp(temp, temp2);
2196 __ Ccmp(temp1, out, NoFlag, eq);
2197 __ B(&return_false, ne);
2198 offset += 2u * sizeof(uint64_t);
2199 remaining_bytes -= 2u * sizeof(uint64_t);
2200 }
2201 if (remaining_bytes != 0u) {
2202 __ Ldr(temp, MemOperand(str.X(), offset));
2203 __ Ldr(temp1, MemOperand(arg.X(), offset));
2204 __ Cmp(temp, temp1);
2205 __ B(&return_false, ne);
2206 }
2207 } else {
2208 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
2209 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2210 "Expecting 0=compressed, 1=uncompressed");
2211 __ Cbz(temp, &return_true);
2212
2213 if (mirror::kUseStringCompression) {
2214 // For string compression, calculate the number of bytes to compare (not chars).
2215 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
2216 __ And(temp1, temp, Operand(1)); // Extract compression flag.
2217 __ Lsr(temp, temp, 1u); // Extract length.
2218 __ Lsl(temp, temp, temp1); // Calculate number of bytes to compare.
2219 }
2220
2221 // Store offset of string value in preparation for comparison loop
2222 __ Mov(temp1, value_offset);
2223
2224 temp1 = temp1.X();
2225 Register temp2 = XRegisterFrom(locations->GetTemp(0));
2226 // Loop to compare strings 8 bytes at a time starting at the front of the string.
2227 __ Bind(&loop);
2228 __ Ldr(out, MemOperand(str.X(), temp1));
2229 __ Ldr(temp2, MemOperand(arg.X(), temp1));
2230 __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
2231 __ Cmp(out, temp2);
2232 __ B(&return_false, ne);
2233 // With string compression, we have compared 8 bytes, otherwise 4 chars.
2234 __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
2235 __ B(&loop, hi);
2236 }
2237
2238 // Return true and exit the function.
2239 // If loop does not result in returning false, we return true.
2240 __ Bind(&return_true);
2241 __ Mov(out, 1);
2242 __ B(&end);
2243
2244 // Return false and exit the function.
2245 __ Bind(&return_false);
2246 __ Mov(out, 0);
2247 __ Bind(&end);
2248 }
2249
GenerateVisitStringIndexOf(HInvoke * invoke,MacroAssembler * masm,CodeGeneratorARM64 * codegen,bool start_at_zero)2250 static void GenerateVisitStringIndexOf(HInvoke* invoke,
2251 MacroAssembler* masm,
2252 CodeGeneratorARM64* codegen,
2253 bool start_at_zero) {
2254 LocationSummary* locations = invoke->GetLocations();
2255
2256 // Note that the null check must have been done earlier.
2257 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
2258
2259 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
2260 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
2261 SlowPathCodeARM64* slow_path = nullptr;
2262 HInstruction* code_point = invoke->InputAt(1);
2263 if (code_point->IsIntConstant()) {
2264 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
2265 // Always needs the slow-path. We could directly dispatch to it, but this case should be
2266 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
2267 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2268 codegen->AddSlowPath(slow_path);
2269 __ B(slow_path->GetEntryLabel());
2270 __ Bind(slow_path->GetExitLabel());
2271 return;
2272 }
2273 } else if (code_point->GetType() != DataType::Type::kUint16) {
2274 Register char_reg = WRegisterFrom(locations->InAt(1));
2275 __ Tst(char_reg, 0xFFFF0000);
2276 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2277 codegen->AddSlowPath(slow_path);
2278 __ B(ne, slow_path->GetEntryLabel());
2279 }
2280
2281 if (start_at_zero) {
2282 // Start-index = 0.
2283 Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
2284 __ Mov(tmp_reg, 0);
2285 }
2286
2287 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
2288 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
2289
2290 if (slow_path != nullptr) {
2291 __ Bind(slow_path->GetExitLabel());
2292 }
2293 }
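// For illustration: the 0xFFFF checks above keep only BMP code points on the fast kQuickIndexOf
// call. An argument such as 0x1F600 (a supplementary code point) is instead routed to the
// intrinsic slow path, unconditionally when it is a large constant, or via the Tst/B.ne test
// when its value is not known statically.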
2294
VisitStringIndexOf(HInvoke * invoke)2295 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
2296 LocationSummary* locations = new (allocator_) LocationSummary(
2297 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2298 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
2299 // best to align the inputs accordingly.
2300 InvokeRuntimeCallingConvention calling_convention;
2301 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2302 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2303 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
2304
2305 // Need to send start_index=0.
2306 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2307 }
2308
VisitStringIndexOf(HInvoke * invoke)2309 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
2310 GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ true);
2311 }
2312
VisitStringIndexOfAfter(HInvoke * invoke)2313 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
2314 LocationSummary* locations = new (allocator_) LocationSummary(
2315 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2316 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
2317 // best to align the inputs accordingly.
2318 InvokeRuntimeCallingConvention calling_convention;
2319 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2320 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2321 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2322 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
2323 }
2324
VisitStringIndexOfAfter(HInvoke * invoke)2325 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
2326 GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ false);
2327 }
2328
VisitStringNewStringFromBytes(HInvoke * invoke)2329 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
2330 LocationSummary* locations = new (allocator_) LocationSummary(
2331 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2332 InvokeRuntimeCallingConvention calling_convention;
2333 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2334 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2335 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2336 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
2337 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
2338 }
2339
VisitStringNewStringFromBytes(HInvoke * invoke)2340 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
2341 MacroAssembler* masm = GetVIXLAssembler();
2342 LocationSummary* locations = invoke->GetLocations();
2343
2344 Register byte_array = WRegisterFrom(locations->InAt(0));
2345 __ Cmp(byte_array, 0);
2346 SlowPathCodeARM64* slow_path =
2347 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2348 codegen_->AddSlowPath(slow_path);
2349 __ B(eq, slow_path->GetEntryLabel());
2350
2351 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
2352 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
2353 __ Bind(slow_path->GetExitLabel());
2354 }
2355
VisitStringNewStringFromChars(HInvoke * invoke)2356 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
2357 LocationSummary* locations =
2358 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2359 InvokeRuntimeCallingConvention calling_convention;
2360 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2361 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2362 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2363 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
2364 }
2365
VisitStringNewStringFromChars(HInvoke * invoke)2366 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
2367 // No need to emit code checking whether `locations->InAt(2)` is a null
2368 // pointer, as callers of the native method
2369 //
2370 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
2371 //
2372 // all include a null check on `data` before calling that method.
2373 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
2374 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
2375 }
2376
VisitStringNewStringFromString(HInvoke * invoke)2377 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
2378 LocationSummary* locations = new (allocator_) LocationSummary(
2379 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2380 InvokeRuntimeCallingConvention calling_convention;
2381 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2382 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
2383 }
2384
VisitStringNewStringFromString(HInvoke * invoke)2385 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
2386 MacroAssembler* masm = GetVIXLAssembler();
2387 LocationSummary* locations = invoke->GetLocations();
2388
2389 Register string_to_copy = WRegisterFrom(locations->InAt(0));
2390 __ Cmp(string_to_copy, 0);
2391 SlowPathCodeARM64* slow_path =
2392 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2393 codegen_->AddSlowPath(slow_path);
2394 __ B(eq, slow_path->GetEntryLabel());
2395
2396 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
2397 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
2398 __ Bind(slow_path->GetExitLabel());
2399 }
2400
CreateFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)2401 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2402 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2403 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2404 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
2405
2406 LocationSummary* const locations =
2407 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2408 InvokeRuntimeCallingConvention calling_convention;
2409
2410 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
2411 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
2412 }
2413
CreateFPFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)2414 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2415 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2416 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2417 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
2418 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
2419
2420 LocationSummary* const locations =
2421 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2422 InvokeRuntimeCallingConvention calling_convention;
2423
2424 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
2425 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
2426 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
2427 }
2428
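// Unlike the one- and two-argument helpers above, this three-argument form does not call into the
// runtime: all inputs and the result stay in FPU registers.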
static void CreateFPFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2430 DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
2431 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2432 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
2433 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(2)->GetType()));
2434 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
2435
2436 LocationSummary* const locations =
2437 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2438
2439 locations->SetInAt(0, Location::RequiresFpuRegister());
2440 locations->SetInAt(1, Location::RequiresFpuRegister());
2441 locations->SetInAt(2, Location::RequiresFpuRegister());
2442 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2443 }
2444
static void GenFPToFPCall(HInvoke* invoke,
2446 CodeGeneratorARM64* codegen,
2447 QuickEntrypointEnum entry) {
2448 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2449 }
2450
void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickPow);
}

void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}
2594
void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2596 LocationSummary* locations =
2597 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2598 locations->SetInAt(0, Location::RequiresRegister());
2599 locations->SetInAt(1, Location::RequiresRegister());
2600 locations->SetInAt(2, Location::RequiresRegister());
2601 locations->SetInAt(3, Location::RequiresRegister());
2602 locations->SetInAt(4, Location::RequiresRegister());
2603
2604 locations->AddTemp(Location::RequiresRegister());
2605 locations->AddTemp(Location::RequiresRegister());
2606 locations->AddTemp(Location::RequiresRegister());
2607 }
2608
void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2610 MacroAssembler* masm = GetVIXLAssembler();
2611 LocationSummary* locations = invoke->GetLocations();
2612
2613 // Check assumption that sizeof(Char) is 2 (used in scaling below).
2614 const size_t char_size = DataType::Size(DataType::Type::kUint16);
2615 DCHECK_EQ(char_size, 2u);
2616
2617 // Location of data in char array buffer.
2618 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2619
2620 // Location of char array data in string.
2621 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2622
2623 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
  // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2625 Register srcObj = XRegisterFrom(locations->InAt(0));
2626 Register srcBegin = XRegisterFrom(locations->InAt(1));
2627 Register srcEnd = XRegisterFrom(locations->InAt(2));
2628 Register dstObj = XRegisterFrom(locations->InAt(3));
2629 Register dstBegin = XRegisterFrom(locations->InAt(4));
2630
2631 Register src_ptr = XRegisterFrom(locations->GetTemp(0));
2632 Register num_chr = XRegisterFrom(locations->GetTemp(1));
2633 Register tmp1 = XRegisterFrom(locations->GetTemp(2));
2634
2635 UseScratchRegisterScope temps(masm);
2636 Register dst_ptr = temps.AcquireX();
2637 Register tmp2 = temps.AcquireX();
2638
2639 vixl::aarch64::Label done;
2640 vixl::aarch64::Label compressed_string_vector_loop;
2641 vixl::aarch64::Label compressed_string_remainder;
2642 __ Sub(num_chr, srcEnd, srcBegin);
2643 // Early out for valid zero-length retrievals.
2644 __ Cbz(num_chr, &done);
2645
2646 // dst address start to copy to.
2647 __ Add(dst_ptr, dstObj, Operand(data_offset));
2648 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
2649
2650 // src address to copy from.
2651 __ Add(src_ptr, srcObj, Operand(value_offset));
2652 vixl::aarch64::Label compressed_string_preloop;
2653 if (mirror::kUseStringCompression) {
2654 // Location of count in string.
2655 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2656 // String's length.
2657 __ Ldr(tmp2, MemOperand(srcObj, count_offset));
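    // The LSB of `count` is the compression flag; branch to the compressed path when it is clear.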
2658 __ Tbz(tmp2, 0, &compressed_string_preloop);
2659 }
2660 __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
2661
2662 // Do the copy.
2663 vixl::aarch64::Label loop;
2664 vixl::aarch64::Label remainder;
2665
2666 // Save repairing the value of num_chr on the < 8 character path.
2667 __ Subs(tmp1, num_chr, 8);
2668 __ B(lt, &remainder);
2669
2670 // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
2671 __ Mov(num_chr, tmp1);
2672
  // Main loop, used for longer copies: loads and stores 8 x 16-bit characters at a time.
2674 // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
2675 __ Bind(&loop);
2676 __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
2677 __ Subs(num_chr, num_chr, 8);
2678 __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
2679 __ B(ge, &loop);
2680
2681 __ Adds(num_chr, num_chr, 8);
2682 __ B(eq, &done);
2683
2684 // Main loop for < 8 character case and remainder handling. Loads and stores one
2685 // 16-bit Java character at a time.
2686 __ Bind(&remainder);
2687 __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
2688 __ Subs(num_chr, num_chr, 1);
2689 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2690 __ B(gt, &remainder);
2691 __ B(&done);
2692
2693 if (mirror::kUseStringCompression) {
2694 // For compressed strings, acquire a SIMD temporary register.
2695 VRegister vtmp1 = temps.AcquireVRegisterOfSize(kQRegSize);
2696 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2697 DCHECK_EQ(c_char_size, 1u);
2698 __ Bind(&compressed_string_preloop);
2699 __ Add(src_ptr, src_ptr, Operand(srcBegin));
2700
2701 // Save repairing the value of num_chr on the < 8 character path.
2702 __ Subs(tmp1, num_chr, 8);
2703 __ B(lt, &compressed_string_remainder);
2704
2705 // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
2706 __ Mov(num_chr, tmp1);
2707
    // Main loop for a compressed src: expands 8 characters at a time from 8-bit to 16-bit.
2709 // Uses SIMD instructions.
2710 __ Bind(&compressed_string_vector_loop);
2711 __ Ld1(vtmp1.V8B(), MemOperand(src_ptr, c_char_size * 8, PostIndex));
2712 __ Subs(num_chr, num_chr, 8);
2713 __ Uxtl(vtmp1.V8H(), vtmp1.V8B());
2714 __ St1(vtmp1.V8H(), MemOperand(dst_ptr, char_size * 8, PostIndex));
2715 __ B(ge, &compressed_string_vector_loop);
2716
2717 __ Adds(num_chr, num_chr, 8);
2718 __ B(eq, &done);
2719
2720 // Loop for < 8 character case and remainder handling with a compressed src.
2721 // Copies 1 character (8-bit) to (16-bit) at a time.
2722 __ Bind(&compressed_string_remainder);
2723 __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
2724 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2725 __ Subs(num_chr, num_chr, Operand(1));
2726 __ B(gt, &compressed_string_remainder);
2727 }
2728
2729 __ Bind(&done);
2730 }
2731
2732 // This value is greater than ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore,
2733 // so if we choose to jump to the slow path we will end up in the native implementation.
2734 static constexpr int32_t kSystemArrayCopyCharThreshold = 192;
2735
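// Prefer a constant location when the value can be encoded as an Add/Sub immediate, so the
// address computation can fold it in directly; otherwise request a register.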
static Location LocationForSystemArrayCopyInput(HInstruction* input) {
2737 HIntConstant* const_input = input->AsIntConstantOrNull();
2738 if (const_input != nullptr && vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
2739 return Location::ConstantLocation(const_input);
2740 } else {
2741 return Location::RequiresRegister();
2742 }
2743 }
2744
void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2746 // Check to see if we have known failures that will cause us to have to bail out
2747 // to the runtime, and just generate the runtime call directly.
2748 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
2749 HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstantOrNull();
2750
2751 // The positions must be non-negative.
2752 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2753 (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
2754 // We will have to fail anyways.
2755 return;
2756 }
2757
2758 // The length must be >= 0 and not so long that we would (currently) prefer libcore's
2759 // native implementation.
2760 HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
2761 if (length != nullptr) {
2762 int32_t len = length->GetValue();
2763 if (len < 0 || len > kSystemArrayCopyCharThreshold) {
2764 // Just call as normal.
2765 return;
2766 }
2767 }
2768
2769 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2770 LocationSummary* locations =
2771 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
2772 // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
2773 locations->SetInAt(0, Location::RequiresRegister());
2774 locations->SetInAt(1, LocationForSystemArrayCopyInput(invoke->InputAt(1)));
2775 locations->SetInAt(2, Location::RequiresRegister());
2776 locations->SetInAt(3, LocationForSystemArrayCopyInput(invoke->InputAt(3)));
2777 locations->SetInAt(4, LocationForSystemArrayCopyInput(invoke->InputAt(4)));
2778
2779 locations->AddTemp(Location::RequiresRegister());
2780 locations->AddTemp(Location::RequiresRegister());
2781 locations->AddTemp(Location::RequiresRegister());
2782 }
2783
static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
2785 Register array,
2786 Location pos,
2787 Location length,
2788 SlowPathCodeARM64* slow_path,
2789 Register temp,
2790 bool length_is_array_length,
2791 bool position_sign_checked) {
2792 const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
2793 if (pos.IsConstant()) {
2794 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
2795 if (pos_const == 0) {
2796 if (!length_is_array_length) {
2797 // Check that length(array) >= length.
2798 __ Ldr(temp, MemOperand(array, length_offset));
2799 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2800 __ B(slow_path->GetEntryLabel(), lt);
2801 }
2802 } else {
2803 // Calculate length(array) - pos.
2804 // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
2805 // as `int32_t`. If the result is negative, the B.LT below shall go to the slow path.
2806 __ Ldr(temp, MemOperand(array, length_offset));
2807 __ Sub(temp, temp, pos_const);
2808
2809 // Check that (length(array) - pos) >= length.
2810 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2811 __ B(slow_path->GetEntryLabel(), lt);
2812 }
2813 } else if (length_is_array_length) {
2814 // The only way the copy can succeed is if pos is zero.
2815 __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
2816 } else {
2817 // Check that pos >= 0.
2818 Register pos_reg = WRegisterFrom(pos);
2819 if (!position_sign_checked) {
2820 __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
2821 }
2822
2823 // Calculate length(array) - pos.
2824 // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
2825 // as `int32_t`. If the result is negative, the B.LT below shall go to the slow path.
2826 __ Ldr(temp, MemOperand(array, length_offset));
2827 __ Sub(temp, temp, pos_reg);
2828
2829 // Check that (length(array) - pos) >= length.
2830 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2831 __ B(slow_path->GetEntryLabel(), lt);
2832 }
2833 }
2834
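// Compute `dest` = `base` + `data_offset` + `pos` * sizeof(`type`), where `pos` is either a
// constant or a register.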
static void GenArrayAddress(MacroAssembler* masm,
2836 Register dest,
2837 Register base,
2838 Location pos,
2839 DataType::Type type,
2840 int32_t data_offset) {
2841 if (pos.IsConstant()) {
2842 int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
2843 __ Add(dest, base, DataType::Size(type) * constant + data_offset);
2844 } else {
2845 if (data_offset != 0) {
2846 __ Add(dest, base, data_offset);
2847 base = dest;
2848 }
2849 __ Add(dest, base, Operand(XRegisterFrom(pos), LSL, DataType::SizeShift(type)));
2850 }
2851 }
2852
2853 // Compute base source address, base destination address, and end
2854 // source address for System.arraycopy* intrinsics in `src_base`,
2855 // `dst_base` and `src_end` respectively.
static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
2857 DataType::Type type,
2858 Register src,
2859 Location src_pos,
2860 Register dst,
2861 Location dst_pos,
2862 Location copy_length,
2863 Register src_base,
2864 Register dst_base,
2865 Register src_end) {
2866 // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
2867 DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16)
2868 << "Unexpected element type: " << type;
2869 const int32_t element_size = DataType::Size(type);
2870 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2871
2872 GenArrayAddress(masm, src_base, src, src_pos, type, data_offset);
2873 GenArrayAddress(masm, dst_base, dst, dst_pos, type, data_offset);
2874 if (src_end.IsValid()) {
2875 GenArrayAddress(masm, src_end, src_base, copy_length, type, /*data_offset=*/ 0);
2876 }
2877 }
2878
void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2880 MacroAssembler* masm = GetVIXLAssembler();
2881 LocationSummary* locations = invoke->GetLocations();
2882 Register src = XRegisterFrom(locations->InAt(0));
2883 Location src_pos = locations->InAt(1);
2884 Register dst = XRegisterFrom(locations->InAt(2));
2885 Location dst_pos = locations->InAt(3);
2886 Location length = locations->InAt(4);
2887
2888 SlowPathCodeARM64* slow_path =
2889 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2890 codegen_->AddSlowPath(slow_path);
2891
2892 // If source and destination are the same, take the slow path. Overlapping copy regions must be
2893 // copied in reverse and we can't know in all cases if it's needed.
2894 __ Cmp(src, dst);
2895 __ B(slow_path->GetEntryLabel(), eq);
2896
2897 // Bail out if the source is null.
2898 __ Cbz(src, slow_path->GetEntryLabel());
2899
2900 // Bail out if the destination is null.
2901 __ Cbz(dst, slow_path->GetEntryLabel());
2902
2903 if (!length.IsConstant()) {
2904 // Merge the following two comparisons into one:
2905 // If the length is negative, bail out (delegate to libcore's native implementation).
2906 // If the length > kSystemArrayCopyCharThreshold then (currently) prefer libcore's
2907 // native implementation.
2908 __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
2909 __ B(slow_path->GetEntryLabel(), hi);
2910 } else {
2911 // We have already checked in the LocationsBuilder for the constant case.
2912 DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
2913 DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), kSystemArrayCopyCharThreshold);
2914 }
2915
2916 Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
2917 Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
2918 Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
2919
2920 CheckSystemArrayCopyPosition(masm,
2921 src,
2922 src_pos,
2923 length,
2924 slow_path,
2925 src_curr_addr,
2926 /*length_is_array_length=*/ false,
2927 /*position_sign_checked=*/ false);
2928
2929 CheckSystemArrayCopyPosition(masm,
2930 dst,
2931 dst_pos,
2932 length,
2933 slow_path,
2934 src_curr_addr,
2935 /*length_is_array_length=*/ false,
2936 /*position_sign_checked=*/ false);
2937
2938 src_curr_addr = src_curr_addr.X();
2939 dst_curr_addr = dst_curr_addr.X();
2940 src_stop_addr = src_stop_addr.X();
2941
2942 GenSystemArrayCopyAddresses(masm,
2943 DataType::Type::kUint16,
2944 src,
2945 src_pos,
2946 dst,
2947 dst_pos,
2948 length,
2949 src_curr_addr,
2950 dst_curr_addr,
2951 Register());
2952
2953 // Iterate over the arrays and do a raw copy of the chars.
2954 const int32_t char_size = DataType::Size(DataType::Type::kUint16);
2955 UseScratchRegisterScope temps(masm);
2956
2957 // We split processing of the array in two parts: head and tail.
2958 // A first loop handles the head by copying a block of characters per
2959 // iteration (see: chars_per_block).
2960 // A second loop handles the tail by copying the remaining characters.
2961 // If the copy length is not constant, we copy them one-by-one.
2962 // If the copy length is constant, we optimize by always unrolling the tail
2963 // loop, and also unrolling the head loop when the copy length is small (see:
2964 // unroll_threshold).
2965 //
2966 // Both loops are inverted for better performance, meaning they are
2967 // implemented as conditional do-while loops.
2968 // Here, the loop condition is first checked to determine if there are
2969 // sufficient chars to run an iteration, then we enter the do-while: an
2970 // iteration is performed followed by a conditional branch only if another
2971 // iteration is necessary. As opposed to a standard while-loop, this inversion
2972 // can save some branching (e.g. we don't branch back to the initial condition
2973 // at the end of every iteration only to potentially immediately branch
2974 // again).
2975 //
2976 // A full block of chars is subtracted and added before and after the head
2977 // loop, respectively. This ensures that any remaining length after each
2978 // head loop iteration means there is a full block remaining, reducing the
2979 // number of conditional checks required on every iteration.
2980 constexpr int32_t chars_per_block = 4;
2981 constexpr int32_t unroll_threshold = 2 * chars_per_block;
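  // For example, a non-constant copy length of 11 runs the head loop twice (copying 8 chars)
  // and the tail loop three times for the remaining 3 chars.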
2982 vixl::aarch64::Label loop1, loop2, pre_loop2, done;
2983
2984 Register length_tmp = src_stop_addr.W();
2985 Register tmp = temps.AcquireRegisterOfSize(char_size * chars_per_block * kBitsPerByte);
2986
2987 auto emitHeadLoop = [&]() {
2988 __ Bind(&loop1);
2989 __ Ldr(tmp, MemOperand(src_curr_addr, char_size * chars_per_block, PostIndex));
2990 __ Subs(length_tmp, length_tmp, chars_per_block);
2991 __ Str(tmp, MemOperand(dst_curr_addr, char_size * chars_per_block, PostIndex));
2992 __ B(&loop1, ge);
2993 };
2994
2995 auto emitTailLoop = [&]() {
2996 __ Bind(&loop2);
2997 __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
2998 __ Subs(length_tmp, length_tmp, 1);
2999 __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
3000 __ B(&loop2, gt);
3001 };
3002
3003 auto emitUnrolledTailLoop = [&](const int32_t tail_length) {
3004 DCHECK_LT(tail_length, 4);
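    // `tail_length` is the copy length modulo `chars_per_block`, so at most one 2-char and one
    // 1-char copy are needed here.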
3005
3006 // Don't use post-index addressing, and instead add a constant offset later.
3007 if ((tail_length & 2) != 0) {
3008 __ Ldr(tmp.W(), MemOperand(src_curr_addr));
3009 __ Str(tmp.W(), MemOperand(dst_curr_addr));
3010 }
3011 if ((tail_length & 1) != 0) {
3012 const int32_t offset = (tail_length & ~1) * char_size;
3013 __ Ldrh(tmp, MemOperand(src_curr_addr, offset));
3014 __ Strh(tmp, MemOperand(dst_curr_addr, offset));
3015 }
3016 };
3017
3018 if (length.IsConstant()) {
3019 const int32_t constant_length = length.GetConstant()->AsIntConstant()->GetValue();
3020 if (constant_length >= unroll_threshold) {
3021 __ Mov(length_tmp, constant_length - chars_per_block);
3022 emitHeadLoop();
3023 } else {
3024 static_assert(unroll_threshold == 8, "The unroll_threshold must be 8.");
3025 // Fully unroll both the head and tail loops.
3026 if ((constant_length & 4) != 0) {
3027 __ Ldr(tmp, MemOperand(src_curr_addr, 4 * char_size, PostIndex));
3028 __ Str(tmp, MemOperand(dst_curr_addr, 4 * char_size, PostIndex));
3029 }
3030 }
3031 emitUnrolledTailLoop(constant_length % chars_per_block);
3032 } else {
3033 Register length_reg = WRegisterFrom(length);
3034 __ Subs(length_tmp, length_reg, chars_per_block);
3035 __ B(&pre_loop2, lt);
3036
3037 emitHeadLoop();
3038
3039 __ Bind(&pre_loop2);
3040 __ Adds(length_tmp, length_tmp, chars_per_block);
3041 __ B(&done, eq);
3042
3043 emitTailLoop();
3044 }
3045
3046 __ Bind(&done);
3047 __ Bind(slow_path->GetExitLabel());
3048 }
3049
3050 // We choose to use the native implementation for longer copy lengths.
3051 static constexpr int32_t kSystemArrayCopyThreshold = 128;
3052
void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
3054 // The only read barrier implementation supporting the
3055 // SystemArrayCopy intrinsic is the Baker-style read barriers.
3056 if (codegen_->EmitNonBakerReadBarrier()) {
3057 return;
3058 }
3059
3060 constexpr size_t kInitialNumTemps = 2u; // We need at least two temps.
3061 LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
3062 invoke, kSystemArrayCopyThreshold, kInitialNumTemps);
3063 if (locations != nullptr) {
3064 locations->SetInAt(1, LocationForSystemArrayCopyInput(invoke->InputAt(1)));
3065 locations->SetInAt(3, LocationForSystemArrayCopyInput(invoke->InputAt(3)));
3066 locations->SetInAt(4, LocationForSystemArrayCopyInput(invoke->InputAt(4)));
3067 if (codegen_->EmitBakerReadBarrier()) {
3068 // Temporary register IP0, obtained from the VIXL scratch register
3069 // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
3070 // (because that register is clobbered by ReadBarrierMarkRegX
3071 // entry points). It cannot be used in calls to
3072 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
3073 // either. For these reasons, get a third extra temporary register
3074 // from the register allocator.
3075 locations->AddTemp(Location::RequiresRegister());
3076 } else {
3077 // Cases other than Baker read barriers: the third temporary will
3078 // be acquired from the VIXL scratch register pool.
3079 }
3080 }
3081 }
3082
void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
3084 // The only read barrier implementation supporting the
3085 // SystemArrayCopy intrinsic is the Baker-style read barriers.
3086 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
3087
3088 MacroAssembler* masm = GetVIXLAssembler();
3089 LocationSummary* locations = invoke->GetLocations();
3090
3091 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3092 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3093 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3094 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3095 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3096
3097 Register src = XRegisterFrom(locations->InAt(0));
3098 Location src_pos = locations->InAt(1);
3099 Register dest = XRegisterFrom(locations->InAt(2));
3100 Location dest_pos = locations->InAt(3);
3101 Location length = locations->InAt(4);
3102 Register temp1 = WRegisterFrom(locations->GetTemp(0));
3103 Location temp1_loc = LocationFrom(temp1);
3104 Register temp2 = WRegisterFrom(locations->GetTemp(1));
3105 Location temp2_loc = LocationFrom(temp2);
3106
3107 SlowPathCodeARM64* intrinsic_slow_path =
3108 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
3109 codegen_->AddSlowPath(intrinsic_slow_path);
3110
3111 vixl::aarch64::Label conditions_on_positions_validated;
3112 SystemArrayCopyOptimizations optimizations(invoke);
3113
  // If source and destination are the same array, we go to the slow path whenever the copy
  // would have to be performed backwards (i.e. dest_pos > src_pos), since the code below only
  // copies forward. We do not need this check if the source and destination positions are the
  // same.
3116 if (!optimizations.GetSourcePositionIsDestinationPosition()) {
3117 if (src_pos.IsConstant()) {
3118 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
3119 if (dest_pos.IsConstant()) {
3120 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
3121 if (optimizations.GetDestinationIsSource()) {
3122 // Checked when building locations.
3123 DCHECK_GE(src_pos_constant, dest_pos_constant);
3124 } else if (src_pos_constant < dest_pos_constant) {
3125 __ Cmp(src, dest);
3126 __ B(intrinsic_slow_path->GetEntryLabel(), eq);
3127 }
3128 } else {
3129 if (!optimizations.GetDestinationIsSource()) {
3130 __ Cmp(src, dest);
3131 __ B(&conditions_on_positions_validated, ne);
3132 }
3133 __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
3134 __ B(intrinsic_slow_path->GetEntryLabel(), gt);
3135 }
3136 } else {
3137 if (!optimizations.GetDestinationIsSource()) {
3138 __ Cmp(src, dest);
3139 __ B(&conditions_on_positions_validated, ne);
3140 }
3141 __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
3142 OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
3143 __ B(intrinsic_slow_path->GetEntryLabel(), lt);
3144 }
3145 }
3146
3147 __ Bind(&conditions_on_positions_validated);
3148
3149 if (!optimizations.GetSourceIsNotNull()) {
3150 // Bail out if the source is null.
3151 __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
3152 }
3153
3154 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
3155 // Bail out if the destination is null.
3156 __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
3157 }
3158
3159 // We have already checked in the LocationsBuilder for the constant case.
3160 if (!length.IsConstant()) {
3161 // Merge the following two comparisons into one:
3162 // If the length is negative, bail out (delegate to libcore's native implementation).
    //   If the length >= kSystemArrayCopyThreshold, then (currently) prefer libcore's
    //   native implementation.
3164 __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
3165 __ B(intrinsic_slow_path->GetEntryLabel(), hs);
3166 }
3167 // Validity checks: source.
3168 CheckSystemArrayCopyPosition(masm,
3169 src,
3170 src_pos,
3171 length,
3172 intrinsic_slow_path,
3173 temp1,
3174 optimizations.GetCountIsSourceLength(),
3175 /*position_sign_checked=*/ false);
3176
3177 // Validity checks: dest.
3178 bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
3179 CheckSystemArrayCopyPosition(masm,
3180 dest,
3181 dest_pos,
3182 length,
3183 intrinsic_slow_path,
3184 temp1,
3185 optimizations.GetCountIsDestinationLength(),
3186 dest_position_sign_checked);
3187
3188 auto check_non_primitive_array_class = [&](Register klass, Register temp) {
3189 // No read barrier is needed for reading a chain of constant references for comparing
3190 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
3191 // /* HeapReference<Class> */ temp = klass->component_type_
3192 __ Ldr(temp, HeapOperand(klass, component_offset));
3193 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3194 // Check that the component type is not null.
3195 __ Cbz(temp, intrinsic_slow_path->GetEntryLabel());
3196 // Check that the component type is not a primitive.
3197 // /* uint16_t */ temp = static_cast<uint16>(klass->primitive_type_);
3198 __ Ldrh(temp, HeapOperand(temp, primitive_offset));
3199 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
3200 __ Cbnz(temp, intrinsic_slow_path->GetEntryLabel());
3201 };
3202
3203 if (!optimizations.GetDoesNotNeedTypeCheck()) {
3204 // Check whether all elements of the source array are assignable to the component
3205 // type of the destination array. We do two checks: the classes are the same,
3206 // or the destination is Object[]. If none of these checks succeed, we go to the
3207 // slow path.
3208
3209 if (codegen_->EmitBakerReadBarrier()) {
3210 Location temp3_loc = locations->GetTemp(2);
3211 // /* HeapReference<Class> */ temp1 = dest->klass_
3212 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3213 temp1_loc,
3214 dest.W(),
3215 class_offset,
3216 temp3_loc,
3217 /* needs_null_check= */ false,
3218 /* use_load_acquire= */ false);
3219 // Register `temp1` is not trashed by the read barrier emitted
3220 // by GenerateFieldLoadWithBakerReadBarrier below, as that
3221 // method produces a call to a ReadBarrierMarkRegX entry point,
3222 // which saves all potentially live registers, including
3223 // temporaries such a `temp1`.
3224 // /* HeapReference<Class> */ temp2 = src->klass_
3225 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3226 temp2_loc,
3227 src.W(),
3228 class_offset,
3229 temp3_loc,
3230 /* needs_null_check= */ false,
3231 /* use_load_acquire= */ false);
3232 } else {
3233 // /* HeapReference<Class> */ temp1 = dest->klass_
3234 __ Ldr(temp1, MemOperand(dest, class_offset));
3235 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
3236 // /* HeapReference<Class> */ temp2 = src->klass_
3237 __ Ldr(temp2, MemOperand(src, class_offset));
3238 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
3239 }
3240
3241 __ Cmp(temp1, temp2);
3242 if (optimizations.GetDestinationIsTypedObjectArray()) {
3243 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3244 vixl::aarch64::Label do_copy;
3245 // For class match, we can skip the source type check regardless of the optimization flag.
3246 __ B(&do_copy, eq);
3247 // No read barrier is needed for reading a chain of constant references
3248 // for comparing with null, see `ReadBarrierOption`.
3249 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3250 __ Ldr(temp1, HeapOperand(temp1, component_offset));
3251 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
3252 // /* HeapReference<Class> */ temp1 = temp1->super_class_
3253 __ Ldr(temp1, HeapOperand(temp1, super_offset));
3254 // No need to unpoison the result, we're comparing against null.
3255 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
3256 // Bail out if the source is not a non primitive array.
3257 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3258 check_non_primitive_array_class(temp2, temp2);
3259 }
3260 __ Bind(&do_copy);
3261 } else {
3262 DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
3263 // For class match, we can skip the array type check completely if at least one of source
3264 // and destination is known to be a non primitive array, otherwise one check is enough.
3265 __ B(intrinsic_slow_path->GetEntryLabel(), ne);
3266 if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
3267 !optimizations.GetSourceIsNonPrimitiveArray()) {
3268 check_non_primitive_array_class(temp2, temp2);
3269 }
3270 }
3271 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3272 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3273 // Bail out if the source is not a non primitive array.
3274 // No read barrier is needed for reading a chain of constant references for comparing
3275 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
3276 // /* HeapReference<Class> */ temp2 = src->klass_
3277 __ Ldr(temp2, MemOperand(src, class_offset));
3278 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
3279 check_non_primitive_array_class(temp2, temp2);
3280 }
3281
3282 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
    // Zero constant length: no need to emit the copy loop at all.
3284 } else {
3285 vixl::aarch64::Label skip_copy_and_write_barrier;
3286 if (length.IsRegister()) {
      // Don't enter the copy loop if the length is zero.
3288 __ Cbz(WRegisterFrom(length), &skip_copy_and_write_barrier);
3289 }
3290
3291 {
3292 // We use a block to end the scratch scope before the write barrier, thus
3293 // freeing the temporary registers so they can be used in `MarkGCCard`.
3294 UseScratchRegisterScope temps(masm);
3295 bool emit_rb = codegen_->EmitBakerReadBarrier();
3296 Register temp3;
3297 Register tmp;
3298 if (emit_rb) {
3299 temp3 = WRegisterFrom(locations->GetTemp(2));
3300 // Make sure `tmp` is not IP0, as it is clobbered by ReadBarrierMarkRegX entry points
3301 // in ReadBarrierSystemArrayCopySlowPathARM64. Explicitly allocate the register IP1.
3302 DCHECK(temps.IsAvailable(ip1));
3303 temps.Exclude(ip1);
3304 tmp = ip1.W();
3305 } else {
3306 temp3 = temps.AcquireW();
3307 tmp = temps.AcquireW();
3308 }
3309
3310 Register src_curr_addr = temp1.X();
3311 Register dst_curr_addr = temp2.X();
3312 Register src_stop_addr = temp3.X();
3313 const DataType::Type type = DataType::Type::kReference;
3314 const int32_t element_size = DataType::Size(type);
3315
3316 SlowPathCodeARM64* read_barrier_slow_path = nullptr;
3317 if (emit_rb) {
3318 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
3319
3320 // SystemArrayCopy implementation for Baker read barriers (see
3321 // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
3322 //
3323 // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
3324 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
3325 // bool is_gray = (rb_state == ReadBarrier::GrayState());
3326 // if (is_gray) {
3327 // // Slow-path copy.
3328 // do {
3329 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
3330 // } while (src_ptr != end_ptr)
3331 // } else {
3332 // // Fast-path copy.
3333 // do {
3334 // *dest_ptr++ = *src_ptr++;
3335 // } while (src_ptr != end_ptr)
3336 // }
3337
3338 // /* int32_t */ monitor = src->monitor_
3339 __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
3340 // /* LockWord */ lock_word = LockWord(monitor)
3341 static_assert(sizeof(LockWord) == sizeof(int32_t),
3342 "art::LockWord and int32_t have different sizes.");
3343
3344 // Introduce a dependency on the lock_word including rb_state,
3345 // to prevent load-load reordering, and without using
3346 // a memory barrier (which would be more expensive).
3347 // `src` is unchanged by this operation, but its value now depends
3348 // on `tmp`.
3349 __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
3350
3351 // Slow path used to copy array when `src` is gray.
3352 read_barrier_slow_path =
3353 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(
3354 invoke, LocationFrom(tmp));
3355 codegen_->AddSlowPath(read_barrier_slow_path);
3356 }
3357
3358 // Compute base source address, base destination address, and end
3359 // source address for System.arraycopy* intrinsics in `src_base`,
3360 // `dst_base` and `src_end` respectively.
      // Note that `src_curr_addr` is computed from `src` (and
3362 // `src_pos`) here, and thus honors the artificial dependency
3363 // of `src` on `tmp`.
3364 GenSystemArrayCopyAddresses(masm,
3365 type,
3366 src,
3367 src_pos,
3368 dest,
3369 dest_pos,
3370 length,
3371 src_curr_addr,
3372 dst_curr_addr,
3373 src_stop_addr);
3374
3375 if (emit_rb) {
3376 // Given the numeric representation, it's enough to check the low bit of the rb_state.
3377 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
3378 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
3379 __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
3380 }
3381
3382 // Iterate over the arrays and do a raw copy of the objects. We don't need to
3383 // poison/unpoison.
3384 vixl::aarch64::Label loop;
3385 __ Bind(&loop);
3386 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
3387 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
3388 __ Cmp(src_curr_addr, src_stop_addr);
3389 __ B(&loop, ne);
3390
3391 if (emit_rb) {
3392 DCHECK(read_barrier_slow_path != nullptr);
3393 __ Bind(read_barrier_slow_path->GetExitLabel());
3394 }
3395 }
3396
3397 // We only need one card marking on the destination array.
3398 codegen_->MarkGCCard(dest.W());
3399
3400 __ Bind(&skip_copy_and_write_barrier);
3401 }
3402
3403 __ Bind(intrinsic_slow_path->GetExitLabel());
3404 }
3405
static void GenIsInfinite(LocationSummary* locations,
3407 bool is64bit,
3408 MacroAssembler* masm) {
3409 Operand infinity(0);
3410 Operand tst_mask(0);
3411 Register out;
3412
3413 if (is64bit) {
3414 infinity = Operand(kPositiveInfinityDouble);
3415 tst_mask = MaskLeastSignificant<uint64_t>(63);
3416 out = XRegisterFrom(locations->Out());
3417 } else {
3418 infinity = Operand(kPositiveInfinityFloat);
3419 tst_mask = MaskLeastSignificant<uint32_t>(31);
3420 out = WRegisterFrom(locations->Out());
3421 }
3422
3423 MoveFPToInt(locations, is64bit, masm);
3424 // Checks whether exponent bits are all 1 and fraction bits are all 0.
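  // XOR-ing with +Infinity clears every bit for +Infinity and leaves only the sign bit set for
  // -Infinity, so after masking out the sign bit the result is zero exactly for +/-Infinity.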
3425 __ Eor(out, out, infinity);
3426 // TST bitmask is used to mask out the sign bit: either 0x7fffffff or 0x7fffffffffffffff
3427 // depending on is64bit.
3428 __ Tst(out, tst_mask);
3429 __ Cset(out, eq);
3430 }
3431
void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
3433 CreateFPToIntLocations(allocator_, invoke);
3434 }
3435
void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
3437 GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
3438 }
3439
void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
3441 CreateFPToIntLocations(allocator_, invoke);
3442 }
3443
void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
3445 GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
3446 }
3447
3448 #define VISIT_INTRINSIC(name, low, high, type, start_index) \
3449 void IntrinsicLocationsBuilderARM64::Visit##name##ValueOf(HInvoke* invoke) { \
3450 InvokeRuntimeCallingConvention calling_convention; \
3451 IntrinsicVisitor::ComputeValueOfLocations( \
3452 invoke, \
3453 codegen_, \
3454 low, \
3455 (high) - (low) + 1, \
3456 calling_convention.GetReturnLocation(DataType::Type::kReference), \
3457 Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); \
3458 } \
3459 void IntrinsicCodeGeneratorARM64::Visit##name##ValueOf(HInvoke* invoke) { \
3460 IntrinsicVisitor::ValueOfInfo info = \
3461 IntrinsicVisitor::ComputeValueOfInfo(invoke, \
3462 codegen_->GetCompilerOptions(), \
3463 WellKnownClasses::java_lang_##name##_value, \
3464 low, \
3465 (high) - (low) + 1, \
3466 start_index); \
3467 HandleValueOf(invoke, info, type); \
3468 }
BOXED_TYPES(VISIT_INTRINSIC)
3470 #undef VISIT_INTRINSIC
3471
3472 void IntrinsicCodeGeneratorARM64::HandleValueOf(HInvoke* invoke,
3473 const IntrinsicVisitor::ValueOfInfo& info,
3474 DataType::Type type) {
3475 LocationSummary* locations = invoke->GetLocations();
3476 MacroAssembler* masm = GetVIXLAssembler();
3477
3478 Register out = RegisterFrom(locations->Out(), DataType::Type::kReference);
3479 UseScratchRegisterScope temps(masm);
3480 Register temp = temps.AcquireW();
3481 auto allocate_instance = [&]() {
3482 DCHECK(out.X().Is(InvokeRuntimeCallingConvention().GetRegisterAt(0)));
3483 codegen_->LoadIntrinsicDeclaringClass(out, invoke);
3484 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3485 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3486 };
3487 if (invoke->InputAt(0)->IsIntConstant()) {
3488 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3489 if (static_cast<uint32_t>(value - info.low) < info.length) {
3490 // Just embed the object in the code.
3491 DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
3492 codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
3493 } else {
3494 DCHECK(locations->CanCall());
3495 // Allocate and initialize a new object.
3496 // TODO: If we JIT, we could allocate the object now, and store it in the
3497 // JIT object table.
3498 allocate_instance();
3499 __ Mov(temp.W(), value);
3500 codegen_->Store(type, temp.W(), HeapOperand(out.W(), info.value_offset));
3501 // Class pointer and `value` final field stores require a barrier before publication.
3502 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3503 }
3504 } else {
3505 DCHECK(locations->CanCall());
3506 Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32);
3507 // Check bounds of our cache.
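    // Subtracting `info.low` and comparing unsigned (`hs` below) folds the `in < info.low` and
    // `in >= info.low + info.length` checks into a single branch.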
3508 __ Add(out.W(), in.W(), -info.low);
3509 __ Cmp(out.W(), info.length);
3510 vixl::aarch64::Label allocate, done;
3511 __ B(&allocate, hs);
3512 // If the value is within the bounds, load the object directly from the array.
3513 codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
3514 MemOperand source = HeapOperand(
3515 temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference));
3516 codegen_->Load(DataType::Type::kReference, out, source);
3517 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
3518 __ B(&done);
3519 __ Bind(&allocate);
3520 // Otherwise allocate and initialize a new object.
3521 allocate_instance();
3522 codegen_->Store(type, in.W(), HeapOperand(out.W(), info.value_offset));
3523 // Class pointer and `value` final field stores require a barrier before publication.
3524 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3525 __ Bind(&done);
3526 }
3527 }
3528
void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) {
3530 IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3531
3532 if (codegen_->EmitBakerReadBarrier() && invoke->GetLocations() != nullptr) {
3533 invoke->GetLocations()->AddTemp(Location::RequiresRegister());
3534 }
3535 }
3536
void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
3538 MacroAssembler* masm = GetVIXLAssembler();
3539 LocationSummary* locations = invoke->GetLocations();
3540
3541 Location obj = locations->InAt(0);
3542 Location out = locations->Out();
3543
3544 SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
3545 codegen_->AddSlowPath(slow_path);
3546
3547 if (codegen_->EmitReadBarrier()) {
3548 // Check self->GetWeakRefAccessEnabled().
3549 UseScratchRegisterScope temps(masm);
3550 Register temp = temps.AcquireW();
3551 __ Ldr(temp,
3552 MemOperand(tr, Thread::WeakRefAccessEnabledOffset<kArm64PointerSize>().Uint32Value()));
3553 static_assert(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled) == 0);
3554 __ Cbnz(temp, slow_path->GetEntryLabel());
3555 }
3556
3557 {
3558 // Load the java.lang.ref.Reference class.
3559 UseScratchRegisterScope temps(masm);
3560 Register temp = temps.AcquireW();
3561 codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
3562
3563 // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3564 MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3565 DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3566 DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3567 IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3568 __ Ldrh(temp, HeapOperand(temp, disable_intrinsic_offset.Uint32Value()));
3569 __ Cbnz(temp, slow_path->GetEntryLabel());
3570 }
3571
3572 // Load the value from the field.
3573 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3574 if (codegen_->EmitBakerReadBarrier()) {
3575 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3576 out,
3577 WRegisterFrom(obj),
3578 referent_offset,
3579 /*maybe_temp=*/ locations->GetTemp(0),
3580 /*needs_null_check=*/ true,
3581 /*use_load_acquire=*/ true);
3582 } else {
3583 MemOperand field = HeapOperand(WRegisterFrom(obj), referent_offset);
3584 codegen_->LoadAcquire(
3585 invoke, DataType::Type::kReference, WRegisterFrom(out), field, /*needs_null_check=*/ true);
3586 codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3587 }
3588 __ Bind(slow_path->GetExitLabel());
3589 }
3590
void IntrinsicLocationsBuilderARM64::VisitReferenceRefersTo(HInvoke* invoke) {
3592 IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
3593 }
3594
void IntrinsicCodeGeneratorARM64::VisitReferenceRefersTo(HInvoke* invoke) {
3596 LocationSummary* locations = invoke->GetLocations();
3597 MacroAssembler* masm = codegen_->GetVIXLAssembler();
3598 UseScratchRegisterScope temps(masm);
3599
3600 Register obj = WRegisterFrom(locations->InAt(0));
3601 Register other = WRegisterFrom(locations->InAt(1));
3602 Register out = WRegisterFrom(locations->Out());
3603 Register tmp = temps.AcquireW();
3604
3605 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3606 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3607
3608 MemOperand field = HeapOperand(obj, referent_offset);
3609 codegen_->LoadAcquire(invoke, DataType::Type::kReference, tmp, field, /*needs_null_check=*/ true);
3610 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(tmp);
3611
3612 __ Cmp(tmp, other);
3613
3614 if (codegen_->EmitReadBarrier()) {
3615 DCHECK(kUseBakerReadBarrier);
3616
3617 vixl::aarch64::Label calculate_result;
3618
3619 // If the GC is not marking, the comparison result is final.
3620 __ Cbz(mr, &calculate_result);
3621
3622 __ B(&calculate_result, eq); // ZF set if taken.
3623
3624 // Check if the loaded reference is null.
3625 __ Cbz(tmp, &calculate_result); // ZF clear if taken.
3626
3627 // For correct memory visibility, we need a barrier before loading the lock word.
3628 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3629
3630 // Load the lockword and check if it is a forwarding address.
3631 static_assert(LockWord::kStateShift == 30u);
3632 static_assert(LockWord::kStateForwardingAddress == 3u);
3633 __ Ldr(tmp, HeapOperand(tmp, monitor_offset));
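    // With the state in bits 31:30, only a forwarding-address lock word (state 3) compares
    // unsigned-higher-or-same against 0xc0000000.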
3634 __ Cmp(tmp, Operand(0xc0000000));
3635 __ B(&calculate_result, lo); // ZF clear if taken.
3636
3637 // Extract the forwarding address and compare with `other`.
3638 __ Cmp(other, Operand(tmp, LSL, LockWord::kForwardingAddressShift));
3639
3640 __ Bind(&calculate_result);
3641 }
3642
3643 // Convert ZF into the Boolean result.
3644 __ Cset(out, eq);
3645 }
3646
void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
3648 LocationSummary* locations =
3649 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3650 locations->SetOut(Location::RequiresRegister());
3651 }
3652
void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
3654 MacroAssembler* masm = GetVIXLAssembler();
3655 Register out = RegisterFrom(invoke->GetLocations()->Out(), DataType::Type::kInt32);
3656 UseScratchRegisterScope temps(masm);
3657 Register temp = temps.AcquireX();
3658
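  // Load the interrupted flag with acquire semantics; if it was set, clear it below with a
  // store-release.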
3659 __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
3660 __ Ldar(out.W(), MemOperand(temp));
3661
3662 vixl::aarch64::Label done;
3663 __ Cbz(out.W(), &done);
3664 __ Stlr(wzr, MemOperand(temp));
3665 __ Bind(&done);
3666 }
3667
void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
3669 LocationSummary* locations =
3670 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3671 locations->SetInAt(0, Location::Any());
3672 }
3673
void IntrinsicCodeGeneratorARM64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
3675
void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) {
3677 if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3678 return;
3679 }
3680
3681 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3682 LocationSummary::kNoCall,
3683 kIntrinsified);
3684
3685 locations->SetInAt(0, Location::RequiresRegister());
3686 locations->SetInAt(1, Location::RequiresRegister());
3687 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3688 }
3689
3690 // Lower the invoke of CRC32.update(int crc, int b).
void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) {
3692 DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3693
3694 MacroAssembler* masm = GetVIXLAssembler();
3695
3696 Register crc = InputRegisterAt(invoke, 0);
3697 Register val = InputRegisterAt(invoke, 1);
3698 Register out = OutputRegister(invoke);
3699
3700 // The general algorithm of the CRC32 calculation is:
3701 // crc = ~crc
3702 // result = crc32_for_byte(crc, b)
3703 // crc = ~result
3704 // It is directly lowered to three instructions.
3705
3706 UseScratchRegisterScope temps(masm);
3707 Register tmp = temps.AcquireSameSizeAs(out);
3708
3709 __ Mvn(tmp, crc);
3710 __ Crc32b(tmp, tmp, val);
3711 __ Mvn(out, tmp);
3712 }
3713
3714 // Generate code using CRC32 instructions which calculates
3715 // a CRC32 value of a byte.
3716 //
3717 // Parameters:
3718 // masm - VIXL macro assembler
3719 // crc - a register holding an initial CRC value
3720 // ptr - a register holding a memory address of bytes
3721 // length - a register holding a number of bytes to process
3722 // out - a register to put a result of calculation
static void GenerateCodeForCalculationCRC32ValueOfBytes(MacroAssembler* masm,
3724 const Register& crc,
3725 const Register& ptr,
3726 const Register& length,
3727 const Register& out) {
3728 // The algorithm of CRC32 of bytes is:
3729 // crc = ~crc
3730 // process a few first bytes to make the array 8-byte aligned
3731 // while array has 8 bytes do:
3732 // crc = crc32_of_8bytes(crc, 8_bytes(array))
3733 // if array has 4 bytes:
3734 // crc = crc32_of_4bytes(crc, 4_bytes(array))
3735 // if array has 2 bytes:
3736 // crc = crc32_of_2bytes(crc, 2_bytes(array))
3737 // if array has a byte:
3738 // crc = crc32_of_byte(crc, 1_byte(array))
3739 // crc = ~crc
3740
3741 vixl::aarch64::Label loop, done;
3742 vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte;
3743 vixl::aarch64::Label aligned2, aligned4, aligned8;
3744
3745 // Use VIXL scratch registers as the VIXL macro assembler won't use them in
3746 // instructions below.
3747 UseScratchRegisterScope temps(masm);
3748 Register len = temps.AcquireW();
3749 Register array_elem = temps.AcquireW();
3750
3751 __ Mvn(out, crc);
3752 __ Mov(len, length);
3753
3754 __ Tbz(ptr, 0, &aligned2);
3755 __ Subs(len, len, 1);
3756 __ B(&done, lo);
3757 __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex));
3758 __ Crc32b(out, out, array_elem);
3759
3760 __ Bind(&aligned2);
3761 __ Tbz(ptr, 1, &aligned4);
3762 __ Subs(len, len, 2);
3763 __ B(&process_1byte, lo);
3764 __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3765 __ Crc32h(out, out, array_elem);
3766
3767 __ Bind(&aligned4);
3768 __ Tbz(ptr, 2, &aligned8);
3769 __ Subs(len, len, 4);
3770 __ B(&process_2bytes, lo);
3771 __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3772 __ Crc32w(out, out, array_elem);
3773
3774 __ Bind(&aligned8);
3775 __ Subs(len, len, 8);
3776 // If len < 8 go to process data by 4 bytes, 2 bytes and a byte.
3777 __ B(&process_4bytes, lo);
3778
3779 // The main loop processing data by 8 bytes.
3780 __ Bind(&loop);
3781 __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex));
3782 __ Subs(len, len, 8);
3783 __ Crc32x(out, out, array_elem.X());
3784 // if len >= 8, process the next 8 bytes.
3785 __ B(&loop, hs);
3786
3787 // Process the remaining data, which is less than 8 bytes.
3788 // The code generated below works with values of len
3789 // which come in the range [-8, 0].
3790 // The low three bits of len are used to detect whether 4 bytes, 2 bytes or
3791 // a byte can be processed.
3792 // The checking order is from bit 2 to bit 0:
3793 // bit 2 is set: at least 4 bytes available
3794 // bit 1 is set: at least 2 bytes available
3795 // bit 0 is set: at least a byte available
3796 __ Bind(&process_4bytes);
3797 // Goto process_2bytes if less than four bytes available
3798 __ Tbz(len, 2, &process_2bytes);
3799 __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3800 __ Crc32w(out, out, array_elem);
3801
3802 __ Bind(&process_2bytes);
3803 // Goto process_1byte if less than two bytes available
3804 __ Tbz(len, 1, &process_1byte);
3805 __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3806 __ Crc32h(out, out, array_elem);
3807
3808 __ Bind(&process_1byte);
3809 // Goto done if no bytes available
3810 __ Tbz(len, 0, &done);
3811 __ Ldrb(array_elem, MemOperand(ptr));
3812 __ Crc32b(out, out, array_elem);
3813
3814 __ Bind(&done);
3815 __ Mvn(out, out);
3816 }
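
// A plain-C++ sketch of the same align-then-8/4/2/1-byte strategy, using the ACLE
// intrinsics from <arm_acle.h> and memcpy from <cstring>. This is illustrative only,
// not the code the compiler emits:
//
//   uint32_t Crc32Bytes(uint32_t crc, const uint8_t* ptr, uint32_t length) {
//     crc = ~crc;
//     if ((reinterpret_cast<uintptr_t>(ptr) & 1u) != 0u && length >= 1u) {
//       crc = __crc32b(crc, *ptr); ptr += 1; length -= 1;
//     }
//     if ((reinterpret_cast<uintptr_t>(ptr) & 2u) != 0u && length >= 2u) {
//       uint16_t v; memcpy(&v, ptr, 2); crc = __crc32h(crc, v); ptr += 2; length -= 2;
//     }
//     if ((reinterpret_cast<uintptr_t>(ptr) & 4u) != 0u && length >= 4u) {
//       uint32_t v; memcpy(&v, ptr, 4); crc = __crc32w(crc, v); ptr += 4; length -= 4;
//     }
//     for (; length >= 8u; ptr += 8, length -= 8) {
//       uint64_t v; memcpy(&v, ptr, 8); crc = __crc32d(crc, v);
//     }
//     if ((length & 4u) != 0u) { uint32_t v; memcpy(&v, ptr, 4); crc = __crc32w(crc, v); ptr += 4; }
//     if ((length & 2u) != 0u) { uint16_t v; memcpy(&v, ptr, 2); crc = __crc32h(crc, v); ptr += 2; }
//     if ((length & 1u) != 0u) { crc = __crc32b(crc, *ptr); }
//     return ~crc;
//   }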
3817
3818 // The array-size threshold above which the library-provided implementation of
3819 // CRC32.updateBytes is used instead of the intrinsic.
3820 static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024;
3821
VisitCRC32UpdateBytes(HInvoke * invoke)3822 void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3823 if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3824 return;
3825 }
3826
3827 LocationSummary* locations =
3828 new (allocator_) LocationSummary(invoke,
3829 LocationSummary::kCallOnSlowPath,
3830 kIntrinsified);
3831
3832 locations->SetInAt(0, Location::RequiresRegister());
3833 locations->SetInAt(1, Location::RequiresRegister());
3834 locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
3835 locations->SetInAt(3, Location::RequiresRegister());
3836 locations->AddTemp(Location::RequiresRegister());
3837 locations->SetOut(Location::RequiresRegister());
3838 }
3839
3840 // Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len)
3841 //
3842 // Note: The intrinsic is not used if len exceeds a threshold.
VisitCRC32UpdateBytes(HInvoke * invoke)3843 void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3844 DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3845
3846 MacroAssembler* masm = GetVIXLAssembler();
3847 LocationSummary* locations = invoke->GetLocations();
3848
3849 SlowPathCodeARM64* slow_path =
3850 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
3851 codegen_->AddSlowPath(slow_path);
3852
3853 Register length = WRegisterFrom(locations->InAt(3));
3854 __ Cmp(length, kCRC32UpdateBytesThreshold);
3855 __ B(slow_path->GetEntryLabel(), hi);
3856
3857 const uint32_t array_data_offset =
3858 mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value();
3859 Register ptr = XRegisterFrom(locations->GetTemp(0));
3860 Register array = XRegisterFrom(locations->InAt(1));
3861 Location offset = locations->InAt(2);
3862 if (offset.IsConstant()) {
3863 int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue();
3864 __ Add(ptr, array, array_data_offset + offset_value);
3865 } else {
3866 __ Add(ptr, array, array_data_offset);
3867 __ Add(ptr, ptr, XRegisterFrom(offset));
3868 }
3869
3870 Register crc = WRegisterFrom(locations->InAt(0));
3871 Register out = WRegisterFrom(locations->Out());
3872
3873 GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
3874
3875 __ Bind(slow_path->GetExitLabel());
3876 }
3877
VisitCRC32UpdateByteBuffer(HInvoke * invoke)3878 void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3879 if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3880 return;
3881 }
3882
3883 LocationSummary* locations =
3884 new (allocator_) LocationSummary(invoke,
3885 LocationSummary::kNoCall,
3886 kIntrinsified);
3887
3888 locations->SetInAt(0, Location::RequiresRegister());
3889 locations->SetInAt(1, Location::RequiresRegister());
3890 locations->SetInAt(2, Location::RequiresRegister());
3891 locations->SetInAt(3, Location::RequiresRegister());
3892 locations->AddTemp(Location::RequiresRegister());
3893 locations->SetOut(Location::RequiresRegister());
3894 }
3895
3896 // Lower the invoke of CRC32.updateByteBuffer(int crc, long addr, int off, int len)
3897 //
3898 // There is no need to generate code checking whether addr is 0.
3899 // The method updateByteBuffer is a private method of java.util.zip.CRC32,
3900 // so it is never called from outside the CRC32 class, and it is always
3901 // passed the address of a DirectBuffer.
3902 // An empty DirectBuffer implementation may use a zero address, but then
3903 // its length must also be zero, and the generated code below handles a
3904 // zero length correctly.
VisitCRC32UpdateByteBuffer(HInvoke * invoke)3905 void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3906 DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3907
3908 MacroAssembler* masm = GetVIXLAssembler();
3909 LocationSummary* locations = invoke->GetLocations();
3910
3911 Register addr = XRegisterFrom(locations->InAt(1));
3912 Register ptr = XRegisterFrom(locations->GetTemp(0));
3913 __ Add(ptr, addr, XRegisterFrom(locations->InAt(2)));
3914
3915 Register crc = WRegisterFrom(locations->InAt(0));
3916 Register length = WRegisterFrom(locations->InAt(3));
3917 Register out = WRegisterFrom(locations->Out());
3918 GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
3919 }
3920
VisitFP16ToFloat(HInvoke * invoke)3921 void IntrinsicLocationsBuilderARM64::VisitFP16ToFloat(HInvoke* invoke) {
3922 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3923 return;
3924 }
3925
3926 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3927 LocationSummary::kNoCall,
3928 kIntrinsified);
3929 locations->SetInAt(0, Location::RequiresRegister());
3930 locations->SetOut(Location::RequiresFpuRegister());
3931 }
3932
VisitFP16ToFloat(HInvoke * invoke)3933 void IntrinsicCodeGeneratorARM64::VisitFP16ToFloat(HInvoke* invoke) {
3934 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3935 MacroAssembler* masm = GetVIXLAssembler();
3936 UseScratchRegisterScope scratch_scope(masm);
3937 Register bits = InputRegisterAt(invoke, 0);
3938 VRegister out = SRegisterFrom(invoke->GetLocations()->Out());
3939 VRegister half = scratch_scope.AcquireH();
3940 __ Fmov(half, bits); // ARMv8.2
3941 __ Fcvt(out, half);
3942 }
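
// Semantically this is FP16.toFloat(short h): reinterpret the low 16 bits as an IEEE-754
// binary16 value and widen it to binary32. A sketch assuming the ARM __fp16 extension type
// and memcpy from <cstring> (illustrative only):
//
//   float Fp16ToFloat(uint16_t bits) {
//     __fp16 h;
//     memcpy(&h, &bits, sizeof(h));   // Fmov(half, bits)
//     return static_cast<float>(h);   // Fcvt(out, half)
//   }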
3943
VisitFP16ToHalf(HInvoke * invoke)3944 void IntrinsicLocationsBuilderARM64::VisitFP16ToHalf(HInvoke* invoke) {
3945 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3946 return;
3947 }
3948
3949 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3950 LocationSummary::kNoCall,
3951 kIntrinsified);
3952 locations->SetInAt(0, Location::RequiresFpuRegister());
3953 locations->SetOut(Location::RequiresRegister());
3954 }
3955
VisitFP16ToHalf(HInvoke * invoke)3956 void IntrinsicCodeGeneratorARM64::VisitFP16ToHalf(HInvoke* invoke) {
3957 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3958 MacroAssembler* masm = GetVIXLAssembler();
3959 UseScratchRegisterScope scratch_scope(masm);
3960 VRegister in = SRegisterFrom(invoke->GetLocations()->InAt(0));
3961 VRegister half = scratch_scope.AcquireH();
3962 Register out = WRegisterFrom(invoke->GetLocations()->Out());
3963 __ Fcvt(half, in);
3964 __ Fmov(out, half);
3965 __ Sxth(out, out); // sign extend due to returning a short type.
3966 }
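
// The reverse direction, FP16.toHalf(float f), as a sketch under the same __fp16 assumption:
//
//   int32_t FloatToFp16Bits(float f) {
//     __fp16 h = static_cast<__fp16>(f);   // Fcvt(half, in)
//     uint16_t bits;
//     memcpy(&bits, &h, sizeof(bits));     // Fmov(out, half)
//     return static_cast<int16_t>(bits);   // Sxth: the Java method returns a (signed) short
//   }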
3967
3968 template<typename OP>
GenerateFP16Round(HInvoke * invoke,CodeGeneratorARM64 * const codegen_,MacroAssembler * masm,OP && roundOp)3969 void GenerateFP16Round(HInvoke* invoke,
3970 CodeGeneratorARM64* const codegen_,
3971 MacroAssembler* masm,
3972 OP&& roundOp) {
3973 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3974 LocationSummary* locations = invoke->GetLocations();
3975 UseScratchRegisterScope scratch_scope(masm);
3976 Register out = WRegisterFrom(locations->Out());
3977 VRegister half = scratch_scope.AcquireH();
3978 __ Fmov(half, WRegisterFrom(locations->InAt(0)));
3979 roundOp(half, half);
3980 __ Fmov(out, half);
3981 __ Sxth(out, out);
3982 }
3983
VisitFP16Floor(HInvoke * invoke)3984 void IntrinsicLocationsBuilderARM64::VisitFP16Floor(HInvoke* invoke) {
3985 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3986 return;
3987 }
3988
3989 CreateIntToIntLocations(allocator_, invoke);
3990 }
3991
VisitFP16Floor(HInvoke * invoke)3992 void IntrinsicCodeGeneratorARM64::VisitFP16Floor(HInvoke* invoke) {
3993 MacroAssembler* masm = GetVIXLAssembler();
3994 auto roundOp = [masm](const VRegister& out, const VRegister& in) {
3995 __ Frintm(out, in); // Round towards Minus infinity
3996 };
3997 GenerateFP16Round(invoke, codegen_, masm, roundOp);
3998 }
3999
VisitFP16Ceil(HInvoke * invoke)4000 void IntrinsicLocationsBuilderARM64::VisitFP16Ceil(HInvoke* invoke) {
4001 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
4002 return;
4003 }
4004
4005 CreateIntToIntLocations(allocator_, invoke);
4006 }
4007
VisitFP16Ceil(HInvoke * invoke)4008 void IntrinsicCodeGeneratorARM64::VisitFP16Ceil(HInvoke* invoke) {
4009 MacroAssembler* masm = GetVIXLAssembler();
4010 auto roundOp = [masm](const VRegister& out, const VRegister& in) {
4011 __ Frintp(out, in); // Round towards Plus infinity
4012 };
4013 GenerateFP16Round(invoke, codegen_, masm, roundOp);
4014 }
4015
VisitFP16Rint(HInvoke * invoke)4016 void IntrinsicLocationsBuilderARM64::VisitFP16Rint(HInvoke* invoke) {
4017 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
4018 return;
4019 }
4020
4021 CreateIntToIntLocations(allocator_, invoke);
4022 }
4023
VisitFP16Rint(HInvoke * invoke)4024 void IntrinsicCodeGeneratorARM64::VisitFP16Rint(HInvoke* invoke) {
4025 MacroAssembler* masm = GetVIXLAssembler();
4026 auto roundOp = [masm](const VRegister& out, const VRegister& in) {
4027 __ Frintn(out, in); // Round to nearest, with ties to even
4028 };
4029 GenerateFP16Round(invoke, codegen_, masm, roundOp);
4030 }
4031
FP16ComparisonLocations(HInvoke * invoke,ArenaAllocator * allocator_,CodeGeneratorARM64 * codegen_,int requiredTemps)4032 void FP16ComparisonLocations(HInvoke* invoke,
4033 ArenaAllocator* allocator_,
4034 CodeGeneratorARM64* codegen_,
4035 int requiredTemps) {
4036 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
4037 return;
4038 }
4039
4040 CreateIntIntToIntLocations(allocator_, invoke);
4041 for (int i = 0; i < requiredTemps; i++) {
4042 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
4043 }
4044 }
4045
4046 template<typename OP>
GenerateFP16Compare(HInvoke * invoke,CodeGeneratorARM64 * codegen,MacroAssembler * masm,const OP compareOp)4047 void GenerateFP16Compare(HInvoke* invoke,
4048 CodeGeneratorARM64* codegen,
4049 MacroAssembler* masm,
4050 const OP compareOp) {
4051 DCHECK(codegen->GetInstructionSetFeatures().HasFP16());
4052 LocationSummary* locations = invoke->GetLocations();
4053 Register out = WRegisterFrom(locations->Out());
4054 VRegister half0 = HRegisterFrom(locations->GetTemp(0));
4055 VRegister half1 = HRegisterFrom(locations->GetTemp(1));
4056 __ Fmov(half0, WRegisterFrom(locations->InAt(0)));
4057 __ Fmov(half1, WRegisterFrom(locations->InAt(1)));
4058 compareOp(out, half0, half1);
4059 }
4060
GenerateFP16Compare(HInvoke * invoke,CodeGeneratorARM64 * codegen,MacroAssembler * masm,vixl::aarch64::Condition cond)4061 static inline void GenerateFP16Compare(HInvoke* invoke,
4062 CodeGeneratorARM64* codegen,
4063 MacroAssembler* masm,
4064 vixl::aarch64::Condition cond) {
4065 auto compareOp = [masm, cond](const Register out, const VRegister& in0, const VRegister& in1) {
4066 __ Fcmp(in0, in1);
4067 __ Cset(out, cond);
4068 };
4069 GenerateFP16Compare(invoke, codegen, masm, compareOp);
4070 }
4071
VisitFP16Greater(HInvoke * invoke)4072 void IntrinsicLocationsBuilderARM64::VisitFP16Greater(HInvoke* invoke) {
4073 FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
4074 }
4075
VisitFP16Greater(HInvoke * invoke)4076 void IntrinsicCodeGeneratorARM64::VisitFP16Greater(HInvoke* invoke) {
4077 MacroAssembler* masm = GetVIXLAssembler();
4078 GenerateFP16Compare(invoke, codegen_, masm, gt);
4079 }
4080
VisitFP16GreaterEquals(HInvoke * invoke)4081 void IntrinsicLocationsBuilderARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
4082 FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
4083 }
4084
VisitFP16GreaterEquals(HInvoke * invoke)4085 void IntrinsicCodeGeneratorARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
4086 MacroAssembler* masm = GetVIXLAssembler();
4087 GenerateFP16Compare(invoke, codegen_, masm, ge);
4088 }
4089
VisitFP16Less(HInvoke * invoke)4090 void IntrinsicLocationsBuilderARM64::VisitFP16Less(HInvoke* invoke) {
4091 FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
4092 }
4093
VisitFP16Less(HInvoke * invoke)4094 void IntrinsicCodeGeneratorARM64::VisitFP16Less(HInvoke* invoke) {
4095 MacroAssembler* masm = GetVIXLAssembler();
4096 GenerateFP16Compare(invoke, codegen_, masm, mi);
4097 }
4098
VisitFP16LessEquals(HInvoke * invoke)4099 void IntrinsicLocationsBuilderARM64::VisitFP16LessEquals(HInvoke* invoke) {
4100 FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
4101 }
4102
VisitFP16LessEquals(HInvoke * invoke)4103 void IntrinsicCodeGeneratorARM64::VisitFP16LessEquals(HInvoke* invoke) {
4104 MacroAssembler* masm = GetVIXLAssembler();
4105 GenerateFP16Compare(invoke, codegen_, masm, ls);
4106 }
4107
VisitFP16Compare(HInvoke * invoke)4108 void IntrinsicLocationsBuilderARM64::VisitFP16Compare(HInvoke* invoke) {
4109 FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
4110 }
4111
VisitFP16Compare(HInvoke * invoke)4112 void IntrinsicCodeGeneratorARM64::VisitFP16Compare(HInvoke* invoke) {
4113 MacroAssembler* masm = GetVIXLAssembler();
4114 auto compareOp = [masm](const Register out,
4115 const VRegister& in0,
4116 const VRegister& in1) {
4117 vixl::aarch64::Label end;
4118 vixl::aarch64::Label equal;
4119 vixl::aarch64::Label normal;
4120
4121 // The normal cases for this method are:
4122 // - in0 > in1 => out = 1
4123 // - in0 < in1 => out = -1
4124 // - in0 == in1 => out = 0
4125 // +/-Infinity are ordered by default so are handled by the normal case.
4126 // There are two special cases that Fcmp is insufficient for distinguishing:
4127 // - in0 and in1 are +0 and -0 => +0 > -0 so compare encoding instead of value
4128 // - in0 or in1 is NaN => manually compare with in0 and in1 separately
4129 __ Fcmp(in0, in1);
4130 __ B(eq, &equal); // in0==in1 or +0 -0 case.
4131 __ B(vc, &normal); // in0 and in1 are ordered (not NaN).
4132
4133 // Either of the inputs is NaN.
4134 // NaN is equal to itself and greater than any other number so:
4135 // - if only in0 is NaN => return 1
4136 // - if only in1 is NaN => return -1
4137 // - if both in0 and in1 are NaN => return 0
4138 __ Fcmp(in0, 0.0);
4139 __ Mov(out, -1);
4140 __ B(vc, &end); // in0 != NaN => out = -1.
4141 __ Fcmp(in1, 0.0);
4142 __ Cset(out, vc); // if in1 != NaN => out = 1, otherwise both are NaNs => out = 0.
4143 __ B(&end);
4144
4145 // in0 == in1, or one of the inputs is +0 and the other is -0.
4146 __ Bind(&equal);
4147 // Compare the encodings of in0 and in1 as the denormal fraction of a single-precision float.
4148 // The operand order is reversed because the encoding of -0 is greater than the encoding
4149 // of +0, even though +0 > -0 as values.
4150 // The instruction Fmov(Hregister, Wregister) zeroes the bits of the vector register above
4151 // the H part, so bits [127:16] are 0 and do not affect the Fcmp below.
4152 __ Fcmp(in1.S(), in0.S());
4153
4154 __ Bind(&normal);
4155 __ Cset(out, gt); // if in0 > in1 => out = 1, otherwise out = 0.
4156 // Note: the flags here may come from the `equal` path or from the original comparison.
4157 __ Csinv(out, out, wzr, pl); // if in0 >= in1 out=out, otherwise out=-1.
4158
4159 __ Bind(&end);
4160 };
4161
4162 GenerateFP16Compare(invoke, codegen_, masm, compareOp);
4163 }
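
// Reference semantics implemented by the sequence above, as a sketch (IsNaN and
// Fp16ToFloat are illustrative helpers, not real functions in this file):
//
//   int32_t CompareFp16(uint16_t x, uint16_t y) {
//     bool x_nan = IsNaN(x);
//     bool y_nan = IsNaN(y);
//     if (x_nan || y_nan) return x_nan ? (y_nan ? 0 : 1) : -1;  // NaN == NaN, NaN > anything.
//     float fx = Fp16ToFloat(x);
//     float fy = Fp16ToFloat(y);
//     if (fx > fy) return 1;
//     if (fx < fy) return -1;
//     // Numerically equal; only +0 vs -0 can still differ and -0 is ordered below +0.
//     return (x == y) ? 0 : (((x & 0x8000u) != 0u) ? -1 : 1);
//   }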
4164
4165 const int kFP16NaN = 0x7e00;
4166
GenerateFP16MinMax(HInvoke * invoke,CodeGeneratorARM64 * codegen,MacroAssembler * masm,vixl::aarch64::Condition cond)4167 static inline void GenerateFP16MinMax(HInvoke* invoke,
4168 CodeGeneratorARM64* codegen,
4169 MacroAssembler* masm,
4170 vixl::aarch64::Condition cond) {
4171 DCHECK(codegen->GetInstructionSetFeatures().HasFP16());
4172 LocationSummary* locations = invoke->GetLocations();
4173
4174 vixl::aarch64::Label equal;
4175 vixl::aarch64::Label end;
4176
4177 UseScratchRegisterScope temps(masm);
4178
4179 Register out = WRegisterFrom(locations->Out());
4180 Register in0 = WRegisterFrom(locations->InAt(0));
4181 Register in1 = WRegisterFrom(locations->InAt(1));
4182 VRegister half0 = HRegisterFrom(locations->GetTemp(0));
4183 VRegister half1 = temps.AcquireH();
4184
4185 // The normal cases for this method are:
4186 // - in0.h == in1.h => out = in0 or in1
4187 // - in0.h <cond> in1.h => out = in0
4188 // - in0.h <!cond> in1.h => out = in1
4189 // +/-Infinity are ordered by default so are handled by the normal case.
4190 // There are two special cases that Fcmp is insufficient for distinguishing:
4191 // - in0 and in1 are +0 and -0 => +0 > -0 so compare encoding instead of value
4192 // - in0 or in1 is NaN => out = NaN
4193 __ Fmov(half0, in0);
4194 __ Fmov(half1, in1);
4195 __ Fcmp(half0, half1);
4196 __ B(eq, &equal); // half0 = half1 or +0/-0 case.
4197 __ Csel(out, in0, in1, cond); // if half0 <cond> half1 => out = in0, otherwise out = in1.
4198 __ B(vc, &end); // None of the inputs were NaN.
4199
4200 // At least one input was NaN.
4201 __ Mov(out, kFP16NaN); // out=NaN.
4202 __ B(&end);
4203
4204 // in0 == in1, or one of the inputs is +0 and the other is -0.
4205 __ Bind(&equal);
4206 // Fcmp cannot distinguish +0 and -0, so compare the encodings instead.
4207 // The encodings are compared as the denormal fraction of a single-precision float.
4208 // Note: the encoding of -0 is greater than that of +0 despite +0 > -0, so in0 and in1 are swapped.
4209 // Note: Fmov(Hregister, Wregister) zeroes the bits of the vector register above the H part.
4210 __ Fcmp(half1.S(), half0.S());
4211
4212 __ Csel(out, in0, in1, cond); // if half0 <cond> half1 => out = in0, otherwise out = in1.
4213
4214 __ Bind(&end);
4215 }
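
// The intended result, as a sketch (IsNaN and Fp16ToFloat are illustrative helpers):
//
//   uint16_t MinMaxFp16(uint16_t x, uint16_t y, bool is_min) {
//     if (IsNaN(x) || IsNaN(y)) return 0x7e00;   // kFP16NaN
//     float fx = Fp16ToFloat(x);
//     float fy = Fp16ToFloat(y);
//     if (fx == fy) {
//       // Covers +0 vs -0: min picks -0, max picks +0 (exactly equal values pick either).
//       return (is_min == ((x & 0x8000u) != 0u)) ? x : y;
//     }
//     return is_min ? (fx < fy ? x : y) : (fx > fy ? x : y);
//   }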
4216
VisitFP16Min(HInvoke * invoke)4217 void IntrinsicLocationsBuilderARM64::VisitFP16Min(HInvoke* invoke) {
4218 FP16ComparisonLocations(invoke, allocator_, codegen_, 1);
4219 }
4220
VisitFP16Min(HInvoke * invoke)4221 void IntrinsicCodeGeneratorARM64::VisitFP16Min(HInvoke* invoke) {
4222 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
4223 MacroAssembler* masm = GetVIXLAssembler();
4224 GenerateFP16MinMax(invoke, codegen_, masm, mi);
4225 }
4226
VisitFP16Max(HInvoke * invoke)4227 void IntrinsicLocationsBuilderARM64::VisitFP16Max(HInvoke* invoke) {
4228 FP16ComparisonLocations(invoke, allocator_, codegen_, 1);
4229 }
4230
VisitFP16Max(HInvoke * invoke)4231 void IntrinsicCodeGeneratorARM64::VisitFP16Max(HInvoke* invoke) {
4232 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
4233 MacroAssembler* masm = GetVIXLAssembler();
4234 GenerateFP16MinMax(invoke, codegen_, masm, gt);
4235 }
4236
GenerateDivideUnsigned(HInvoke * invoke,CodeGeneratorARM64 * codegen)4237 static void GenerateDivideUnsigned(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4238 LocationSummary* locations = invoke->GetLocations();
4239 MacroAssembler* masm = codegen->GetVIXLAssembler();
4240 DataType::Type type = invoke->GetType();
4241 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
4242
4243 Register dividend = RegisterFrom(locations->InAt(0), type);
4244 Register divisor = RegisterFrom(locations->InAt(1), type);
4245 Register out = RegisterFrom(locations->Out(), type);
4246
4247 // If the divisor is zero, bail out to the managed implementation to handle it.
4248 SlowPathCodeARM64* slow_path =
4249 new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
4250 codegen->AddSlowPath(slow_path);
4251 __ Cbz(divisor, slow_path->GetEntryLabel());
4252
4253 __ Udiv(out, dividend, divisor);
4254
4255 __ Bind(slow_path->GetExitLabel());
4256 }
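
// Behaviour being implemented (sketch): Integer/Long.divideUnsigned throws
// ArithmeticException for a zero divisor, which is delegated to the managed code via the
// slow path; otherwise UDIV yields the unsigned quotient directly:
//
//   uint64_t DivideUnsigned(uint64_t dividend, uint64_t divisor) {
//     if (divisor == 0u) { /* slow path: call the managed implementation, which throws */ }
//     return dividend / divisor;   // Udiv
//   }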
4257
VisitIntegerDivideUnsigned(HInvoke * invoke)4258 void IntrinsicLocationsBuilderARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
4259 CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
4260 }
4261
VisitIntegerDivideUnsigned(HInvoke * invoke)4262 void IntrinsicCodeGeneratorARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
4263 GenerateDivideUnsigned(invoke, codegen_);
4264 }
4265
VisitLongDivideUnsigned(HInvoke * invoke)4266 void IntrinsicLocationsBuilderARM64::VisitLongDivideUnsigned(HInvoke* invoke) {
4267 CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
4268 }
4269
VisitLongDivideUnsigned(HInvoke * invoke)4270 void IntrinsicCodeGeneratorARM64::VisitLongDivideUnsigned(HInvoke* invoke) {
4271 GenerateDivideUnsigned(invoke, codegen_);
4272 }
4273
VisitMathMultiplyHigh(HInvoke * invoke)4274 void IntrinsicLocationsBuilderARM64::VisitMathMultiplyHigh(HInvoke* invoke) {
4275 CreateIntIntToIntLocations(allocator_, invoke);
4276 }
4277
VisitMathMultiplyHigh(HInvoke * invoke)4278 void IntrinsicCodeGeneratorARM64::VisitMathMultiplyHigh(HInvoke* invoke) {
4279 LocationSummary* locations = invoke->GetLocations();
4280 MacroAssembler* masm = codegen_->GetVIXLAssembler();
4281 DataType::Type type = invoke->GetType();
4282 DCHECK(type == DataType::Type::kInt64);
4283
4284 Register x = RegisterFrom(locations->InAt(0), type);
4285 Register y = RegisterFrom(locations->InAt(1), type);
4286 Register out = RegisterFrom(locations->Out(), type);
4287
4288 __ Smulh(out, x, y);
4289 }
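
// Math.multiplyHigh(x, y) is the high 64 bits of the 128-bit signed product, which is
// exactly what SMULH computes. Sketch assuming the __int128 compiler extension:
//
//   int64_t MultiplyHigh(int64_t x, int64_t y) {
//     return static_cast<int64_t>((static_cast<__int128>(x) * y) >> 64);
//   }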
4290
GenerateMathFma(HInvoke * invoke,CodeGeneratorARM64 * codegen)4291 static void GenerateMathFma(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4292 MacroAssembler* masm = codegen->GetVIXLAssembler();
4293
4294 VRegister n = helpers::InputFPRegisterAt(invoke, 0);
4295 VRegister m = helpers::InputFPRegisterAt(invoke, 1);
4296 VRegister a = helpers::InputFPRegisterAt(invoke, 2);
4297 VRegister out = helpers::OutputFPRegister(invoke);
4298
4299 __ Fmadd(out, n, m, a);
4300 }
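
// Math.fma requires a fused multiply-add with a single rounding step, which FMADD provides.
// A sketch of the same contract in C++ (std::fma from <cmath>):
//
//   double Fma(double n, double m, double a) { return std::fma(n, m, a); }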
4301
VisitMathFmaDouble(HInvoke * invoke)4302 void IntrinsicLocationsBuilderARM64::VisitMathFmaDouble(HInvoke* invoke) {
4303 CreateFPFPFPToFPLocations(allocator_, invoke);
4304 }
4305
VisitMathFmaDouble(HInvoke * invoke)4306 void IntrinsicCodeGeneratorARM64::VisitMathFmaDouble(HInvoke* invoke) {
4307 GenerateMathFma(invoke, codegen_);
4308 }
4309
VisitMathFmaFloat(HInvoke * invoke)4310 void IntrinsicLocationsBuilderARM64::VisitMathFmaFloat(HInvoke* invoke) {
4311 CreateFPFPFPToFPLocations(allocator_, invoke);
4312 }
4313
VisitMathFmaFloat(HInvoke * invoke)4314 void IntrinsicCodeGeneratorARM64::VisitMathFmaFloat(HInvoke* invoke) {
4315 GenerateMathFma(invoke, codegen_);
4316 }
4317
4318 class VarHandleSlowPathARM64 : public IntrinsicSlowPathARM64 {
4319 public:
VarHandleSlowPathARM64(HInvoke * invoke,std::memory_order order)4320 VarHandleSlowPathARM64(HInvoke* invoke, std::memory_order order)
4321 : IntrinsicSlowPathARM64(invoke),
4322 order_(order),
4323 return_success_(false),
4324 strong_(false),
4325 get_and_update_op_(GetAndUpdateOp::kAdd) {
4326 }
4327
GetByteArrayViewCheckLabel()4328 vixl::aarch64::Label* GetByteArrayViewCheckLabel() {
4329 return &byte_array_view_check_label_;
4330 }
4331
GetNativeByteOrderLabel()4332 vixl::aarch64::Label* GetNativeByteOrderLabel() {
4333 return &native_byte_order_label_;
4334 }
4335
SetCompareAndSetOrExchangeArgs(bool return_success,bool strong)4336 void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
4337 if (return_success) {
4338 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
4339 } else {
4340 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
4341 }
4342 return_success_ = return_success;
4343 strong_ = strong;
4344 }
4345
SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op)4346 void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
4347 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
4348 get_and_update_op_ = get_and_update_op;
4349 }
4350
EmitNativeCode(CodeGenerator * codegen_in)4351 void EmitNativeCode(CodeGenerator* codegen_in) override {
4352 if (GetByteArrayViewCheckLabel()->IsLinked()) {
4353 EmitByteArrayViewCode(codegen_in);
4354 }
4355 IntrinsicSlowPathARM64::EmitNativeCode(codegen_in);
4356 }
4357
4358 private:
GetInvoke() const4359 HInvoke* GetInvoke() const {
4360 return GetInstruction()->AsInvoke();
4361 }
4362
GetAccessModeTemplate() const4363 mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
4364 return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
4365 }
4366
4367 void EmitByteArrayViewCode(CodeGenerator* codegen_in);
4368
4369 vixl::aarch64::Label byte_array_view_check_label_;
4370 vixl::aarch64::Label native_byte_order_label_;
4371 // Shared parameter for all VarHandle intrinsics.
4372 std::memory_order order_;
4373 // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
4374 bool return_success_;
4375 bool strong_;
4376 // Extra argument for GenerateVarHandleGetAndUpdate().
4377 GetAndUpdateOp get_and_update_op_;
4378 };
4379
4380 // Generate subtype check without read barriers.
GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARM64 * codegen,SlowPathCodeARM64 * slow_path,Register object,Register type,bool object_can_be_null=true)4381 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARM64* codegen,
4382 SlowPathCodeARM64* slow_path,
4383 Register object,
4384 Register type,
4385 bool object_can_be_null = true) {
4386 MacroAssembler* masm = codegen->GetVIXLAssembler();
4387
4388 const MemberOffset class_offset = mirror::Object::ClassOffset();
4389 const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
4390
4391 vixl::aarch64::Label success;
4392 if (object_can_be_null) {
4393 __ Cbz(object, &success);
4394 }
4395
4396 UseScratchRegisterScope temps(masm);
4397 Register temp = temps.AcquireW();
4398
4399 __ Ldr(temp, HeapOperand(object, class_offset.Int32Value()));
4400 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4401 vixl::aarch64::Label loop;
4402 __ Bind(&loop);
4403 __ Cmp(type, temp);
4404 __ B(&success, eq);
4405 __ Ldr(temp, HeapOperand(temp, super_class_offset.Int32Value()));
4406 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4407 __ Cbz(temp, slow_path->GetEntryLabel());
4408 __ B(&loop);
4409 __ Bind(&success);
4410 }
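
// The loop above is the read-barrier-free equivalent of this pseudo-C++ sketch
// (accessor names are simplified; false negatives go to the slow path):
//
//   bool IsSubTypeNoReadBarrier(mirror::Object* object, mirror::Class* type) {
//     if (object == nullptr) return true;   // only when object_can_be_null
//     for (mirror::Class* k = object->GetClass(); k != nullptr; k = k->GetSuperClass()) {
//       if (k == type) return true;
//     }
//     return false;   // -> slow path
//   }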
4411
4412 // Check access mode and the primitive type from VarHandle.varType.
4413 // Check reference arguments against the VarHandle.varType; for references this is a subclass
4414 // check without read barrier, so it can have false negatives which we handle in the slow path.
GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke * invoke,CodeGeneratorARM64 * codegen,SlowPathCodeARM64 * slow_path,DataType::Type type)4415 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
4416 CodeGeneratorARM64* codegen,
4417 SlowPathCodeARM64* slow_path,
4418 DataType::Type type) {
4419 mirror::VarHandle::AccessMode access_mode =
4420 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
4421 Primitive::Type primitive_type = DataTypeToPrimitive(type);
4422
4423 MacroAssembler* masm = codegen->GetVIXLAssembler();
4424 Register varhandle = InputRegisterAt(invoke, 0);
4425
4426 const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
4427 const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
4428 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4429
4430 UseScratchRegisterScope temps(masm);
4431 Register var_type_no_rb = temps.AcquireW();
4432 Register temp2 = temps.AcquireW();
4433
4434 // Check that the operation is permitted and the primitive type of varhandle.varType.
4435 // We do not need a read barrier when loading the reference only to read constant
4436 // primitive fields through it. Use LDP to load the two fields together.
4437 DCHECK_EQ(var_type_offset.Int32Value() + 4, access_mode_bit_mask_offset.Int32Value());
4438 __ Ldp(var_type_no_rb, temp2, HeapOperand(varhandle, var_type_offset.Int32Value()));
4439 codegen->GetAssembler()->MaybeUnpoisonHeapReference(var_type_no_rb);
4440 __ Tbz(temp2, static_cast<uint32_t>(access_mode), slow_path->GetEntryLabel());
4441 __ Ldrh(temp2, HeapOperand(var_type_no_rb, primitive_type_offset.Int32Value()));
4442 if (primitive_type == Primitive::kPrimNot) {
4443 static_assert(Primitive::kPrimNot == 0);
4444 __ Cbnz(temp2, slow_path->GetEntryLabel());
4445 } else {
4446 __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4447 __ B(slow_path->GetEntryLabel(), ne);
4448 }
4449
4450 temps.Release(temp2);
4451
4452 if (type == DataType::Type::kReference) {
4453 // Check reference arguments against the varType.
4454 // False negatives due to varType being an interface or array type
4455 // or due to the missing read barrier are handled by the slow path.
4456 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4457 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4458 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4459 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4460 HInstruction* arg = invoke->InputAt(arg_index);
4461 DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
4462 if (!arg->IsNullConstant()) {
4463 Register arg_reg = WRegisterFrom(invoke->GetLocations()->InAt(arg_index));
4464 GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, var_type_no_rb);
4465 }
4466 }
4467 }
4468 }
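
// In pseudo-code, the fast-path checks emitted above are roughly:
//
//   if (!varhandle->access_modes_bit_mask[access_mode]) goto slow_path;
//   if (varhandle->var_type->primitive_type != primitive_type) goto slow_path;
//   if (type == kReference) {
//     for (each non-null reference argument arg)
//       if (!IsSubTypeNoReadBarrier(arg, varhandle->var_type)) goto slow_path;
//   }
//
// The field names above are descriptive only, not the actual mirror:: accessors.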
4469
GenerateVarHandleStaticFieldCheck(HInvoke * invoke,CodeGeneratorARM64 * codegen,SlowPathCodeARM64 * slow_path)4470 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
4471 CodeGeneratorARM64* codegen,
4472 SlowPathCodeARM64* slow_path) {
4473 MacroAssembler* masm = codegen->GetVIXLAssembler();
4474 Register varhandle = InputRegisterAt(invoke, 0);
4475
4476 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4477
4478 UseScratchRegisterScope temps(masm);
4479 Register temp = temps.AcquireW();
4480
4481 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
4482 // Do not emit read barrier (or unpoison the reference) for comparing to null.
4483 __ Ldr(temp, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4484 __ Cbnz(temp, slow_path->GetEntryLabel());
4485 }
4486
GenerateVarHandleInstanceFieldChecks(HInvoke * invoke,CodeGeneratorARM64 * codegen,SlowPathCodeARM64 * slow_path)4487 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
4488 CodeGeneratorARM64* codegen,
4489 SlowPathCodeARM64* slow_path) {
4490 VarHandleOptimizations optimizations(invoke);
4491 MacroAssembler* masm = codegen->GetVIXLAssembler();
4492 Register varhandle = InputRegisterAt(invoke, 0);
4493 Register object = InputRegisterAt(invoke, 1);
4494
4495 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4496 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4497
4498 // Null-check the object.
4499 if (!optimizations.GetSkipObjectNullCheck()) {
4500 __ Cbz(object, slow_path->GetEntryLabel());
4501 }
4502
4503 if (!optimizations.GetUseKnownImageVarHandle()) {
4504 UseScratchRegisterScope temps(masm);
4505 Register temp = temps.AcquireW();
4506 Register temp2 = temps.AcquireW();
4507
4508 // Check that the VarHandle references an instance field by checking that
4509 // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
4510 // type compatibility check with the source object's type, which will fail for null.
4511 DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4512 __ Ldp(temp, temp2, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4513 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4514 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4515 __ Cbnz(temp2, slow_path->GetEntryLabel());
4516
4517 // Check that the object has the correct type.
4518 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
4519 temps.Release(temp2); // Needed by GenerateSubTypeObjectCheckNoReadBarrier().
4520 GenerateSubTypeObjectCheckNoReadBarrier(
4521 codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
4522 }
4523 }
4524
GenerateVarHandleArrayChecks(HInvoke * invoke,CodeGeneratorARM64 * codegen,VarHandleSlowPathARM64 * slow_path)4525 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
4526 CodeGeneratorARM64* codegen,
4527 VarHandleSlowPathARM64* slow_path) {
4528 VarHandleOptimizations optimizations(invoke);
4529 MacroAssembler* masm = codegen->GetVIXLAssembler();
4530 Register varhandle = InputRegisterAt(invoke, 0);
4531 Register object = InputRegisterAt(invoke, 1);
4532 Register index = InputRegisterAt(invoke, 2);
4533 DataType::Type value_type =
4534 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4535 Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
4536
4537 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4538 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4539 const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
4540 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4541 const MemberOffset class_offset = mirror::Object::ClassOffset();
4542 const MemberOffset array_length_offset = mirror::Array::LengthOffset();
4543
4544 // Null-check the object.
4545 if (!optimizations.GetSkipObjectNullCheck()) {
4546 __ Cbz(object, slow_path->GetEntryLabel());
4547 }
4548
4549 UseScratchRegisterScope temps(masm);
4550 Register temp = temps.AcquireW();
4551 Register temp2 = temps.AcquireW();
4552
4553 // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
4554 // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
4555 // coordinateType0 shall not be null but we do not explicitly verify that.
4556 DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4557 __ Ldp(temp, temp2, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4558 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4559 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4560 __ Cbz(temp2, slow_path->GetEntryLabel());
4561
4562 // Check the object's class against coordinateType0.
4563 //
4564 // This is an exact check and we defer other cases to the runtime. This includes
4565 // conversion to array of superclass references, which is valid but subsequently
4566 // requires all update operations to check that the value can indeed be stored.
4567 // We do not want to perform such extra checks in the intrinsified code.
4568 //
4569 // We do this check without read barrier, so there can be false negatives which we
4570 // defer to the slow path. There shall be no false negatives for array classes in the
4571 // boot image (including Object[] and primitive arrays) because they are non-movable.
4572 __ Ldr(temp2, HeapOperand(object, class_offset.Int32Value()));
4573 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4574 __ Cmp(temp, temp2);
4575 __ B(slow_path->GetEntryLabel(), ne);
4576
4577 // Check that the coordinateType0 is an array type. We do not need a read barrier
4578 // for loading constant reference fields (or chains of them) for comparison with null,
4579 // nor for finally loading a constant primitive field (primitive type) below.
4580 __ Ldr(temp2, HeapOperand(temp, component_type_offset.Int32Value()));
4581 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4582 __ Cbz(temp2, slow_path->GetEntryLabel());
4583
4584 // Check that the array component type matches the primitive type.
4585 __ Ldrh(temp2, HeapOperand(temp2, primitive_type_offset.Int32Value()));
4586 if (primitive_type == Primitive::kPrimNot) {
4587 static_assert(Primitive::kPrimNot == 0);
4588 __ Cbnz(temp2, slow_path->GetEntryLabel());
4589 } else {
4590 // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
4591 // we shall check for a byte array view in the slow path.
4592 // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
4593 // so we cannot emit that if we're JITting without boot image.
4594 bool boot_image_available =
4595 codegen->GetCompilerOptions().IsBootImage() ||
4596 !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
4597 bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
4598 vixl::aarch64::Label* slow_path_label =
4599 can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
4600 __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4601 __ B(slow_path_label, ne);
4602 }
4603
4604 // Check for array index out of bounds.
4605 __ Ldr(temp, HeapOperand(object, array_length_offset.Int32Value()));
4606 __ Cmp(index, temp);
4607 __ B(slow_path->GetEntryLabel(), hs);
4608 }
4609
GenerateVarHandleCoordinateChecks(HInvoke * invoke,CodeGeneratorARM64 * codegen,VarHandleSlowPathARM64 * slow_path)4610 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
4611 CodeGeneratorARM64* codegen,
4612 VarHandleSlowPathARM64* slow_path) {
4613 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4614 if (expected_coordinates_count == 0u) {
4615 GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
4616 } else if (expected_coordinates_count == 1u) {
4617 GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
4618 } else {
4619 DCHECK_EQ(expected_coordinates_count, 2u);
4620 GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4621 }
4622 }
4623
GenerateVarHandleChecks(HInvoke * invoke,CodeGeneratorARM64 * codegen,std::memory_order order,DataType::Type type)4624 static VarHandleSlowPathARM64* GenerateVarHandleChecks(HInvoke* invoke,
4625 CodeGeneratorARM64* codegen,
4626 std::memory_order order,
4627 DataType::Type type) {
4628 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4629 VarHandleOptimizations optimizations(invoke);
4630 if (optimizations.GetUseKnownImageVarHandle()) {
4631 DCHECK_NE(expected_coordinates_count, 2u);
4632 if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
4633 return nullptr;
4634 }
4635 }
4636
4637 VarHandleSlowPathARM64* slow_path =
4638 new (codegen->GetScopedAllocator()) VarHandleSlowPathARM64(invoke, order);
4639 codegen->AddSlowPath(slow_path);
4640
4641 if (!optimizations.GetUseKnownImageVarHandle()) {
4642 GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4643 }
4644 GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4645
4646 return slow_path;
4647 }
4648
4649 struct VarHandleTarget {
4650 Register object; // The object holding the value to operate on.
4651 Register offset; // The offset of the value to operate on.
4652 };
4653
GetVarHandleTarget(HInvoke * invoke)4654 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4655 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4656 LocationSummary* locations = invoke->GetLocations();
4657
4658 VarHandleTarget target;
4659 // The temporary allocated for loading the offset.
4660 target.offset = WRegisterFrom(locations->GetTemp(0u));
4661 // The reference to the object that holds the value to operate on.
4662 target.object = (expected_coordinates_count == 0u)
4663 ? WRegisterFrom(locations->GetTemp(1u))
4664 : InputRegisterAt(invoke, 1);
4665 return target;
4666 }
4667
GenerateVarHandleTarget(HInvoke * invoke,const VarHandleTarget & target,CodeGeneratorARM64 * codegen)4668 static void GenerateVarHandleTarget(HInvoke* invoke,
4669 const VarHandleTarget& target,
4670 CodeGeneratorARM64* codegen) {
4671 MacroAssembler* masm = codegen->GetVIXLAssembler();
4672 Register varhandle = InputRegisterAt(invoke, 0);
4673 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4674
4675 if (expected_coordinates_count <= 1u) {
4676 if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
4677 ScopedObjectAccess soa(Thread::Current());
4678 ArtField* target_field = GetBootImageVarHandleField(invoke);
4679 if (expected_coordinates_count == 0u) {
4680 ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
4681 if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
4682 uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(declaring_class);
4683 codegen->LoadBootImageRelRoEntry(target.object, boot_image_offset);
4684 } else {
4685 codegen->LoadTypeForBootImageIntrinsic(
4686 target.object,
4687 TypeReference(&declaring_class->GetDexFile(), declaring_class->GetDexTypeIndex()));
4688 }
4689 }
4690 __ Mov(target.offset, target_field->GetOffset().Uint32Value());
4691 } else {
4692 // For static fields, we need to fill `target.object` with the declaring class,
4693 // so we can use `target.object` as a temporary for the `ArtField*`. For instance
4694 // fields, we do not need the declaring class and the `ArtField*` is no longer
4695 // needed once `target.offset` has been loaded, so we use `target.offset` to hold it.
4696 Register field = (expected_coordinates_count == 0) ? target.object : target.offset;
4697
4698 const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4699 const MemberOffset offset_offset = ArtField::OffsetOffset();
4700
4701 // Load the ArtField*, the offset and, if needed, declaring class.
4702 __ Ldr(field.X(), HeapOperand(varhandle, art_field_offset.Int32Value()));
4703 __ Ldr(target.offset, MemOperand(field.X(), offset_offset.Int32Value()));
4704 if (expected_coordinates_count == 0u) {
4705 codegen->GenerateGcRootFieldLoad(invoke,
4706 LocationFrom(target.object),
4707 field.X(),
4708 ArtField::DeclaringClassOffset().Int32Value(),
4709 /*fixup_label=*/nullptr,
4710 codegen->GetCompilerReadBarrierOption());
4711 }
4712 }
4713 } else {
4714 DCHECK_EQ(expected_coordinates_count, 2u);
4715 DataType::Type value_type =
4716 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4717 size_t size_shift = DataType::SizeShift(value_type);
4718 MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4719
4720 Register index = InputRegisterAt(invoke, 2);
4721 Register shifted_index = index;
4722 if (size_shift != 0u) {
4723 shifted_index = target.offset;
4724 __ Lsl(shifted_index, index, size_shift);
4725 }
4726 __ Add(target.offset, shifted_index, data_offset.Int32Value());
4727 }
4728 }
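
// For the array case the computed target is equivalent to (sketch):
//
//   target.object = array;
//   target.offset = data_offset + (index << DataType::SizeShift(value_type));
//
// so the element address used by the accessors below is array + target.offset. For fields
// the offset comes from the ArtField (or from the boot image for known VarHandles).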
4729
CreateVarHandleCommonLocations(HInvoke * invoke,CodeGeneratorARM64 * codegen)4730 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke,
4731 CodeGeneratorARM64* codegen) {
4732 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4733 DataType::Type return_type = invoke->GetType();
4734
4735 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4736 LocationSummary* locations =
4737 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4738 locations->SetInAt(0, Location::RequiresRegister());
4739 // Require coordinates in registers. These are the object holding the value
4740 // to operate on (except for static fields) and index (for arrays and views).
4741 for (size_t i = 0; i != expected_coordinates_count; ++i) {
4742 locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4743 }
4744 if (return_type != DataType::Type::kVoid) {
4745 if (DataType::IsFloatingPointType(return_type)) {
4746 locations->SetOut(Location::RequiresFpuRegister());
4747 } else {
4748 locations->SetOut(Location::RequiresRegister());
4749 }
4750 }
4751 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4752 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4753 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4754 HInstruction* arg = invoke->InputAt(arg_index);
4755 if (IsZeroBitPattern(arg)) {
4756 locations->SetInAt(arg_index, Location::ConstantLocation(arg));
4757 } else if (DataType::IsFloatingPointType(arg->GetType())) {
4758 locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4759 } else {
4760 locations->SetInAt(arg_index, Location::RequiresRegister());
4761 }
4762 }
4763
4764 // Add a temporary for offset.
4765 if (codegen->EmitNonBakerReadBarrier() &&
4766 GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
4767 // To preserve the offset value across the non-Baker read barrier slow path
4768 // for loading the declaring class, use a fixed callee-save register.
4769 constexpr int first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
4770 locations->AddTemp(Location::RegisterLocation(first_callee_save));
4771 } else {
4772 locations->AddTemp(Location::RequiresRegister());
4773 }
4774 if (expected_coordinates_count == 0u) {
4775 // Add a temporary to hold the declaring class.
4776 locations->AddTemp(Location::RequiresRegister());
4777 }
4778
4779 return locations;
4780 }
4781
CreateVarHandleGetLocations(HInvoke * invoke,CodeGeneratorARM64 * codegen)4782 static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4783 VarHandleOptimizations optimizations(invoke);
4784 if (optimizations.GetDoNotIntrinsify()) {
4785 return;
4786 }
4787
4788 if (codegen->EmitNonBakerReadBarrier() &&
4789 invoke->GetType() == DataType::Type::kReference &&
4790 invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
4791 invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
4792 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4793 // the passed reference and reloads it from the field. This gets the memory visibility
4794 // wrong for Acquire/Volatile operations. b/173104084
4795 return;
4796 }
4797
4798 CreateVarHandleCommonLocations(invoke, codegen);
4799 }
4800
GenerateVarHandleGet(HInvoke * invoke,CodeGeneratorARM64 * codegen,std::memory_order order,bool byte_swap=false)4801 static void GenerateVarHandleGet(HInvoke* invoke,
4802 CodeGeneratorARM64* codegen,
4803 std::memory_order order,
4804 bool byte_swap = false) {
4805 DataType::Type type = invoke->GetType();
4806 DCHECK_NE(type, DataType::Type::kVoid);
4807
4808 LocationSummary* locations = invoke->GetLocations();
4809 MacroAssembler* masm = codegen->GetVIXLAssembler();
4810 CPURegister out = helpers::OutputCPURegister(invoke);
4811
4812 VarHandleTarget target = GetVarHandleTarget(invoke);
4813 VarHandleSlowPathARM64* slow_path = nullptr;
4814 if (!byte_swap) {
4815 slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
4816 GenerateVarHandleTarget(invoke, target, codegen);
4817 if (slow_path != nullptr) {
4818 __ Bind(slow_path->GetNativeByteOrderLabel());
4819 }
4820 }
4821
4822 // ARM64 load-acquire instructions are implicitly sequentially consistent.
4823 bool use_load_acquire =
4824 (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
4825 DCHECK(use_load_acquire || order == std::memory_order_relaxed);
4826
4827 // Load the value from the target location.
4828 if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
4829 // Piggy-back on the field load path using introspection for the Baker read barrier.
4830 // The `target.offset` is a temporary; use it for the field address.
4831 Register tmp_ptr = target.offset.X();
4832 __ Add(tmp_ptr, target.object.X(), target.offset.X());
4833 codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
4834 locations->Out(),
4835 target.object,
4836 MemOperand(tmp_ptr),
4837 /*needs_null_check=*/ false,
4838 use_load_acquire);
4839 DCHECK(!byte_swap);
4840 } else {
4841 MemOperand address(target.object.X(), target.offset.X());
4842 CPURegister load_reg = out;
4843 DataType::Type load_type = type;
4844 UseScratchRegisterScope temps(masm);
4845 if (byte_swap) {
4846 if (type == DataType::Type::kInt16) {
4847 // Avoid unnecessary sign extension before REV16.
4848 load_type = DataType::Type::kUint16;
4849 } else if (type == DataType::Type::kFloat32) {
4850 load_type = DataType::Type::kInt32;
4851 load_reg = target.offset.W();
4852 } else if (type == DataType::Type::kFloat64) {
4853 load_type = DataType::Type::kInt64;
4854 load_reg = target.offset.X();
4855 }
4856 }
4857 if (use_load_acquire) {
4858 codegen->LoadAcquire(invoke, load_type, load_reg, address, /*needs_null_check=*/ false);
4859 } else {
4860 codegen->Load(load_type, load_reg, address);
4861 }
4862 if (type == DataType::Type::kReference) {
4863 DCHECK(!byte_swap);
4864 DCHECK(out.IsW());
4865 Location out_loc = locations->Out();
4866 Location object_loc = LocationFrom(target.object);
4867 Location offset_loc = LocationFrom(target.offset);
4868 codegen->MaybeGenerateReadBarrierSlow(invoke, out_loc, out_loc, object_loc, 0u, offset_loc);
4869 } else if (byte_swap) {
4870 GenerateReverseBytes(masm, type, load_reg, out);
4871 }
4872 }
4873
4874 if (slow_path != nullptr) {
4875 DCHECK(!byte_swap);
4876 __ Bind(slow_path->GetExitLabel());
4877 }
4878 }
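
// A byte-swapped floating-point get (byte array views with non-native byte order) loads the
// raw integer bits, reverses them and only then moves them to the FP register. Sketch for
// float (BitCast and ReverseBytes32 are illustrative helpers):
//
//   int32_t raw = LoadInt32(address);                  // Load / LoadAcquire into a W register
//   float out = BitCast<float>(ReverseBytes32(raw));   // Rev + Fmov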
4879
VisitVarHandleGet(HInvoke * invoke)4880 void IntrinsicLocationsBuilderARM64::VisitVarHandleGet(HInvoke* invoke) {
4881 CreateVarHandleGetLocations(invoke, codegen_);
4882 }
4883
VisitVarHandleGet(HInvoke * invoke)4884 void IntrinsicCodeGeneratorARM64::VisitVarHandleGet(HInvoke* invoke) {
4885 GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
4886 }
4887
VisitVarHandleGetOpaque(HInvoke * invoke)4888 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4889 CreateVarHandleGetLocations(invoke, codegen_);
4890 }
4891
VisitVarHandleGetOpaque(HInvoke * invoke)4892 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4893 GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
4894 }
4895
VisitVarHandleGetAcquire(HInvoke * invoke)4896 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4897 CreateVarHandleGetLocations(invoke, codegen_);
4898 }
4899
VisitVarHandleGetAcquire(HInvoke * invoke)4900 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4901 GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire);
4902 }
4903
VisitVarHandleGetVolatile(HInvoke * invoke)4904 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4905 CreateVarHandleGetLocations(invoke, codegen_);
4906 }
4907
VisitVarHandleGetVolatile(HInvoke * invoke)4908 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4909 GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst);
4910 }
4911
CreateVarHandleSetLocations(HInvoke * invoke,CodeGeneratorARM64 * codegen)4912 static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4913 VarHandleOptimizations optimizations(invoke);
4914 if (optimizations.GetDoNotIntrinsify()) {
4915 return;
4916 }
4917
4918 CreateVarHandleCommonLocations(invoke, codegen);
4919 }
4920
GenerateVarHandleSet(HInvoke * invoke,CodeGeneratorARM64 * codegen,std::memory_order order,bool byte_swap=false)4921 static void GenerateVarHandleSet(HInvoke* invoke,
4922 CodeGeneratorARM64* codegen,
4923 std::memory_order order,
4924 bool byte_swap = false) {
4925 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4926 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4927
4928 MacroAssembler* masm = codegen->GetVIXLAssembler();
4929 CPURegister value = InputCPURegisterOrZeroRegAt(invoke, value_index);
4930
4931 VarHandleTarget target = GetVarHandleTarget(invoke);
4932 VarHandleSlowPathARM64* slow_path = nullptr;
4933 if (!byte_swap) {
4934 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4935 GenerateVarHandleTarget(invoke, target, codegen);
4936 if (slow_path != nullptr) {
4937 __ Bind(slow_path->GetNativeByteOrderLabel());
4938 }
4939 }
4940
4941 // ARM64 store-release instructions are implicitly sequentially consistent.
4942 bool use_store_release =
4943 (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
4944 DCHECK(use_store_release || order == std::memory_order_relaxed);
4945
4946 // Store the value to the target location.
4947 {
4948 CPURegister source = value;
4949 UseScratchRegisterScope temps(masm);
4950 if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
4951 DCHECK(value.IsW());
4952 Register temp = temps.AcquireW();
4953 __ Mov(temp, value.W());
4954 codegen->GetAssembler()->PoisonHeapReference(temp);
4955 source = temp;
4956 }
4957 if (byte_swap) {
4958 DCHECK(!source.IsZero()); // We use the main path for zero as it does not need a byte swap.
4959 Register temp = source.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
4960 if (value_type == DataType::Type::kInt16) {
4961 // Avoid unnecessary sign extension before storing.
4962 value_type = DataType::Type::kUint16;
4963 } else if (DataType::IsFloatingPointType(value_type)) {
4964 __ Fmov(temp, source.Is64Bits() ? source.D() : source.S());
4965 value_type = source.Is64Bits() ? DataType::Type::kInt64 : DataType::Type::kInt32;
4966 source = temp; // Source for the `GenerateReverseBytes()` below.
4967 }
4968 GenerateReverseBytes(masm, value_type, source, temp);
4969 source = temp;
4970 }
4971 MemOperand address(target.object.X(), target.offset.X());
4972 if (use_store_release) {
4973 codegen->StoreRelease(invoke, value_type, source, address, /*needs_null_check=*/ false);
4974 } else {
4975 codegen->Store(value_type, source, address);
4976 }
4977 }
4978
4979 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
4980 codegen->MaybeMarkGCCard(target.object, Register(value), /* emit_null_check= */ true);
4981 }
4982
4983 if (slow_path != nullptr) {
4984 DCHECK(!byte_swap);
4985 __ Bind(slow_path->GetExitLabel());
4986 }
4987 }
4988
VisitVarHandleSet(HInvoke * invoke)4989 void IntrinsicLocationsBuilderARM64::VisitVarHandleSet(HInvoke* invoke) {
4990 CreateVarHandleSetLocations(invoke, codegen_);
4991 }
4992
VisitVarHandleSet(HInvoke * invoke)4993 void IntrinsicCodeGeneratorARM64::VisitVarHandleSet(HInvoke* invoke) {
4994 GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
4995 }
4996
VisitVarHandleSetOpaque(HInvoke * invoke)4997 void IntrinsicLocationsBuilderARM64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4998 CreateVarHandleSetLocations(invoke, codegen_);
4999 }
5000
VisitVarHandleSetOpaque(HInvoke * invoke)5001 void IntrinsicCodeGeneratorARM64::VisitVarHandleSetOpaque(HInvoke* invoke) {
5002 GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
5003 }
5004
5005 void IntrinsicLocationsBuilderARM64::VisitVarHandleSetRelease(HInvoke* invoke) {
5006 CreateVarHandleSetLocations(invoke, codegen_);
5007 }
5008
5009 void IntrinsicCodeGeneratorARM64::VisitVarHandleSetRelease(HInvoke* invoke) {
5010 GenerateVarHandleSet(invoke, codegen_, std::memory_order_release);
5011 }
5012
5013 void IntrinsicLocationsBuilderARM64::VisitVarHandleSetVolatile(HInvoke* invoke) {
5014 CreateVarHandleSetLocations(invoke, codegen_);
5015 }
5016
5017 void IntrinsicCodeGeneratorARM64::VisitVarHandleSetVolatile(HInvoke* invoke) {
5018 GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst);
5019 }
5020
5021 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
5022 CodeGeneratorARM64* codegen,
5023 bool return_success) {
5024 VarHandleOptimizations optimizations(invoke);
5025 if (optimizations.GetDoNotIntrinsify()) {
5026 return;
5027 }
5028
5029 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
5030 DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
5031 if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
5032 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
5033 // the passed reference and reloads it from the field. This breaks the read barriers
5034 // in the slow path in different ways. The marked old value may not actually be a to-space
5035 // reference to the same object as `old_value`, breaking slow path assumptions. And
5036 // for CompareAndExchange, marking the old value after comparison failure may actually
5037 // return the reference to `expected`, erroneously indicating success even though we
5038 // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
5039 return;
5040 }
5041
5042 LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
5043
5044 if (codegen->EmitNonBakerReadBarrier()) {
5045 // We need callee-save registers for both the class object and offset instead of
5046 // the temporaries reserved in CreateVarHandleCommonLocations().
5047 static_assert(POPCOUNT(kArm64CalleeSaveRefSpills) >= 2u);
5048 uint32_t first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
5049 uint32_t second_callee_save = CTZ(kArm64CalleeSaveRefSpills ^ (1u << first_callee_save));
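// Bit-trick note (illustrative): CTZ(mask) is the number of the lowest callee-save register
// in the spill mask, and XOR-ing the mask with (1u << first_callee_save) clears that bit,
// so the second CTZ yields the next lowest one. E.g. a hypothetical mask 0b1100000 would
// give registers 5 and then 6.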
5050 if (GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
5051 DCHECK_EQ(locations->GetTempCount(), 2u);
5052 DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
5053 DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
5054 locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
5055 } else {
5056 DCHECK_EQ(locations->GetTempCount(), 1u);
5057 DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
5058 locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
5059 }
5060 }
5061 size_t old_temp_count = locations->GetTempCount();
5062 DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
5063 if (!return_success) {
5064 if (DataType::IsFloatingPointType(value_type)) {
5065 // Add temporaries for the old value and the exclusive store result if the floating point
5066 // `expected` and/or `new_value` take scratch registers.
5067 size_t available_scratch_registers =
5068 (IsZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) ? 1u : 0u) +
5069 (IsZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) ? 1u : 0u);
5070 size_t temps_needed = /* pointer, old value, store result */ 3u - available_scratch_registers;
5071 // We can reuse the declaring class (if present) and offset temporary.
5072 if (temps_needed > old_temp_count) {
5073 locations->AddRegisterTemps(temps_needed - old_temp_count);
5074 }
5075 } else if ((value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) &&
5076 !IsZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) &&
5077 !IsZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) &&
5078 GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
5079 // Allocate a normal temporary for store result in the non-native byte order path
5080 // because scratch registers are used by the byte-swapped `expected` and `new_value`.
5081 DCHECK_EQ(old_temp_count, 1u);
5082 locations->AddTemp(Location::RequiresRegister());
5083 }
5084 }
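// Worked example (illustrative): a byte-array-view CompareAndExchange of a double with
// non-zero `expected` and `new_value` has old_temp_count == 1, no free scratch registers,
// and temps_needed == 3, so two extra register temps are added above.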
5085 if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5086 // Add a temporary for the `old_value_temp` in the slow path.
5087 locations->AddTemp(Location::RequiresRegister());
5088 }
5089 }
5090
5091 static Register MoveToTempIfFpRegister(const CPURegister& cpu_reg,
5092 DataType::Type type,
5093 MacroAssembler* masm,
5094 UseScratchRegisterScope* temps) {
5095 if (cpu_reg.IsS()) {
5096 DCHECK_EQ(type, DataType::Type::kFloat32);
5097 Register reg = temps->AcquireW();
5098 __ Fmov(reg, cpu_reg.S());
5099 return reg;
5100 } else if (cpu_reg.IsD()) {
5101 DCHECK_EQ(type, DataType::Type::kFloat64);
5102 Register reg = temps->AcquireX();
5103 __ Fmov(reg, cpu_reg.D());
5104 return reg;
5105 } else {
5106 return DataType::Is64BitType(type) ? cpu_reg.X() : cpu_reg.W();
5107 }
5108 }
5109
5110 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
5111 CodeGeneratorARM64* codegen,
5112 std::memory_order order,
5113 bool return_success,
5114 bool strong,
5115 bool byte_swap = false) {
5116 DCHECK(return_success || strong);
5117
5118 uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
5119 uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
5120 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
5121 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
5122
5123 MacroAssembler* masm = codegen->GetVIXLAssembler();
5124 LocationSummary* locations = invoke->GetLocations();
5125 CPURegister expected = InputCPURegisterOrZeroRegAt(invoke, expected_index);
5126 CPURegister new_value = InputCPURegisterOrZeroRegAt(invoke, new_value_index);
5127 CPURegister out = helpers::OutputCPURegister(invoke);
5128
5129 VarHandleTarget target = GetVarHandleTarget(invoke);
5130 VarHandleSlowPathARM64* slow_path = nullptr;
5131 if (!byte_swap) {
5132 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
5133 GenerateVarHandleTarget(invoke, target, codegen);
5134 if (slow_path != nullptr) {
5135 slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
5136 __ Bind(slow_path->GetNativeByteOrderLabel());
5137 }
5138 }
5139
5140 // This needs to happen before acquiring the scratch registers, as MaybeMarkGCCard() also uses VIXL temps.
5141 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
5142 // Mark card for object assuming new value is stored.
5143 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
5144 codegen->MaybeMarkGCCard(target.object, new_value.W(), new_value_can_be_null);
5145 }
5146
5147 // Reuse the `offset` temporary for the pointer to the target location,
5148 // except for references that need the offset for the read barrier.
5149 UseScratchRegisterScope temps(masm);
5150 Register tmp_ptr = target.offset.X();
5151 if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5152 tmp_ptr = temps.AcquireX();
5153 }
5154 __ Add(tmp_ptr, target.object.X(), target.offset.X());
5155
5156 // Move floating point values to scratch registers.
5157 // Note that float/double CAS compares the raw bits rather than using operator==, so it
5157 // distinguishes +0.0 from -0.0 and matches NaNs only by bit pattern.
5158 Register expected_reg = MoveToTempIfFpRegister(expected, value_type, masm, &temps);
5159 Register new_value_reg = MoveToTempIfFpRegister(new_value, value_type, masm, &temps);
5160 bool is_fp = DataType::IsFloatingPointType(value_type);
5161 DataType::Type cas_type = is_fp
5162 ? ((value_type == DataType::Type::kFloat64) ? DataType::Type::kInt64 : DataType::Type::kInt32)
5163 : value_type;
5164 // Avoid sign extension in the CAS loop by zero-extending `expected` before the loop. This adds
5165 // one instruction for CompareAndExchange as we shall need to sign-extend the returned value.
5166 if (value_type == DataType::Type::kInt16 && !expected.IsZero()) {
5167 Register temp = temps.AcquireW();
5168 __ Uxth(temp, expected_reg);
5169 expected_reg = temp;
5170 cas_type = DataType::Type::kUint16;
5171 } else if (value_type == DataType::Type::kInt8 && !expected.IsZero()) {
5172 Register temp = temps.AcquireW();
5173 __ Uxtb(temp, expected_reg);
5174 expected_reg = temp;
5175 cas_type = DataType::Type::kUint8;
5176 }
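// Why the zero-extension matters (illustrative): the exclusive load in the CAS loop (e.g.
// LDAXRH for 16-bit data) zero-extends into a W register, so a sign-extended `expected`
// such as 0xffffffff for (short)-1 would never compare equal to the loaded 0x0000ffff.
// Comparing both sides as Uint16/Uint8 keeps the comparison consistent.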
5177
5178 if (byte_swap) {
5179 // Do the byte swap and move values to scratch registers if needed.
5180 // Non-zero FP values and non-zero `expected` for `kInt16` are already in scratch registers.
5181 DCHECK_NE(value_type, DataType::Type::kInt8);
5182 if (!expected.IsZero()) {
5183 bool is_scratch = is_fp || (value_type == DataType::Type::kInt16);
5184 Register temp = is_scratch ? expected_reg : temps.AcquireSameSizeAs(expected_reg);
5185 GenerateReverseBytes(masm, cas_type, expected_reg, temp);
5186 expected_reg = temp;
5187 }
5188 if (!new_value.IsZero()) {
5189 Register temp = is_fp ? new_value_reg : temps.AcquireSameSizeAs(new_value_reg);
5190 GenerateReverseBytes(masm, cas_type, new_value_reg, temp);
5191 new_value_reg = temp;
5192 }
5193 }
5194
5195 // Prepare registers for old value and the result of the exclusive store.
5196 Register old_value;
5197 Register store_result;
5198 if (return_success) {
5199 // Use the output register for both old value and exclusive store result.
5200 old_value = (cas_type == DataType::Type::kInt64) ? out.X() : out.W();
5201 store_result = out.W();
5202 } else if (DataType::IsFloatingPointType(value_type)) {
5203 // We need two temporary registers, but the scratch registers are already used for
5204 // holding `expected` and `new_value` unless those are the zero bit pattern (+0.0f or
5205 // +0.0). We have allocated sufficient normal temporaries to handle that.
5206 size_t next_temp = 1u;
5207 if (expected.IsZero()) {
5208 old_value = (cas_type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
5209 } else {
5210 Location temp = locations->GetTemp(next_temp);
5211 ++next_temp;
5212 old_value = (cas_type == DataType::Type::kInt64) ? XRegisterFrom(temp) : WRegisterFrom(temp);
5213 }
5214 store_result =
5215 new_value.IsZero() ? temps.AcquireW() : WRegisterFrom(locations->GetTemp(next_temp));
5216 DCHECK(!old_value.Is(tmp_ptr));
5217 DCHECK(!store_result.Is(tmp_ptr));
5218 } else {
5219 // Use the output register for the old value.
5220 old_value = (cas_type == DataType::Type::kInt64) ? out.X() : out.W();
5221 // Use scratch register for the store result, except when we have used up
5222 // scratch registers for byte-swapped `expected` and `new_value`.
5223 // In that case, we have allocated a normal temporary.
5224 store_result = (byte_swap && !expected.IsZero() && !new_value.IsZero())
5225 ? WRegisterFrom(locations->GetTemp(1))
5226 : temps.AcquireW();
5227 DCHECK(!store_result.Is(tmp_ptr));
5228 }
5229
5230 vixl::aarch64::Label exit_loop_label;
5231 vixl::aarch64::Label* exit_loop = &exit_loop_label;
5232 vixl::aarch64::Label* cmp_failure = &exit_loop_label;
5233
5234 if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5235 // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
5236 // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
5237 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
5238 Register old_value_temp =
5239 WRegisterFrom(locations->GetTemp((expected_coordinates_count == 0u) ? 2u : 1u));
5240 // For strong CAS, use a scratch register for the store result in slow path.
5241 // For weak CAS, we need to check the store result, so store it in `store_result`.
5242 Register slow_path_store_result = strong ? Register() : store_result;
5243 ReadBarrierCasSlowPathARM64* rb_slow_path =
5244 new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARM64(
5245 invoke,
5246 order,
5247 strong,
5248 target.object,
5249 target.offset.X(),
5250 expected_reg,
5251 new_value_reg,
5252 old_value,
5253 old_value_temp,
5254 slow_path_store_result,
5255 /*update_old_value=*/ !return_success,
5256 codegen);
5257 codegen->AddSlowPath(rb_slow_path);
5258 exit_loop = rb_slow_path->GetExitLabel();
5259 cmp_failure = rb_slow_path->GetEntryLabel();
5260 }
5261
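// For context, GenerateCompareAndSet() emits the usual load-exclusive/store-exclusive
// pattern. A minimal sketch for a 32-bit seq_cst strong CAS without LSE atomics follows
// (placeholder register names; the weak-CAS, sub-word, and LSE variants, and the exact
// handling of the store result, differ in the real helper):
//
//   loop:
//     ldaxr w_old, [x_tmp_ptr]            // exclusive acquire-load of the current value
//     cmp   w_old, w_expected
//     b.ne  cmp_failure                   // value mismatch, `w_old` holds the old value
//     stlxr w_status, w_new, [x_tmp_ptr]  // exclusive release-store
//     cbnz  w_status, loop                // exclusivity lost, retry (strong CAS only)
//   exit_loop: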
5262 GenerateCompareAndSet(codegen,
5263 cas_type,
5264 order,
5265 strong,
5266 cmp_failure,
5267 tmp_ptr,
5268 new_value_reg,
5269 old_value,
5270 store_result,
5271 expected_reg);
5272 __ Bind(exit_loop);
5273
5274 if (return_success) {
5275 if (strong) {
5276 __ Cset(out.W(), eq);
5277 } else {
5278 // On success, the Z flag is set and the store result is 1, see GenerateCompareAndSet().
5279 // On failure, either the Z flag is clear or the store result is 0.
5280 // Determine the final success value with a CSEL.
5281 __ Csel(out.W(), store_result, wzr, eq);
5282 }
5283 } else if (byte_swap) {
5284 // Also handles moving to FP registers.
5285 GenerateReverseBytes(masm, value_type, old_value, out);
5286 } else if (DataType::IsFloatingPointType(value_type)) {
5287 __ Fmov((value_type == DataType::Type::kFloat64) ? out.D() : out.S(), old_value);
5288 } else if (value_type == DataType::Type::kInt8) {
5289 __ Sxtb(out.W(), old_value);
5290 } else if (value_type == DataType::Type::kInt16) {
5291 __ Sxth(out.W(), old_value);
5292 }
5293
5294 if (slow_path != nullptr) {
5295 DCHECK(!byte_swap);
5296 __ Bind(slow_path->GetExitLabel());
5297 }
5298 }
5299
5300 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5301 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
5302 }
5303
5304 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5305 GenerateVarHandleCompareAndSetOrExchange(
5306 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
5307 }
5308
5309 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5310 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
5311 }
5312
5313 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5314 GenerateVarHandleCompareAndSetOrExchange(
5315 invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
5316 }
5317
5318 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5319 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
5320 }
5321
5322 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5323 GenerateVarHandleCompareAndSetOrExchange(
5324 invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
5325 }
5326
5327 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5328 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5329 }
5330
5331 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5332 GenerateVarHandleCompareAndSetOrExchange(
5333 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
5334 }
5335
5336 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5337 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5338 }
5339
5340 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5341 GenerateVarHandleCompareAndSetOrExchange(
5342 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
5343 }
5344
5345 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5346 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5347 }
5348
5349 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5350 GenerateVarHandleCompareAndSetOrExchange(
5351 invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
5352 }
5353
5354 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5355 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5356 }
5357
5358 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5359 GenerateVarHandleCompareAndSetOrExchange(
5360 invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
5361 }
5362
5363 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5364 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5365 }
5366
5367 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5368 GenerateVarHandleCompareAndSetOrExchange(
5369 invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
5370 }
5371
5372 static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
5373 CodeGeneratorARM64* codegen,
5374 GetAndUpdateOp get_and_update_op) {
5375 VarHandleOptimizations optimizations(invoke);
5376 if (optimizations.GetDoNotIntrinsify()) {
5377 return;
5378 }
5379
5380 if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
5381 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
5382 // the passed reference and reloads it from the field, thus seeing the new value
5383 // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
5384 return;
5385 }
5386
5387 LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
5388
5389 size_t old_temp_count = locations->GetTempCount();
5390 DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
5391 if (DataType::IsFloatingPointType(invoke->GetType())) {
5392 if (get_and_update_op == GetAndUpdateOp::kAdd) {
5393 // For ADD, do not use ZR for zero bit pattern (+0.0f or +0.0).
5394 locations->SetInAt(invoke->GetNumberOfArguments() - 1u, Location::RequiresFpuRegister());
5395 } else {
5396 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
5397 // We can reuse the declaring class temporary if present.
5398 if (old_temp_count == 1u &&
5399 !IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5400 // Add a temporary for `old_value` if floating point `new_value` takes a scratch register.
5401 locations->AddTemp(Location::RequiresRegister());
5402 }
5403 }
5404 }
5405 // The byte-swap path of the bitwise operations needs a temporary, unless the argument is
5406 // zero (which needs no byte swap). We can reuse the declaring class temporary if present.
5407 if (old_temp_count == 1u &&
5408 (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd) &&
5409 GetExpectedVarHandleCoordinatesCount(invoke) == 2u &&
5410 !IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5411 DataType::Type value_type =
5412 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5413 if (value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) {
5414 locations->AddTemp(Location::RequiresRegister());
5415 }
5416 }
5417 }
5418
5419 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
5420 CodeGeneratorARM64* codegen,
5421 GetAndUpdateOp get_and_update_op,
5422 std::memory_order order,
5423 bool byte_swap = false) {
5424 uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
5425 DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
5426 bool is_fp = DataType::IsFloatingPointType(value_type);
5427
5428 MacroAssembler* masm = codegen->GetVIXLAssembler();
5429 LocationSummary* locations = invoke->GetLocations();
5430 CPURegister arg = (is_fp && get_and_update_op == GetAndUpdateOp::kAdd)
5431 ? InputCPURegisterAt(invoke, arg_index)
5432 : InputCPURegisterOrZeroRegAt(invoke, arg_index);
5433 CPURegister out = helpers::OutputCPURegister(invoke);
5434
5435 VarHandleTarget target = GetVarHandleTarget(invoke);
5436 VarHandleSlowPathARM64* slow_path = nullptr;
5437 if (!byte_swap) {
5438 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
5439 GenerateVarHandleTarget(invoke, target, codegen);
5440 if (slow_path != nullptr) {
5441 slow_path->SetGetAndUpdateOp(get_and_update_op);
5442 __ Bind(slow_path->GetNativeByteOrderLabel());
5443 }
5444 }
5445
5446 // This needs to happen before acquiring the scratch registers, as MaybeMarkGCCard() also uses VIXL temps.
5447 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
5448 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
5449 // Mark card for the object; the new value is about to be stored.
5450 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
5451 codegen->MaybeMarkGCCard(target.object, arg.W(), new_value_can_be_null);
5452 }
5453
5454 // Reuse the `target.offset` temporary for the pointer to the target location,
5455 // except for references that need the offset for the non-Baker read barrier.
5456 UseScratchRegisterScope temps(masm);
5457 Register tmp_ptr = target.offset.X();
5458 if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
5459 tmp_ptr = temps.AcquireX();
5460 }
5461 __ Add(tmp_ptr, target.object.X(), target.offset.X());
5462
5463 // The load/store type is never floating point.
5464 DataType::Type load_store_type = is_fp
5465 ? ((value_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64)
5466 : value_type;
5467 // Avoid sign extension in the CAS loop. Sign-extend after the loop.
5468 // Note: Using unsigned values yields the same value to store (we do not store higher bits).
5469 if (value_type == DataType::Type::kInt8) {
5470 load_store_type = DataType::Type::kUint8;
5471 } else if (value_type == DataType::Type::kInt16) {
5472 load_store_type = DataType::Type::kUint16;
5473 }
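// E.g. a getAndAdd on a `byte` field runs the update loop on zero-extended Uint8 values;
// the low 8 bits that get stored are the same either way, and the Sxtb at the end of this
// function restores the proper Java `byte` value in `out`.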
5474
5475 // Prepare register for old value.
5476 CPURegister old_value = out;
5477 if (get_and_update_op == GetAndUpdateOp::kSet) {
5478 // For floating point GetAndSet, do the GenerateGetAndUpdate() with core registers,
5479 // rather than moving between core and FP registers in the loop.
5480 arg = MoveToTempIfFpRegister(arg, value_type, masm, &temps);
5481 if (is_fp && !arg.IsZero()) {
5482 // We need a temporary register, but the scratch register is already used for the new
5483 // value (unless it is the zero bit pattern, +0.0f or +0.0) and GenerateGetAndUpdate()
5484 // needs another scratch register internally. We have allocated a normal temporary for that.
5485 old_value = CPURegisterFrom(locations->GetTemp(1u), load_store_type);
5486 } else if (value_type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
5487 // Load the old value initially to a scratch register.
5488 // We shall move it to `out` later with a read barrier.
5489 old_value = temps.AcquireW();
5490 }
5491 }
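// For example, a getAndSet of a double moves the new value once with `fmov x<tmp>, d<arg>`
// before the loop and the old value back with `fmov d<out>, x<old>` afterwards (see the
// Fmov below), instead of bouncing between FP and core registers on every loop iteration.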
5492
5493 if (byte_swap) {
5494 DCHECK_NE(value_type, DataType::Type::kReference);
5495 DCHECK_NE(DataType::Size(value_type), 1u);
5496 if (get_and_update_op == GetAndUpdateOp::kAdd) {
5497 // We need to do the byte swapping in the CAS loop for GetAndAdd.
5498 get_and_update_op = GetAndUpdateOp::kAddWithByteSwap;
5499 } else if (!arg.IsZero()) {
5500 // For other operations, avoid byte swap inside the CAS loop by providing an adjusted `arg`.
5501 // For GetAndSet use a scratch register; FP argument is already in a scratch register.
5502 // For bitwise operations GenerateGetAndUpdate() needs both scratch registers;
5503 // we have allocated a normal temporary to handle that.
5504 CPURegister temp = (get_and_update_op == GetAndUpdateOp::kSet)
5505 ? (is_fp ? arg : (arg.Is64Bits() ? temps.AcquireX() : temps.AcquireW()))
5506 : CPURegisterFrom(locations->GetTemp(1u), load_store_type);
5507 GenerateReverseBytes(masm, load_store_type, arg, temp);
5508 arg = temp;
5509 }
5510 }
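// Note on kAddWithByteSwap (a sketch of the idea): the addition must happen on
// native-endian values, so the swap cannot be hoisted out of the loop; conceptually each
// iteration does load-exclusive, REV to native order, ADD, REV back, store-exclusive.
// See GenerateGetAndUpdate() for the actual sequence.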
5511
5512 GenerateGetAndUpdate(codegen, get_and_update_op, load_store_type, order, tmp_ptr, arg, old_value);
5513
5514 if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) {
5515 // The only adjustment needed is sign-extension for `kInt16`.
5516 // Everything else has been done by the `GenerateGetAndUpdate()`.
5517 DCHECK(byte_swap);
5518 if (value_type == DataType::Type::kInt16) {
5519 DCHECK_EQ(load_store_type, DataType::Type::kUint16);
5520 __ Sxth(out.W(), old_value.W());
5521 }
5522 } else if (byte_swap) {
5523 // Also handles moving to FP registers.
5524 GenerateReverseBytes(masm, value_type, old_value, out);
5525 } else if (get_and_update_op == GetAndUpdateOp::kSet && value_type == DataType::Type::kFloat64) {
5526 __ Fmov(out.D(), old_value.X());
5527 } else if (get_and_update_op == GetAndUpdateOp::kSet && value_type == DataType::Type::kFloat32) {
5528 __ Fmov(out.S(), old_value.W());
5529 } else if (value_type == DataType::Type::kInt8) {
5530 __ Sxtb(out.W(), old_value.W());
5531 } else if (value_type == DataType::Type::kInt16) {
5532 __ Sxth(out.W(), old_value.W());
5533 } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5534 if (kUseBakerReadBarrier) {
5535 codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out.W(), old_value.W());
5536 } else {
5537 codegen->GenerateReadBarrierSlow(
5538 invoke,
5539 Location::RegisterLocation(out.GetCode()),
5540 Location::RegisterLocation(old_value.GetCode()),
5541 Location::RegisterLocation(target.object.GetCode()),
5542 /*offset=*/ 0u,
5543 /*index=*/ Location::RegisterLocation(target.offset.GetCode()));
5544 }
5545 }
5546
5547 if (slow_path != nullptr) {
5548 DCHECK(!byte_swap);
5549 __ Bind(slow_path->GetExitLabel());
5550 }
5551 }
5552
5553 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
5554 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
5555 }
5556
5557 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
5558 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
5559 }
5560
5561 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5562 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
5563 }
5564
5565 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5566 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
5567 }
5568
5569 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5570 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
5571 }
5572
5573 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5574 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
5575 }
5576
5577 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5578 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
5579 }
5580
5581 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5582 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
5583 }
5584
5585 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5586 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
5587 }
5588
5589 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5590 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
5591 }
5592
5593 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5594 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
5595 }
5596
5597 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5598 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
5599 }
5600
5601 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5602 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5603 }
5604
5605 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5606 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
5607 }
5608
5609 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5610 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5611 }
5612
5613 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5614 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
5615 }
5616
5617 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5618 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5619 }
5620
5621 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5622 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
5623 }
5624
5625 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5626 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5627 }
5628
5629 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5630 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
5631 }
5632
5633 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5634 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5635 }
5636
5637 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5638 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
5639 }
5640
5641 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5642 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5643 }
5644
5645 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5646 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
5647 }
5648
5649 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5650 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5651 }
5652
5653 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5654 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
5655 }
5656
5657 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5658 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5659 }
5660
5661 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5662 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
5663 }
5664
5665 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5666 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5667 }
5668
5669 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5670 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
5671 }
5672
5673 void VarHandleSlowPathARM64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
5674 DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
5675 CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
5676 MacroAssembler* masm = codegen->GetVIXLAssembler();
5677 HInvoke* invoke = GetInvoke();
5678 mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
5679 DataType::Type value_type =
5680 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5681 DCHECK_NE(value_type, DataType::Type::kReference);
5682 size_t size = DataType::Size(value_type);
5683 DCHECK_GT(size, 1u);
5684 Register varhandle = InputRegisterAt(invoke, 0);
5685 Register object = InputRegisterAt(invoke, 1);
5686 Register index = InputRegisterAt(invoke, 2);
5687
5688 MemberOffset class_offset = mirror::Object::ClassOffset();
5689 MemberOffset array_length_offset = mirror::Array::LengthOffset();
5690 MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
5691 MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
5692
5693 __ Bind(GetByteArrayViewCheckLabel());
5694
5695 VarHandleTarget target = GetVarHandleTarget(invoke);
5696 {
5697 UseScratchRegisterScope temps(masm);
5698 Register temp = temps.AcquireW();
5699 Register temp2 = temps.AcquireW();
5700
5701 // The main path checked that coordinateType0 is an array class matching the class of
5702 // the actual coordinate argument, but its component type does not match the value type.
5703 // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
5704 __ Ldr(temp, HeapOperand(varhandle, class_offset.Int32Value()));
5705 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
5706 codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
5707 __ Cmp(temp, temp2);
5708 __ B(GetEntryLabel(), ne);
5709
5710 // Check for array index out of bounds.
5711 __ Ldr(temp, HeapOperand(object, array_length_offset.Int32Value()));
5712 __ Subs(temp, temp, index);
5713 __ Ccmp(temp, size, NoFlag, hs); // If SUBS yields LO (C=false), keep the C flag clear.
5714 __ B(GetEntryLabel(), lo);
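// Bounds-check note (illustrative): the access is in range iff `index + size <= length`
// without wrap-around. SUBS computes `length - index`; if it borrows (index > length), the
// CCMP condition `hs` fails and NZCV is set to NoFlag (C clear), so the B.lo above is
// taken. Otherwise CCMP compares `length - index` against `size` and B.lo fires when fewer
// than `size` bytes remain. E.g. length = 10, index = 8, size = 4: 10 - 8 = 2 < 4, so we
// take the slow path.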
5715
5716 // Construct the target.
5717 __ Add(target.offset, index, data_offset.Int32Value());
5718
5719 // Alignment check. For unaligned access, go to the runtime.
5720 DCHECK(IsPowerOfTwo(size));
5721 if (size == 2u) {
5722 __ Tbnz(target.offset, 0, GetEntryLabel());
5723 } else {
5724 __ Tst(target.offset, size - 1u);
5725 __ B(GetEntryLabel(), ne);
5726 }
5727
5728 // Byte order check. For native byte order return to the main path.
5729 if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
5730 IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5731 // There is no reason to differentiate between native byte order and byte-swap
5732 // for setting a zero bit pattern. Just return to the main path.
5733 __ B(GetNativeByteOrderLabel());
5734 return;
5735 }
5736 __ Ldr(temp, HeapOperand(varhandle, native_byte_order_offset.Int32Value()));
5737 __ Cbnz(temp, GetNativeByteOrderLabel());
5738 }
5739
5740 switch (access_mode_template) {
5741 case mirror::VarHandle::AccessModeTemplate::kGet:
5742 GenerateVarHandleGet(invoke, codegen, order_, /*byte_swap=*/ true);
5743 break;
5744 case mirror::VarHandle::AccessModeTemplate::kSet:
5745 GenerateVarHandleSet(invoke, codegen, order_, /*byte_swap=*/ true);
5746 break;
5747 case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
5748 case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
5749 GenerateVarHandleCompareAndSetOrExchange(
5750 invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
5751 break;
5752 case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
5753 GenerateVarHandleGetAndUpdate(
5754 invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
5755 break;
5756 }
5757 __ B(GetExitLabel());
5758 }
5759
5760 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARM64, Name)
5761 UNIMPLEMENTED_INTRINSIC_LIST_ARM64(MARK_UNIMPLEMENTED);
5762 #undef MARK_UNIMPLEMENTED
5763
5764 UNREACHABLE_INTRINSICS(ARM64)
5765
5766 #undef __
5767
5768 } // namespace arm64
5769 } // namespace art
5770