/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm64.h"

#include "arch/arm64/callee_save_frame_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_arm64.h"
#include "common_arm64.h"
#include "data_type-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "mirror/var_handle.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/arm64/assembler_arm64.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop

namespace art {

namespace arm64 {

using helpers::CPURegisterFrom;
using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::LocationFrom;
using helpers::InputCPURegisterOrZeroRegAt;
using helpers::IsConstantZeroBitPattern;
using helpers::OperandFrom;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
using helpers::HRegisterFrom;
using helpers::InputRegisterAt;
using helpers::OutputRegister;

namespace {

ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
  return MemOperand(XRegisterFrom(location), offset);
}

}  // namespace

MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
  return codegen_->GetVIXLAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

using IntrinsicSlowPathARM64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM64,
                                                 SlowPathCodeARM64,
                                                 Arm64Assembler>;

#define __ codegen->GetVIXLAssembler()->

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
      : SlowPathCodeARM64(instruction), tmp_(tmp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen_in) override {
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    const int32_t element_size = DataType::Size(DataType::Type::kReference);

    Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
    Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
    Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
    Register tmp_reg = WRegisterFrom(tmp_);

    __ Bind(GetEntryLabel());
    vixl::aarch64::Label slow_copy_loop;
    __ Bind(&slow_copy_loop);
    __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
    codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp_reg = ReadBarrier::Mark(tmp_reg);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(tmp_.reg(), LR);
    DCHECK_NE(tmp_.reg(), WSP);
    DCHECK_NE(tmp_.reg(), WZR);
    // IP0 is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved).  It thus cannot be used by
    // any live register in this slow path.
    DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
    DCHECK_NE(tmp_.reg(), IP0);
    DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
    // This runtime call does not require a stack map.
    codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
    __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(&slow_copy_loop, ne);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathARM64"; }

 private:
  Location tmp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
};
#undef __

bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

#define __ masm->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
          is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
          is64bit ? XRegisterFrom(input) : WRegisterFrom(input));
}

void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenerateReverseBytes(MacroAssembler* masm,
                                 DataType::Type type,
                                 CPURegister in,
                                 CPURegister out) {
  switch (type) {
    case DataType::Type::kUint16:
      __ Rev16(out.W(), in.W());
      break;
    case DataType::Type::kInt16:
      __ Rev16(out.W(), in.W());
      __ Sxth(out.W(), out.W());
      break;
    case DataType::Type::kInt32:
      __ Rev(out.W(), in.W());
      break;
    case DataType::Type::kInt64:
      __ Rev(out.X(), in.X());
      break;
    case DataType::Type::kFloat32:
      __ Rev(in.W(), in.W());  // Note: Clobbers `in`.
      __ Fmov(out.S(), in.W());
      break;
    case DataType::Type::kFloat64:
      __ Rev(in.X(), in.X());  // Note: Clobbers `in`.
      __ Fmov(out.D(), in.X());
      break;
    default:
      LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
      UNREACHABLE();
  }
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type type,
                            MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location out = locations->Out();
  GenerateReverseBytes(masm, type, CPURegisterFrom(in, type), CPURegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
}

static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    DataType::Type type,
                                    MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     DataType::Type type,
                                     MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

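  // CTZ(x) is computed as CLZ(RBIT(x)): reverse the bit order, then count leading zeros.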
  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
  __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenReverse(LocationSummary* locations,
                       DataType::Type type,
                       MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

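  // A single RBIT reverses the full bit order of the register.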
  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenBitCount(HInvoke* instr, DataType::Type type, MacroAssembler* masm) {
  DCHECK(DataType::IsIntOrLongType(type)) << type;
  DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
  DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(instr, 0);
  Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
  VRegister fpr = (type == DataType::Type::kInt64) ? temps.AcquireD() : temps.AcquireS();

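  // There is no scalar popcount instruction in the base A64 ISA: move the value to a SIMD
  // register, count the set bits per byte with CNT, sum the byte counts with ADDV, and move
  // the result back to a general-purpose register.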
  __ Fmov(fpr, src);
  __ Cnt(fpr.V8B(), fpr.V8B());
  __ Addv(fpr.B(), fpr.V8B());
  __ Fmov(dst, fpr);
}

void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

static void GenHighestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
  size_t high_bit = (type == DataType::Type::kInt64) ? 63u : 31u;
  size_t clz_high_bit = (type == DataType::Type::kInt64) ? 6u : 5u;

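  // The highest one bit equals (1 << high_bit) >> CLZ(src). A zero input yields
  // CLZ(src) == high_bit + 1, which is what the BIC below relies on to clear the result.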
  __ Clz(temp, src);
  __ Mov(dst, UINT64_C(1) << high_bit);  // MOV (bitmask immediate)
  __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit));  // Clear dst if src was 0.
  __ Lsr(dst, dst, temp);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenLowestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();

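  // The lowest set bit is isolated with src & -src.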
  __ Neg(temp, src);
  __ And(dst, temp, src);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
  // Java 8 API definition for Math.round():
  // Return the closest long or int to the argument, with ties rounding to positive infinity.
  //
  // There is no single instruction in ARMv8 that can support the above definition.
  // We choose to use FCVTAS here, because it has the closest semantics.
  // FCVTAS performs rounding to nearest integer, ties away from zero.
  // For most inputs (positive values, zero or NaN), this instruction is enough.
  // We only need a little extra handling after FCVTAS if the input is a negative tie
  // (i.e. exactly halfway between two integers).
  //
  // The reason we did not choose the FCVTPS instruction here is that although it rounds
  // toward positive infinity, it does not round to nearest.
  // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
  // If we used this instruction, more handling code would be needed for most inputs.
  LocationSummary* l = invoke->GetLocations();
  VRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
  VRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
  vixl::aarch64::Label done;

  // Round to nearest integer, ties away from zero.
  __ Fcvtas(out_reg, in_reg);

  // For positive values, zero or NaN inputs, rounding is done.
  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, out_reg += 1.
  __ Frinta(tmp_fp, in_reg);
  __ Fsub(tmp_fp, in_reg, tmp_fp);
  __ Fcmp(tmp_fp, 0.5);
  __ Cinc(out_reg, out_reg, eq);

  __ Bind(&done);
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  codegen_->Load(DataType::Type::kReference, WRegisterFrom(invoke->GetLocations()->Out()),
                 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK((type == DataType::Type::kInt32) ||
         (type == DataType::Type::kInt64) ||
         (type == DataType::Type::kReference));
  Location base_loc = locations->InAt(1);
  Register base = WRegisterFrom(base_loc);      // Object pointer.
  Location offset_loc = locations->InAt(2);
  Register offset = XRegisterFrom(offset_loc);  // Long offset.
  Location trg_loc = locations->Out();
  Register trg = RegisterFrom(trg_loc, type);

  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
    Register temp = WRegisterFrom(locations->GetTemp(0));
    MacroAssembler* masm = codegen->GetVIXLAssembler();
    // Piggy-back on the field load path using introspection for the Baker read barrier.
    __ Add(temp, base, offset.W());  // Offset should not exceed 32 bits.
    codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
                                                   trg_loc,
                                                   base,
                                                   MemOperand(temp.X()),
                                                   /* needs_null_check= */ false,
                                                   is_volatile);
  } else {
    // Other cases.
    MemOperand mem_op(base.X(), offset);
    if (is_volatile) {
      codegen->LoadAcquire(invoke, type, trg, mem_op, /* needs_null_check= */ true);
    } else {
      codegen->Load(type, trg, mem_op);
    }

    if (type == DataType::Type::kReference) {
      DCHECK(trg.IsW());
      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
    }
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
    // We need a temporary register for the read barrier load in order to use
    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
    locations->AddTemp(FixedTempLocation());
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}

void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}

static void GenUnsafePut(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = codegen->GetVIXLAssembler();

  Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
  Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
  Register value = RegisterFrom(locations->InAt(3), type);
  Register source = value;
  MemOperand mem_op(base.X(), offset);

  {
    // We use a block to end the scratch scope before the write barrier, thus
    // freeing the temporary registers so they can be used in `MarkGCCard`.
    UseScratchRegisterScope temps(masm);

    if (kPoisonHeapReferences && type == DataType::Type::kReference) {
      DCHECK(value.IsW());
      Register temp = temps.AcquireW();
      __ Mov(temp.W(), value.W());
      codegen->GetAssembler()->PoisonHeapReference(temp.W());
      source = temp;
    }

    if (is_volatile || is_ordered) {
      codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check= */ false);
    } else {
      codegen->Store(type, source, mem_op);
    }
  }

  if (type == DataType::Type::kReference) {
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}

static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void EmitLoadExclusive(CodeGeneratorARM64* codegen,
                              DataType::Type type,
                              Register ptr,
                              Register old_value,
                              bool use_load_acquire) {
  Arm64Assembler* assembler = codegen->GetAssembler();
  MacroAssembler* masm = assembler->GetVIXLAssembler();
  switch (type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (use_load_acquire) {
        __ Ldaxrb(old_value, MemOperand(ptr));
      } else {
        __ Ldxrb(old_value, MemOperand(ptr));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (use_load_acquire) {
        __ Ldaxrh(old_value, MemOperand(ptr));
      } else {
        __ Ldxrh(old_value, MemOperand(ptr));
      }
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kReference:
      if (use_load_acquire) {
        __ Ldaxr(old_value, MemOperand(ptr));
      } else {
        __ Ldxr(old_value, MemOperand(ptr));
      }
      break;
    default:
      LOG(FATAL) << "Unexpected type: " << type;
      UNREACHABLE();
  }
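  // The exclusive loads above zero-extend sub-word values; sign-extend Int8/Int16 results
  // and unpoison reference results here.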
  switch (type) {
    case DataType::Type::kInt8:
      __ Sxtb(old_value, old_value);
      break;
    case DataType::Type::kInt16:
      __ Sxth(old_value, old_value);
      break;
    case DataType::Type::kReference:
      assembler->MaybeUnpoisonHeapReference(old_value);
      break;
    default:
      break;
  }
}

static void EmitStoreExclusive(CodeGeneratorARM64* codegen,
                               DataType::Type type,
                               Register ptr,
                               Register store_result,
                               Register new_value,
                               bool use_store_release) {
  Arm64Assembler* assembler = codegen->GetAssembler();
  MacroAssembler* masm = assembler->GetVIXLAssembler();
  if (type == DataType::Type::kReference) {
    assembler->MaybePoisonHeapReference(new_value);
  }
  switch (type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (use_store_release) {
        __ Stlxrb(store_result, new_value, MemOperand(ptr));
      } else {
        __ Stxrb(store_result, new_value, MemOperand(ptr));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (use_store_release) {
        __ Stlxrh(store_result, new_value, MemOperand(ptr));
      } else {
        __ Stxrh(store_result, new_value, MemOperand(ptr));
      }
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kReference:
      if (use_store_release) {
        __ Stlxr(store_result, new_value, MemOperand(ptr));
      } else {
        __ Stxr(store_result, new_value, MemOperand(ptr));
      }
      break;
    default:
      LOG(FATAL) << "Unexpected type: " << type;
      UNREACHABLE();
  }
  if (type == DataType::Type::kReference) {
    assembler->MaybeUnpoisonHeapReference(new_value);
  }
}

static void GenerateCompareAndSet(CodeGeneratorARM64* codegen,
                                  DataType::Type type,
                                  std::memory_order order,
                                  bool strong,
                                  vixl::aarch64::Label* cmp_failure,
                                  Register ptr,
                                  Register new_value,
                                  Register old_value,
                                  Register store_result,
                                  Register expected,
                                  Register expected2 = Register()) {
  // The `expected2` is valid only for the reference slow path and represents the unmarked old
  // value from the main path attempt to emit CAS when the marked old value matched `expected`.
  DCHECK(type == DataType::Type::kReference || !expected2.IsValid());

  DCHECK(ptr.IsX());
  DCHECK_EQ(new_value.IsX(), type == DataType::Type::kInt64);
  DCHECK_EQ(old_value.IsX(), type == DataType::Type::kInt64);
  DCHECK(store_result.IsW());
  DCHECK_EQ(expected.IsX(), type == DataType::Type::kInt64);
  DCHECK(!expected2.IsValid() || expected2.IsW());

  Arm64Assembler* assembler = codegen->GetAssembler();
  MacroAssembler* masm = assembler->GetVIXLAssembler();

  bool use_load_acquire =
      (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
  bool use_store_release =
      (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
  DCHECK(use_load_acquire || use_store_release || order == std::memory_order_relaxed);

  // repeat: {
  //   old_value = [ptr];  // Load exclusive.
  //   if (old_value != expected && old_value != expected2) goto cmp_failure;
  //   store_result = failed([ptr] <- new_value);  // Store exclusive.
  // }
  // if (strong) {
  //   if (store_result) goto repeat;  // Repeat until compare fails or store exclusive succeeds.
  // } else {
  //   store_result = store_result ^ 1;  // Report success as 1, failure as 0.
  // }
  //
  // Flag Z indicates whether `old_value == expected || old_value == expected2`.
  // (If `expected2` is not valid, the `old_value == expected2` part is not emitted.)

  vixl::aarch64::Label loop_head;
  if (strong) {
    __ Bind(&loop_head);
  }
  EmitLoadExclusive(codegen, type, ptr, old_value, use_load_acquire);
  __ Cmp(old_value, expected);
  if (expected2.IsValid()) {
    __ Ccmp(old_value, expected2, ZFlag, ne);
  }
  // If the comparison failed, the Z flag is cleared as we branch to the `cmp_failure` label.
  // If the comparison succeeded, the Z flag is set and remains set after the end of the
  // code emitted here, unless we retry the whole operation.
  __ B(cmp_failure, ne);
  EmitStoreExclusive(codegen, type, ptr, store_result, new_value, use_store_release);
  if (strong) {
    __ Cbnz(store_result, &loop_head);
  } else {
    // Flip the `store_result` register to indicate success by 1 and failure by 0.
    __ Eor(store_result, store_result, 1);
  }
}

1132 class ReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 {
1133  public:
ReadBarrierCasSlowPathARM64(HInvoke * invoke,std::memory_order order,bool strong,Register base,Register offset,Register expected,Register new_value,Register old_value,Register old_value_temp,Register store_result,bool update_old_value,CodeGeneratorARM64 * arm64_codegen)1134   ReadBarrierCasSlowPathARM64(HInvoke* invoke,
1135                               std::memory_order order,
1136                               bool strong,
1137                               Register base,
1138                               Register offset,
1139                               Register expected,
1140                               Register new_value,
1141                               Register old_value,
1142                               Register old_value_temp,
1143                               Register store_result,
1144                               bool update_old_value,
1145                               CodeGeneratorARM64* arm64_codegen)
1146       : SlowPathCodeARM64(invoke),
1147         order_(order),
1148         strong_(strong),
1149         base_(base),
1150         offset_(offset),
1151         expected_(expected),
1152         new_value_(new_value),
1153         old_value_(old_value),
1154         old_value_temp_(old_value_temp),
1155         store_result_(store_result),
1156         update_old_value_(update_old_value),
1157         mark_old_value_slow_path_(nullptr),
1158         update_old_value_slow_path_(nullptr) {
1159     if (!kUseBakerReadBarrier) {
1160       // We need to add the slow path now; it is too late to do so when emitting slow path code.
1161       mark_old_value_slow_path_ = arm64_codegen->AddReadBarrierSlowPath(
1162           invoke,
1163           Location::RegisterLocation(old_value_temp.GetCode()),
1164           Location::RegisterLocation(old_value.GetCode()),
1165           Location::RegisterLocation(base.GetCode()),
1166           /*offset=*/ 0u,
1167           /*index=*/ Location::RegisterLocation(offset.GetCode()));
1168       if (update_old_value_) {
1169         update_old_value_slow_path_ = arm64_codegen->AddReadBarrierSlowPath(
1170             invoke,
1171             Location::RegisterLocation(old_value.GetCode()),
1172             Location::RegisterLocation(old_value_temp.GetCode()),
1173             Location::RegisterLocation(base.GetCode()),
1174             /*offset=*/ 0u,
1175             /*index=*/ Location::RegisterLocation(offset.GetCode()));
1176       }
1177     }
1178   }
1179 
1180   const char* GetDescription() const override { return "ReadBarrierCasSlowPathARM64"; }
1181 
1182   void EmitNativeCode(CodeGenerator* codegen) override {
1183     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1184     Arm64Assembler* assembler = arm64_codegen->GetAssembler();
1185     MacroAssembler* masm = assembler->GetVIXLAssembler();
1186     __ Bind(GetEntryLabel());
1187 
1188     // Mark the `old_value_` from the main path and compare with `expected_`.
1189     if (kUseBakerReadBarrier) {
1190       DCHECK(mark_old_value_slow_path_ == nullptr);
1191       arm64_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_temp_, old_value_);
1192     } else {
1193       DCHECK(mark_old_value_slow_path_ != nullptr);
1194       __ B(mark_old_value_slow_path_->GetEntryLabel());
1195       __ Bind(mark_old_value_slow_path_->GetExitLabel());
1196     }
1197     __ Cmp(old_value_temp_, expected_);
1198     if (update_old_value_) {
1199       // Update the old value if we're going to return from the slow path.
1200       __ Csel(old_value_, old_value_temp_, old_value_, ne);
1201     }
1202     __ B(GetExitLabel(), ne);  // If taken, Z=false indicates failure.
1203 
1204     // The `old_value` we have read did not match `expected` (which is always a to-space
1205     // reference) but after the read barrier the marked to-space value matched, so the
1206     // `old_value` must be a from-space reference to the same object. Do the same CAS loop
1207     // as the main path but check for both `expected` and the unmarked old value
1208     // representing the to-space and from-space references for the same object.
1209 
1210     UseScratchRegisterScope temps(masm);
1211     DCHECK(!store_result_.IsValid() || !temps.IsAvailable(store_result_));
1212     Register tmp_ptr = temps.AcquireX();
1213     Register store_result = store_result_.IsValid() ? store_result_ : temps.AcquireW();
1214 
1215     // Recalculate the `tmp_ptr` from the main path, which was clobbered by the read barrier above.
1216     __ Add(tmp_ptr, base_.X(), Operand(offset_));
1217 
1218     vixl::aarch64::Label mark_old_value;
1219     GenerateCompareAndSet(arm64_codegen,
1220                           DataType::Type::kReference,
1221                           order_,
1222                           strong_,
1223                           /*cmp_failure=*/ update_old_value_ ? &mark_old_value : GetExitLabel(),
1224                           tmp_ptr,
1225                           new_value_,
1226                           /*old_value=*/ old_value_temp_,
1227                           store_result,
1228                           expected_,
1229                           /*expected2=*/ old_value_);
1230     if (update_old_value_) {
1231       // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
1232       // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
1233       __ Mov(old_value_, expected_);
1234     }
1235 
1236     // Z=true from the CMP+CCMP in GenerateCompareAndSet() above indicates comparison success.
1237     // For strong CAS, that's the overall success. For weak CAS, the code also needs
1238     // to check the `store_result` after returning from the slow path.
1239     __ B(GetExitLabel());
1240 
1241     if (update_old_value_) {
1242       __ Bind(&mark_old_value);
1243       if (kUseBakerReadBarrier) {
1244         DCHECK(update_old_value_slow_path_ == nullptr);
1245         arm64_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_, old_value_temp_);
1246       } else {
1247         // Note: We could redirect the `failure` above directly to the entry label and bind
1248         // the exit label in the main path, but the main path would need to access the
1249         // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
1250         DCHECK(update_old_value_slow_path_ != nullptr);
1251         __ B(update_old_value_slow_path_->GetEntryLabel());
1252         __ Bind(update_old_value_slow_path_->GetExitLabel());
1253       }
1254       __ B(GetExitLabel());
1255     }
1256   }
1257 
1258  private:
1259   std::memory_order order_;
1260   bool strong_;
1261   Register base_;
1262   Register offset_;
1263   Register expected_;
1264   Register new_value_;
1265   Register old_value_;
1266   Register old_value_temp_;
1267   Register store_result_;
1268   bool update_old_value_;
1269   SlowPathCodeARM64* mark_old_value_slow_path_;
1270   SlowPathCodeARM64* update_old_value_slow_path_;
1271 };
1272 
1273 static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) {
1274   MacroAssembler* masm = codegen->GetVIXLAssembler();
1275   LocationSummary* locations = invoke->GetLocations();
1276 
1277   Register out = WRegisterFrom(locations->Out());                 // Boolean result.
1278   Register base = WRegisterFrom(locations->InAt(1));              // Object pointer.
1279   Register offset = XRegisterFrom(locations->InAt(2));            // Long offset.
1280   Register expected = RegisterFrom(locations->InAt(3), type);     // Expected.
1281   Register new_value = RegisterFrom(locations->InAt(4), type);    // New value.
1282 
1283   // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
1284   if (type == DataType::Type::kReference) {
1285     // Mark card for object assuming new value is stored.
1286     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
1287     codegen->MarkGCCard(base, new_value, new_value_can_be_null);
1288   }
1289 
1290   UseScratchRegisterScope temps(masm);
1291   Register tmp_ptr = temps.AcquireX();                             // Pointer to actual memory.
1292   Register old_value;                                              // Value in memory.
1293 
1294   vixl::aarch64::Label exit_loop_label;
1295   vixl::aarch64::Label* exit_loop = &exit_loop_label;
1296   vixl::aarch64::Label* cmp_failure = &exit_loop_label;
1297 
1298   if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) {
1299     // We need to store the `old_value` in a non-scratch register to make sure
1300     // the read barrier in the slow path does not clobber it.
1301     old_value = WRegisterFrom(locations->GetTemp(0));  // The old value from main path.
1302     // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
1303     // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
1304     Register old_value_temp = WRegisterFrom(locations->GetTemp(1));
1305     ReadBarrierCasSlowPathARM64* slow_path =
1306         new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARM64(
1307             invoke,
1308             std::memory_order_seq_cst,
1309             /*strong=*/ true,
1310             base,
1311             offset,
1312             expected,
1313             new_value,
1314             old_value,
1315             old_value_temp,
1316             /*store_result=*/ Register(),  // Use a scratch register.
1317             /*update_old_value=*/ false,
1318             codegen);
1319     codegen->AddSlowPath(slow_path);
1320     exit_loop = slow_path->GetExitLabel();
1321     cmp_failure = slow_path->GetEntryLabel();
1322   } else {
1323     old_value = temps.AcquireSameSizeAs(new_value);
1324   }
1325 
1326   __ Add(tmp_ptr, base.X(), Operand(offset));
1327 
1328   GenerateCompareAndSet(codegen,
1329                         type,
1330                         std::memory_order_seq_cst,
1331                         /*strong=*/ true,
1332                         cmp_failure,
1333                         tmp_ptr,
1334                         new_value,
1335                         old_value,
1336                         /*store_result=*/ old_value.W(),  // Reuse `old_value` for ST*XR* result.
1337                         expected);
1338   __ Bind(exit_loop);
1339   __ Cset(out, eq);
1340 }
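
// Note on the code above: `exit_loop` is reached with the Z flag reflecting the final
// comparison (set on success, clear on failure), so a single `Cset(out, eq)` is enough to
// produce the boolean result of the compare-and-swap.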
1341 
1342 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
1343   CreateUnsafeCASLocations(allocator_, invoke);
1344 }
1345 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
1346   CreateUnsafeCASLocations(allocator_, invoke);
1347 }
1348 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
1349   // The only read barrier implementation supporting the
1350   // UnsafeCASObject intrinsic is the Baker-style read barrier. b/173104084
1351   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1352     return;
1353   }
1354 
1355   CreateUnsafeCASLocations(allocator_, invoke);
1356   if (kEmitCompilerReadBarrier) {
1357     // We need two non-scratch temporary registers for read barrier.
1358     LocationSummary* locations = invoke->GetLocations();
1359     if (kUseBakerReadBarrier) {
1360       locations->AddTemp(Location::RequiresRegister());
1361       locations->AddTemp(Location::RequiresRegister());
1362     } else {
1363       // To preserve the old value across the non-Baker read barrier
1364       // slow path, use a fixed callee-save register.
1365       constexpr int first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
1366       locations->AddTemp(Location::RegisterLocation(first_callee_save));
1367       // To reduce the number of moves, request x0 as the second temporary.
1368       DCHECK(InvokeRuntimeCallingConvention().GetReturnLocation(DataType::Type::kReference).Equals(
1369                  Location::RegisterLocation(x0.GetCode())));
1370       locations->AddTemp(Location::RegisterLocation(x0.GetCode()));
1371     }
1372   }
1373 }
1374 
1375 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
1376   GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_);
1377 }
1378 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
1379   GenUnsafeCas(invoke, DataType::Type::kInt64, codegen_);
1380 }
1381 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
1382   GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
1383 }
1384 
1385 enum class GetAndUpdateOp {
1386   kSet,
1387   kAdd,
1388   kAddWithByteSwap,
1389   kAnd,
1390   kOr,
1391   kXor
1392 };
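
// Informal guide to the operations above (not an exhaustive list of call sites): kSet
// corresponds to get-and-set style updates, kAdd to get-and-add, and kAnd/kOr/kXor to the
// bitwise get-and-update operations; kAddWithByteSwap is the kAdd variant used when the
// memory value is stored in the non-native byte order and must be byte-reversed around the
// addition.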
1393 
1394 static void GenerateGetAndUpdate(CodeGeneratorARM64* codegen,
1395                                  GetAndUpdateOp get_and_update_op,
1396                                  DataType::Type load_store_type,
1397                                  std::memory_order order,
1398                                  Register ptr,
1399                                  CPURegister arg,
1400                                  CPURegister old_value) {
1401   MacroAssembler* masm = codegen->GetVIXLAssembler();
1402   UseScratchRegisterScope temps(masm);
1403   Register store_result = temps.AcquireW();
1404 
1405   DCHECK_EQ(old_value.GetSizeInBits(), arg.GetSizeInBits());
1406   Register old_value_reg;
1407   Register new_value;
1408   switch (get_and_update_op) {
1409     case GetAndUpdateOp::kSet:
1410       old_value_reg = old_value.IsX() ? old_value.X() : old_value.W();
1411       new_value = arg.IsX() ? arg.X() : arg.W();
1412       break;
1413     case GetAndUpdateOp::kAddWithByteSwap:
1414     case GetAndUpdateOp::kAdd:
1415       if (arg.IsVRegister()) {
1416         old_value_reg = arg.IsD() ? temps.AcquireX() : temps.AcquireW();
1417         new_value = old_value_reg;  // Use the same temporary.
1418         break;
1419       }
1420       FALLTHROUGH_INTENDED;
1421     case GetAndUpdateOp::kAnd:
1422     case GetAndUpdateOp::kOr:
1423     case GetAndUpdateOp::kXor:
1424       old_value_reg = old_value.IsX() ? old_value.X() : old_value.W();
1425       new_value = old_value.IsX() ? temps.AcquireX() : temps.AcquireW();
1426       break;
1427   }
1428 
1429   bool use_load_acquire =
1430       (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
1431   bool use_store_release =
1432       (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
1433   DCHECK(use_load_acquire || use_store_release);
1434 
1435   vixl::aarch64::Label loop_label;
1436   __ Bind(&loop_label);
1437   EmitLoadExclusive(codegen, load_store_type, ptr, old_value_reg, use_load_acquire);
1438   switch (get_and_update_op) {
1439     case GetAndUpdateOp::kSet:
1440       break;
1441     case GetAndUpdateOp::kAddWithByteSwap:
1442       // To avoid unnecessary sign extension before REV16, the caller must specify `kUint16`
1443       // instead of `kInt16` and do the sign-extension explicitly afterwards.
1444       DCHECK_NE(load_store_type, DataType::Type::kInt16);
1445       GenerateReverseBytes(masm, load_store_type, old_value_reg, old_value_reg);
1446       FALLTHROUGH_INTENDED;
1447     case GetAndUpdateOp::kAdd:
1448       if (arg.IsVRegister()) {
1449         VRegister old_value_vreg = old_value.IsD() ? old_value.D() : old_value.S();
1450         VRegister sum = temps.AcquireSameSizeAs(old_value_vreg);
1451         __ Fmov(old_value_vreg, old_value_reg);
1452         __ Fadd(sum, old_value_vreg, arg.IsD() ? arg.D() : arg.S());
1453         __ Fmov(new_value, sum);
1454       } else {
1455         __ Add(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1456       }
1457       if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) {
1458         GenerateReverseBytes(masm, load_store_type, new_value, new_value);
1459       }
1460       break;
1461     case GetAndUpdateOp::kAnd:
1462       __ And(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1463       break;
1464     case GetAndUpdateOp::kOr:
1465       __ Orr(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1466       break;
1467     case GetAndUpdateOp::kXor:
1468       __ Eor(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1469       break;
1470   }
1471   EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value, use_store_release);
1472   __ Cbnz(store_result, &loop_label);
1473 }
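
// A minimal sketch of the loop above for an integer kAdd with seq_cst ordering (illustrative
// only; register choices, operand sizes and acquire/release variants depend on the arguments):
//
//   loop:
//     ldaxr w_old, [x_ptr]
//     add   w_new, w_old, w_arg
//     stlxr w_res, w_new, [x_ptr]
//     cbnz  w_res, loop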
1474 
1475 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
1476   LocationSummary* locations =
1477       new (allocator_) LocationSummary(invoke,
1478                                        invoke->InputAt(1)->CanBeNull()
1479                                            ? LocationSummary::kCallOnSlowPath
1480                                            : LocationSummary::kNoCall,
1481                                        kIntrinsified);
1482   locations->SetInAt(0, Location::RequiresRegister());
1483   locations->SetInAt(1, Location::RequiresRegister());
1484   locations->AddTemp(Location::RequiresRegister());
1485   locations->AddTemp(Location::RequiresRegister());
1486   locations->AddTemp(Location::RequiresRegister());
1487   // Need a temporary register for the String compression feature.
1488   if (mirror::kUseStringCompression) {
1489     locations->AddTemp(Location::RequiresRegister());
1490   }
1491   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1492 }
1493 
1494 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
1495   MacroAssembler* masm = GetVIXLAssembler();
1496   LocationSummary* locations = invoke->GetLocations();
1497 
1498   Register str = InputRegisterAt(invoke, 0);
1499   Register arg = InputRegisterAt(invoke, 1);
1500   DCHECK(str.IsW());
1501   DCHECK(arg.IsW());
1502   Register out = OutputRegister(invoke);
1503 
1504   Register temp0 = WRegisterFrom(locations->GetTemp(0));
1505   Register temp1 = WRegisterFrom(locations->GetTemp(1));
1506   Register temp2 = WRegisterFrom(locations->GetTemp(2));
1507   Register temp3;
1508   if (mirror::kUseStringCompression) {
1509     temp3 = WRegisterFrom(locations->GetTemp(3));
1510   }
1511 
1512   vixl::aarch64::Label loop;
1513   vixl::aarch64::Label find_char_diff;
1514   vixl::aarch64::Label end;
1515   vixl::aarch64::Label different_compression;
1516 
1517   // Get offsets of count and value fields within a string object.
1518   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1519   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1520 
1521   // Note that the null check must have been done earlier.
1522   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1523 
1524   // Take slow path and throw if input can be and is null.
1525   SlowPathCodeARM64* slow_path = nullptr;
1526   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1527   if (can_slow_path) {
1528     slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1529     codegen_->AddSlowPath(slow_path);
1530     __ Cbz(arg, slow_path->GetEntryLabel());
1531   }
1532 
1533   // Reference equality check, return 0 if same reference.
1534   __ Subs(out, str, arg);
1535   __ B(&end, eq);
1536 
1537   if (mirror::kUseStringCompression) {
1538     // Load `count` fields of this and argument strings.
1539     __ Ldr(temp3, HeapOperand(str, count_offset));
1540     __ Ldr(temp2, HeapOperand(arg, count_offset));
1541     // Clean out compression flag from lengths.
1542     __ Lsr(temp0, temp3, 1u);
1543     __ Lsr(temp1, temp2, 1u);
1544   } else {
1545     // Load lengths of this and argument strings.
1546     __ Ldr(temp0, HeapOperand(str, count_offset));
1547     __ Ldr(temp1, HeapOperand(arg, count_offset));
1548   }
1549   // out = length diff.
1550   __ Subs(out, temp0, temp1);
1551   // temp0 = min(len(str), len(arg)).
1552   __ Csel(temp0, temp1, temp0, ge);
1553   // Shorter string is empty?
1554   __ Cbz(temp0, &end);
1555 
1556   if (mirror::kUseStringCompression) {
1557     // Check if both strings use the same compression style, so that this comparison loop can be used.
1558     __ Eor(temp2, temp2, Operand(temp3));
1559     // Interleave with the compression flag extraction, which is needed for both paths,
1560     // and also set the flags, which are needed only for the different-compression path.
1561     __ Ands(temp3.W(), temp3.W(), Operand(1));
1562     __ Tbnz(temp2, 0, &different_compression);  // Does not use flags.
1563   }
1564   // Store offset of string value in preparation for comparison loop.
1565   __ Mov(temp1, value_offset);
1566   if (mirror::kUseStringCompression) {
1567     // For string compression, calculate the number of bytes to compare (not chars).
1568     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1569     __ Lsl(temp0, temp0, temp3);
1570   }
1571 
1572   UseScratchRegisterScope scratch_scope(masm);
1573   Register temp4 = scratch_scope.AcquireX();
1574 
1575   // Assertions that must hold in order to compare strings 8 bytes at a time.
1576   DCHECK_ALIGNED(value_offset, 8);
1577   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1578 
1579   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1580   DCHECK_EQ(char_size, 2u);
1581 
1582   // Promote temp2 to an X reg, ready for LDR.
1583   temp2 = temp2.X();
1584 
1585   // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
1586   __ Bind(&loop);
1587   __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
1588   __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
1589   __ Cmp(temp4, temp2);
1590   __ B(ne, &find_char_diff);
1591   __ Add(temp1, temp1, char_size * 4);
1592   // With string compression, we have compared 8 bytes, otherwise 4 chars.
1593   __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
1594   __ B(&loop, hi);
1595   __ B(&end);
1596 
1597   // Promote temp1 to an X reg, ready for EOR.
1598   temp1 = temp1.X();
1599 
1600   // Find the single character difference.
1601   __ Bind(&find_char_diff);
1602   // Get the bit position of the first character that differs.
1603   __ Eor(temp1, temp2, temp4);
1604   __ Rbit(temp1, temp1);
1605   __ Clz(temp1, temp1);
1606 
1607   // If the number of chars remaining <= the index where the difference occurs (0-3), then
1608   // the difference occurs outside the remaining string data, so just return length diff (out).
1609   // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
1610   // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
1611   // unsigned when string compression is disabled.
1612   // When it's enabled, the comparison must be unsigned.
1613   __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
1614   __ B(ls, &end);
1615 
1616   // Extract the characters and calculate the difference.
1617   if (mirror::kUseStringCompression) {
1618     __ Bic(temp1, temp1, 0x7);
1619     __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
1620   } else {
1621     __ Bic(temp1, temp1, 0xf);
1622   }
1623   __ Lsr(temp2, temp2, temp1);
1624   __ Lsr(temp4, temp4, temp1);
1625   if (mirror::kUseStringCompression) {
1626     // Prioritize the case of compressed strings and calculate such result first.
1627     __ Uxtb(temp1, temp4);
1628     __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
1629     __ Tbz(temp3, 0u, &end);  // If actually compressed, we're done.
1630   }
1631   __ Uxth(temp4, temp4);
1632   __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
1633 
1634   if (mirror::kUseStringCompression) {
1635     __ B(&end);
1636     __ Bind(&different_compression);
1637 
1638     // Comparison for different compression style.
1639     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1640     DCHECK_EQ(c_char_size, 1u);
1641     temp1 = temp1.W();
1642     temp2 = temp2.W();
1643     temp4 = temp4.W();
1644 
1645     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1646     // Note that flags have been set by the `str` compression flag extraction to `temp3`
1647     // before branching to the `different_compression` label.
1648     __ Csel(temp1, str, arg, eq);   // Pointer to the compressed string.
1649     __ Csel(temp2, str, arg, ne);   // Pointer to the uncompressed string.
1650 
1651     // We want to free up temp3, currently holding the `str` compression flag, for comparison.
1652     // So, we move it to the bottom bit of the iteration count `temp0`, which we then need to
1653     // treat as unsigned. Start by freeing the bit with an LSL and continue further down with a
1654     // SUB, which allows `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1655     __ Lsl(temp0, temp0, 1u);
1656 
1657     // Adjust temp1 and temp2 from string pointers to data pointers.
1658     __ Add(temp1, temp1, Operand(value_offset));
1659     __ Add(temp2, temp2, Operand(value_offset));
1660 
1661     // Complete the move of the compression flag.
1662     __ Sub(temp0, temp0, Operand(temp3));
1663 
1664     vixl::aarch64::Label different_compression_loop;
1665     vixl::aarch64::Label different_compression_diff;
1666 
1667     __ Bind(&different_compression_loop);
1668     __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
1669     __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
1670     __ Subs(temp4, temp4, Operand(temp3));
1671     __ B(&different_compression_diff, ne);
1672     __ Subs(temp0, temp0, 2);
1673     __ B(&different_compression_loop, hi);
1674     __ B(&end);
1675 
1676     // Calculate the difference.
1677     __ Bind(&different_compression_diff);
1678     __ Tst(temp0, Operand(1));
1679     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1680                   "Expecting 0=compressed, 1=uncompressed");
1681     __ Cneg(out, temp4, ne);
1682   }
1683 
1684   __ Bind(&end);
1685 
1686   if (can_slow_path) {
1687     __ Bind(slow_path->GetExitLabel());
1688   }
1689 }
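
// The value left in `out` above follows the String.compareTo contract: zero for equal strings,
// otherwise the difference between the first pair of differing characters, or the length
// difference when one string is a prefix of the other.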
1690 
1691 // The cut off for unrolling the loop in String.equals() intrinsic for const strings.
1692 // The normal loop plus the pre-header is 9 instructions without string compression and 12
1693 // instructions with string compression. We can compare up to 8 bytes in 4 instructions
1694 // (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up
1695 // to 10 instructions for the unrolled loop.
1696 constexpr size_t kShortConstStringEqualsCutoffInBytes = 32;
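
// Worked example of the cutoff (numbers for illustration only): a compressed (all-ASCII) const
// string of 20 characters occupies 20 bytes, within the 32-byte cutoff, so it is compared
// inline with one LDP/LDP/CMP/CCMP/B.NE block (16 bytes) plus one LDR/LDR/CMP/B.NE block (the
// remaining, rounded-up 8 bytes). An uncompressed const string of 20 characters needs 40 bytes
// and falls back to the generic comparison loop.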
1697 
1698 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1699   if (candidate->IsLoadString()) {
1700     HLoadString* load_string = candidate->AsLoadString();
1701     const DexFile& dex_file = load_string->GetDexFile();
1702     return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1703   }
1704   return nullptr;
1705 }
1706 
1707 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
1708   LocationSummary* locations =
1709       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1710   locations->SetInAt(0, Location::RequiresRegister());
1711   locations->SetInAt(1, Location::RequiresRegister());
1712 
1713   // For the generic implementation and for long const strings we need a temporary.
1714   // We do not need it for short const strings, up to 8 bytes, see code generation below.
1715   uint32_t const_string_length = 0u;
1716   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1717   if (const_string == nullptr) {
1718     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1719   }
1720   bool is_compressed =
1721       mirror::kUseStringCompression &&
1722       const_string != nullptr &&
1723       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1724   if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) {
1725     locations->AddTemp(Location::RequiresRegister());
1726   }
1727 
1728   // TODO: If the String.equals() is used only for an immediately following HIf, we can
1729   // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1730   // Then we shall need an extra temporary register instead of the output register.
1731   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1732 }
1733 
1734 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
1735   MacroAssembler* masm = GetVIXLAssembler();
1736   LocationSummary* locations = invoke->GetLocations();
1737 
1738   Register str = WRegisterFrom(locations->InAt(0));
1739   Register arg = WRegisterFrom(locations->InAt(1));
1740   Register out = XRegisterFrom(locations->Out());
1741 
1742   UseScratchRegisterScope scratch_scope(masm);
1743   Register temp = scratch_scope.AcquireW();
1744   Register temp1 = scratch_scope.AcquireW();
1745 
1746   vixl::aarch64::Label loop;
1747   vixl::aarch64::Label end;
1748   vixl::aarch64::Label return_true;
1749   vixl::aarch64::Label return_false;
1750 
1751   // Get offsets of count, value, and class fields within a string object.
1752   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1753   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1754   const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1755 
1756   // Note that the null check must have been done earlier.
1757   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1758 
1759   StringEqualsOptimizations optimizations(invoke);
1760   if (!optimizations.GetArgumentNotNull()) {
1761     // Check if input is null, return false if it is.
1762     __ Cbz(arg, &return_false);
1763   }
1764 
1765   // Reference equality check, return true if same reference.
1766   __ Cmp(str, arg);
1767   __ B(&return_true, eq);
1768 
1769   if (!optimizations.GetArgumentIsString()) {
1770     // Instanceof check for the argument by comparing class fields.
1771     // All string objects must have the same type since String cannot be subclassed.
1772     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1773     // If the argument is a string object, its class field must be equal to receiver's class field.
1774     //
1775     // As the String class is expected to be non-movable, we can read the class
1776     // field from String.equals' arguments without read barriers.
1777     AssertNonMovableStringClass();
1778     // /* HeapReference<Class> */ temp = str->klass_
1779     __ Ldr(temp, MemOperand(str.X(), class_offset));
1780     // /* HeapReference<Class> */ temp1 = arg->klass_
1781     __ Ldr(temp1, MemOperand(arg.X(), class_offset));
1782     // Also, because we use the previously loaded class references only in the
1783     // following comparison, we don't need to unpoison them.
1784     __ Cmp(temp, temp1);
1785     __ B(&return_false, ne);
1786   }
1787 
1788   // Check if one of the inputs is a const string. Do not special-case both strings
1789   // being const; such cases should be handled by constant folding if needed.
1790   uint32_t const_string_length = 0u;
1791   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1792   if (const_string == nullptr) {
1793     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1794     if (const_string != nullptr) {
1795       std::swap(str, arg);  // Make sure the const string is in `str`.
1796     }
1797   }
1798   bool is_compressed =
1799       mirror::kUseStringCompression &&
1800       const_string != nullptr &&
1801       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1802 
1803   if (const_string != nullptr) {
1804     // Load `count` field of the argument string and check if it matches the const string.
1805     // This also compares the compression style; if it differs, return false.
1806     __ Ldr(temp, MemOperand(arg.X(), count_offset));
1807     // Temporarily release temp1 as we may not be able to embed the flagged count in CMP immediate.
1808     scratch_scope.Release(temp1);
1809     __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
1810     temp1 = scratch_scope.AcquireW();
1811     __ B(&return_false, ne);
1812   } else {
1813     // Load `count` fields of this and argument strings.
1814     __ Ldr(temp, MemOperand(str.X(), count_offset));
1815     __ Ldr(temp1, MemOperand(arg.X(), count_offset));
1816     // Check if the `count` fields are equal; return false if they're not.
1817     // This also compares the compression style; if it differs, return false.
1818     __ Cmp(temp, temp1);
1819     __ B(&return_false, ne);
1820   }
1821 
1822   // Assertions that must hold in order to compare strings 8 bytes at a time.
1823   // Ok to do this because strings are zero-padded to kObjectAlignment.
1824   DCHECK_ALIGNED(value_offset, 8);
1825   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1826 
1827   if (const_string != nullptr &&
1828       const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
1829                                             : kShortConstStringEqualsCutoffInBytes / 2u)) {
1830     // Load and compare the contents. Though we know the contents of the short const string
1831     // at compile time, materializing constants may be more code than loading from memory.
1832     int32_t offset = value_offset;
1833     size_t remaining_bytes =
1834         RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u);
1835     temp = temp.X();
1836     temp1 = temp1.X();
1837     while (remaining_bytes > sizeof(uint64_t)) {
1838       Register temp2 = XRegisterFrom(locations->GetTemp(0));
1839       __ Ldp(temp, temp1, MemOperand(str.X(), offset));
1840       __ Ldp(temp2, out, MemOperand(arg.X(), offset));
1841       __ Cmp(temp, temp2);
1842       __ Ccmp(temp1, out, NoFlag, eq);
1843       __ B(&return_false, ne);
1844       offset += 2u * sizeof(uint64_t);
1845       remaining_bytes -= 2u * sizeof(uint64_t);
1846     }
1847     if (remaining_bytes != 0u) {
1848       __ Ldr(temp, MemOperand(str.X(), offset));
1849       __ Ldr(temp1, MemOperand(arg.X(), offset));
1850       __ Cmp(temp, temp1);
1851       __ B(&return_false, ne);
1852     }
1853   } else {
1854     // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1855     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1856                   "Expecting 0=compressed, 1=uncompressed");
1857     __ Cbz(temp, &return_true);
1858 
1859     if (mirror::kUseStringCompression) {
1860       // For string compression, calculate the number of bytes to compare (not chars).
1861       // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1862       __ And(temp1, temp, Operand(1));    // Extract compression flag.
1863       __ Lsr(temp, temp, 1u);             // Extract length.
1864       __ Lsl(temp, temp, temp1);          // Calculate number of bytes to compare.
1865     }
1866 
1867     // Store offset of string value in preparation for comparison loop
1868     __ Mov(temp1, value_offset);
1869 
1870     temp1 = temp1.X();
1871     Register temp2 = XRegisterFrom(locations->GetTemp(0));
1872     // Loop to compare strings 8 bytes at a time starting at the front of the string.
1873     __ Bind(&loop);
1874     __ Ldr(out, MemOperand(str.X(), temp1));
1875     __ Ldr(temp2, MemOperand(arg.X(), temp1));
1876     __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
1877     __ Cmp(out, temp2);
1878     __ B(&return_false, ne);
1879     // With string compression, we have compared 8 bytes, otherwise 4 chars.
1880     __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
1881     __ B(&loop, hi);
1882   }
1883 
1884   // Return true and exit the function.
1885   // If the loop does not result in returning false, we return true.
1886   __ Bind(&return_true);
1887   __ Mov(out, 1);
1888   __ B(&end);
1889 
1890   // Return false and exit the function.
1891   __ Bind(&return_false);
1892   __ Mov(out, 0);
1893   __ Bind(&end);
1894 }
1895 
1896 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1897                                        MacroAssembler* masm,
1898                                        CodeGeneratorARM64* codegen,
1899                                        bool start_at_zero) {
1900   LocationSummary* locations = invoke->GetLocations();
1901 
1902   // Note that the null check must have been done earlier.
1903   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1904 
1905   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1906   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1907   SlowPathCodeARM64* slow_path = nullptr;
1908   HInstruction* code_point = invoke->InputAt(1);
1909   if (code_point->IsIntConstant()) {
1910     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
1911       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1912       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1913       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1914       codegen->AddSlowPath(slow_path);
1915       __ B(slow_path->GetEntryLabel());
1916       __ Bind(slow_path->GetExitLabel());
1917       return;
1918     }
1919   } else if (code_point->GetType() != DataType::Type::kUint16) {
1920     Register char_reg = WRegisterFrom(locations->InAt(1));
1921     __ Tst(char_reg, 0xFFFF0000);
1922     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1923     codegen->AddSlowPath(slow_path);
1924     __ B(ne, slow_path->GetEntryLabel());
1925   }
1926 
1927   if (start_at_zero) {
1928     // Start-index = 0.
1929     Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
1930     __ Mov(tmp_reg, 0);
1931   }
1932 
1933   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1934   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1935 
1936   if (slow_path != nullptr) {
1937     __ Bind(slow_path->GetExitLabel());
1938   }
1939 }
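
// Summary of the dispatch above: a constant code point above 0xFFFF (a supplementary
// character) always takes the slow path; a non-char argument gets its upper 16 bits checked at
// runtime and branches to the slow path if they are set; a char (or small constant) argument
// needs no slow path before the kQuickIndexOf call.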
1940 
1941 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
1942   LocationSummary* locations = new (allocator_) LocationSummary(
1943       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1944   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1945   // best to align the inputs accordingly.
1946   InvokeRuntimeCallingConvention calling_convention;
1947   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1948   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1949   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
1950 
1951   // Need to send start_index=0.
1952   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1953 }
1954 
1955 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
1956   GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ true);
1957 }
1958 
1959 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1960   LocationSummary* locations = new (allocator_) LocationSummary(
1961       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1962   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1963   // best to align the inputs accordingly.
1964   InvokeRuntimeCallingConvention calling_convention;
1965   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1966   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1967   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1968   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
1969 }
1970 
1971 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1972   GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ false);
1973 }
1974 
1975 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1976   LocationSummary* locations = new (allocator_) LocationSummary(
1977       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1978   InvokeRuntimeCallingConvention calling_convention;
1979   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1980   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1981   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1982   locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1983   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1984 }
1985 
1986 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1987   MacroAssembler* masm = GetVIXLAssembler();
1988   LocationSummary* locations = invoke->GetLocations();
1989 
1990   Register byte_array = WRegisterFrom(locations->InAt(0));
1991   __ Cmp(byte_array, 0);
1992   SlowPathCodeARM64* slow_path =
1993       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1994   codegen_->AddSlowPath(slow_path);
1995   __ B(eq, slow_path->GetEntryLabel());
1996 
1997   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1998   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1999   __ Bind(slow_path->GetExitLabel());
2000 }
2001 
2002 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
2003   LocationSummary* locations =
2004       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2005   InvokeRuntimeCallingConvention calling_convention;
2006   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2007   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2008   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2009   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
2010 }
2011 
2012 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
2013   // No need to emit code checking whether `locations->InAt(2)` is a null
2014   // pointer, as callers of the native method
2015   //
2016   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
2017   //
2018   // all include a null check on `data` before calling that method.
2019   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
2020   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
2021 }
2022 
2023 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
2024   LocationSummary* locations = new (allocator_) LocationSummary(
2025       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2026   InvokeRuntimeCallingConvention calling_convention;
2027   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2028   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
2029 }
2030 
2031 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
2032   MacroAssembler* masm = GetVIXLAssembler();
2033   LocationSummary* locations = invoke->GetLocations();
2034 
2035   Register string_to_copy = WRegisterFrom(locations->InAt(0));
2036   __ Cmp(string_to_copy, 0);
2037   SlowPathCodeARM64* slow_path =
2038       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2039   codegen_->AddSlowPath(slow_path);
2040   __ B(eq, slow_path->GetEntryLabel());
2041 
2042   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
2043   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
2044   __ Bind(slow_path->GetExitLabel());
2045 }
2046 
2047 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2048   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2049   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2050   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
2051 
2052   LocationSummary* const locations =
2053       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2054   InvokeRuntimeCallingConvention calling_convention;
2055 
2056   locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
2057   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
2058 }
2059 
2060 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2061   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2062   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2063   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
2064   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
2065 
2066   LocationSummary* const locations =
2067       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2068   InvokeRuntimeCallingConvention calling_convention;
2069 
2070   locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
2071   locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
2072   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
2073 }
2074 
2075 static void GenFPToFPCall(HInvoke* invoke,
2076                           CodeGeneratorARM64* codegen,
2077                           QuickEntrypointEnum entry) {
2078   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2079 }
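
// The helper above backs the Math intrinsics below: arguments and results are already in the
// runtime calling convention (set up by CreateFPToFPCallLocations/CreateFPFPToFPCallLocations),
// so generating code is just a call to the corresponding quick entrypoint, which is expected to
// forward to the libm implementation.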
2080 
2081 void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
2082   CreateFPToFPCallLocations(allocator_, invoke);
2083 }
2084 
2085 void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
2086   GenFPToFPCall(invoke, codegen_, kQuickCos);
2087 }
2088 
2089 void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
2090   CreateFPToFPCallLocations(allocator_, invoke);
2091 }
2092 
2093 void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
2094   GenFPToFPCall(invoke, codegen_, kQuickSin);
2095 }
2096 
2097 void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
2098   CreateFPToFPCallLocations(allocator_, invoke);
2099 }
2100 
2101 void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
2102   GenFPToFPCall(invoke, codegen_, kQuickAcos);
2103 }
2104 
2105 void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
2106   CreateFPToFPCallLocations(allocator_, invoke);
2107 }
2108 
2109 void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
2110   GenFPToFPCall(invoke, codegen_, kQuickAsin);
2111 }
2112 
2113 void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
2114   CreateFPToFPCallLocations(allocator_, invoke);
2115 }
2116 
2117 void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
2118   GenFPToFPCall(invoke, codegen_, kQuickAtan);
2119 }
2120 
2121 void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
2122   CreateFPToFPCallLocations(allocator_, invoke);
2123 }
2124 
2125 void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
2126   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
2127 }
2128 
2129 void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
2130   CreateFPToFPCallLocations(allocator_, invoke);
2131 }
2132 
2133 void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
2134   GenFPToFPCall(invoke, codegen_, kQuickCosh);
2135 }
2136 
2137 void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
2138   CreateFPToFPCallLocations(allocator_, invoke);
2139 }
2140 
2141 void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
2142   GenFPToFPCall(invoke, codegen_, kQuickExp);
2143 }
2144 
2145 void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
2146   CreateFPToFPCallLocations(allocator_, invoke);
2147 }
2148 
2149 void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
2150   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
2151 }
2152 
2153 void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
2154   CreateFPToFPCallLocations(allocator_, invoke);
2155 }
2156 
2157 void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
2158   GenFPToFPCall(invoke, codegen_, kQuickLog);
2159 }
2160 
2161 void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
2162   CreateFPToFPCallLocations(allocator_, invoke);
2163 }
2164 
2165 void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
2166   GenFPToFPCall(invoke, codegen_, kQuickLog10);
2167 }
2168 
2169 void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
2170   CreateFPToFPCallLocations(allocator_, invoke);
2171 }
2172 
2173 void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
2174   GenFPToFPCall(invoke, codegen_, kQuickSinh);
2175 }
2176 
2177 void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
2178   CreateFPToFPCallLocations(allocator_, invoke);
2179 }
2180 
2181 void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
2182   GenFPToFPCall(invoke, codegen_, kQuickTan);
2183 }
2184 
2185 void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
2186   CreateFPToFPCallLocations(allocator_, invoke);
2187 }
2188 
2189 void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
2190   GenFPToFPCall(invoke, codegen_, kQuickTanh);
2191 }
2192 
2193 void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
2194   CreateFPFPToFPCallLocations(allocator_, invoke);
2195 }
2196 
2197 void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
2198   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
2199 }
2200 
2201 void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
2202   CreateFPFPToFPCallLocations(allocator_, invoke);
2203 }
2204 
2205 void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
2206   GenFPToFPCall(invoke, codegen_, kQuickPow);
2207 }
2208 
2209 void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
2210   CreateFPFPToFPCallLocations(allocator_, invoke);
2211 }
2212 
2213 void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
2214   GenFPToFPCall(invoke, codegen_, kQuickHypot);
2215 }
2216 
2217 void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
2218   CreateFPFPToFPCallLocations(allocator_, invoke);
2219 }
2220 
2221 void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
2222   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
2223 }
2224 
2225 void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2226   LocationSummary* locations =
2227       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2228   locations->SetInAt(0, Location::RequiresRegister());
2229   locations->SetInAt(1, Location::RequiresRegister());
2230   locations->SetInAt(2, Location::RequiresRegister());
2231   locations->SetInAt(3, Location::RequiresRegister());
2232   locations->SetInAt(4, Location::RequiresRegister());
2233 
2234   locations->AddTemp(Location::RequiresRegister());
2235   locations->AddTemp(Location::RequiresRegister());
2236   locations->AddTemp(Location::RequiresRegister());
2237 }
2238 
2239 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2240   MacroAssembler* masm = GetVIXLAssembler();
2241   LocationSummary* locations = invoke->GetLocations();
2242 
2243   // Check assumption that sizeof(Char) is 2 (used in scaling below).
2244   const size_t char_size = DataType::Size(DataType::Type::kUint16);
2245   DCHECK_EQ(char_size, 2u);
2246 
2247   // Location of data in char array buffer.
2248   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2249 
2250   // Location of char array data in string.
2251   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2252 
2253   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2254   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2255   Register srcObj = XRegisterFrom(locations->InAt(0));
2256   Register srcBegin = XRegisterFrom(locations->InAt(1));
2257   Register srcEnd = XRegisterFrom(locations->InAt(2));
2258   Register dstObj = XRegisterFrom(locations->InAt(3));
2259   Register dstBegin = XRegisterFrom(locations->InAt(4));
2260 
2261   Register src_ptr = XRegisterFrom(locations->GetTemp(0));
2262   Register num_chr = XRegisterFrom(locations->GetTemp(1));
2263   Register tmp1 = XRegisterFrom(locations->GetTemp(2));
2264 
2265   UseScratchRegisterScope temps(masm);
2266   Register dst_ptr = temps.AcquireX();
2267   Register tmp2 = temps.AcquireX();
2268 
2269   vixl::aarch64::Label done;
2270   vixl::aarch64::Label compressed_string_vector_loop;
2271   vixl::aarch64::Label compressed_string_remainder;
2272   __ Sub(num_chr, srcEnd, srcBegin);
2273   // Early out for valid zero-length retrievals.
2274   __ Cbz(num_chr, &done);
2275 
2276   // dst address to start copying to.
2277   __ Add(dst_ptr, dstObj, Operand(data_offset));
2278   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
2279 
2280   // src address to copy from.
2281   __ Add(src_ptr, srcObj, Operand(value_offset));
2282   vixl::aarch64::Label compressed_string_preloop;
2283   if (mirror::kUseStringCompression) {
2284     // Location of count in string.
2285     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2286     // String's length.
2287     __ Ldr(tmp2, MemOperand(srcObj, count_offset));
2288     __ Tbz(tmp2, 0, &compressed_string_preloop);
2289   }
2290   __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
2291 
2292   // Do the copy.
2293   vixl::aarch64::Label loop;
2294   vixl::aarch64::Label remainder;
2295 
2296   // Save repairing the value of num_chr on the < 8 character path.
2297   __ Subs(tmp1, num_chr, 8);
2298   __ B(lt, &remainder);
2299 
2300   // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
2301   __ Mov(num_chr, tmp1);
2302 
2303   // Main loop used for longer fetches loads and stores 8x16-bit characters at a time.
2304   // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
2305   __ Bind(&loop);
2306   __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
2307   __ Subs(num_chr, num_chr, 8);
2308   __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
2309   __ B(ge, &loop);
2310 
2311   __ Adds(num_chr, num_chr, 8);
2312   __ B(eq, &done);
2313 
2314   // Main loop for < 8 character case and remainder handling. Loads and stores one
2315   // 16-bit Java character at a time.
2316   __ Bind(&remainder);
2317   __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
2318   __ Subs(num_chr, num_chr, 1);
2319   __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2320   __ B(gt, &remainder);
2321   __ B(&done);
2322 
2323   if (mirror::kUseStringCompression) {
2324     // For compressed strings, acquire a SIMD temporary register.
2325     VRegister vtmp1 = temps.AcquireVRegisterOfSize(kQRegSize);
2326     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2327     DCHECK_EQ(c_char_size, 1u);
2328     __ Bind(&compressed_string_preloop);
2329     __ Add(src_ptr, src_ptr, Operand(srcBegin));
2330 
2331     // Subtract into tmp1 to avoid having to repair num_chr on the < 8 character path.
2332     __ Subs(tmp1, num_chr, 8);
2333     __ B(lt, &compressed_string_remainder);
2334 
2335     // Keep the result of the earlier subs; we are going to fetch at least 8 characters.
2336     __ Mov(num_chr, tmp1);
2337 
2338     // Main loop for a compressed src: widens and copies 8 characters (8-bit to 16-bit)
2339     // at a time using SIMD instructions.
2340     __ Bind(&compressed_string_vector_loop);
2341     __ Ld1(vtmp1.V8B(), MemOperand(src_ptr, c_char_size * 8, PostIndex));
2342     __ Subs(num_chr, num_chr, 8);
2343     __ Uxtl(vtmp1.V8H(), vtmp1.V8B());
2344     __ St1(vtmp1.V8H(), MemOperand(dst_ptr, char_size * 8, PostIndex));
2345     __ B(ge, &compressed_string_vector_loop);
2346 
2347     __ Adds(num_chr, num_chr, 8);
2348     __ B(eq, &done);
2349 
2350     // Loop for the < 8 character case and remainder handling with a compressed src.
2351     // Widens and copies one character (8-bit to 16-bit) at a time.
2352     __ Bind(&compressed_string_remainder);
2353     __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
2354     __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2355     __ Subs(num_chr, num_chr, Operand(1));
2356     __ B(gt, &compressed_string_remainder);
2357   }
2358 
2359   __ Bind(&done);
2360 }
2361 
2362 // Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
2363 // implementation there for longer copy lengths.
2364 static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
2365 
2366 static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
2367                                                uint32_t at,
2368                                                HInstruction* input) {
2369   HIntConstant* const_input = input->AsIntConstant();
2370   if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
2371     locations->SetInAt(at, Location::RequiresRegister());
2372   } else {
2373     locations->SetInAt(at, Location::RegisterOrConstant(input));
2374   }
2375 }
2376 
2377 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2378   // Check to see if we have known failures that will cause us to have to bail out
2379   // to the runtime, and just generate the runtime call directly.
2380   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2381   HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
2382 
2383   // The positions must be non-negative.
2384   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2385       (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
2386     // We will have to fail anyway.
2387     return;
2388   }
2389 
2390   // The length must be >= 0 and not so long that we would (currently) prefer libcore's
2391   // native implementation.
2392   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2393   if (length != nullptr) {
2394     int32_t len = length->GetValue();
2395     if (len < 0 || len > kSystemArrayCopyCharThreshold) {
2396       // Just call as normal.
2397       return;
2398     }
2399   }
2400 
2401   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2402   LocationSummary* locations =
2403       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
2404   // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
2405   locations->SetInAt(0, Location::RequiresRegister());
2406   SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2407   locations->SetInAt(2, Location::RequiresRegister());
2408   SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2409   SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2410 
2411   locations->AddTemp(Location::RequiresRegister());
2412   locations->AddTemp(Location::RequiresRegister());
2413   locations->AddTemp(Location::RequiresRegister());
2414 }
2415 
2416 static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
2417                                          const Location& pos,
2418                                          const Register& input,
2419                                          const Location& length,
2420                                          SlowPathCodeARM64* slow_path,
2421                                          const Register& temp,
2422                                          bool length_is_input_length = false) {
2423   const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
2424   if (pos.IsConstant()) {
2425     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
2426     if (pos_const == 0) {
2427       if (!length_is_input_length) {
2428         // Check that length(input) >= length.
2429         __ Ldr(temp, MemOperand(input, length_offset));
2430         __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2431         __ B(slow_path->GetEntryLabel(), lt);
2432       }
2433     } else {
2434       // Check that length(input) >= pos.
2435       __ Ldr(temp, MemOperand(input, length_offset));
2436       __ Subs(temp, temp, pos_const);
2437       __ B(slow_path->GetEntryLabel(), lt);
2438 
2439       // Check that (length(input) - pos) >= length.
2440       __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2441       __ B(slow_path->GetEntryLabel(), lt);
2442     }
2443   } else if (length_is_input_length) {
2444     // The only way the copy can succeed is if pos is zero.
2445     __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
2446   } else {
2447     // Check that pos >= 0.
2448     Register pos_reg = WRegisterFrom(pos);
2449     __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
2450 
2451     // Check that pos <= length(input) && (length(input) - pos) >= length.
2452     __ Ldr(temp, MemOperand(input, length_offset));
2453     __ Subs(temp, temp, pos_reg);
2454     // Ccmp if length(input) >= pos, else definitely bail to slow path (N!=V == lt).
2455     __ Ccmp(temp, OperandFrom(length, DataType::Type::kInt32), NFlag, ge);
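    // Ccmp semantics: if the previous Subs set ge (i.e. length(input) >= pos), compare
    // (length(input) - pos) with `length`; otherwise force NZCV to NFlag so that the
    // following lt branch is taken unconditionally.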
2456     __ B(slow_path->GetEntryLabel(), lt);
2457   }
2458 }
2459 
2460 // Compute base source address, base destination address, and end
2461 // source address for System.arraycopy* intrinsics in `src_base`,
2462 // `dst_base` and `src_end` respectively.
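// In effect (illustrative only):
//   src_base = src + data_offset + src_pos * element_size
//   dst_base = dst + data_offset + dst_pos * element_size
//   src_end  = src_base + copy_length * element_size
// with positions or lengths known at compile time folded into immediate offsets.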
2463 static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
2464                                         DataType::Type type,
2465                                         const Register& src,
2466                                         const Location& src_pos,
2467                                         const Register& dst,
2468                                         const Location& dst_pos,
2469                                         const Location& copy_length,
2470                                         const Register& src_base,
2471                                         const Register& dst_base,
2472                                         const Register& src_end) {
2473   // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
2474   DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16)
2475       << "Unexpected element type: " << type;
2476   const int32_t element_size = DataType::Size(type);
2477   const int32_t element_size_shift = DataType::SizeShift(type);
2478   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2479 
2480   if (src_pos.IsConstant()) {
2481     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2482     __ Add(src_base, src, element_size * constant + data_offset);
2483   } else {
2484     __ Add(src_base, src, data_offset);
2485     __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift));
2486   }
2487 
2488   if (dst_pos.IsConstant()) {
2489     int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
2490     __ Add(dst_base, dst, element_size * constant + data_offset);
2491   } else {
2492     __ Add(dst_base, dst, data_offset);
2493     __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
2494   }
2495 
2496   if (copy_length.IsConstant()) {
2497     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2498     __ Add(src_end, src_base, element_size * constant);
2499   } else {
2500     __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
2501   }
2502 }
2503 
2504 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2505   MacroAssembler* masm = GetVIXLAssembler();
2506   LocationSummary* locations = invoke->GetLocations();
2507   Register src = XRegisterFrom(locations->InAt(0));
2508   Location src_pos = locations->InAt(1);
2509   Register dst = XRegisterFrom(locations->InAt(2));
2510   Location dst_pos = locations->InAt(3);
2511   Location length = locations->InAt(4);
2512 
2513   SlowPathCodeARM64* slow_path =
2514       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2515   codegen_->AddSlowPath(slow_path);
2516 
2517   // If source and destination are the same, take the slow path. Overlapping copy
2518   // regions must be copied in reverse, and we cannot always tell whether that is needed.
2519   __ Cmp(src, dst);
2520   __ B(slow_path->GetEntryLabel(), eq);
2521 
2522   // Bail out if the source is null.
2523   __ Cbz(src, slow_path->GetEntryLabel());
2524 
2525   // Bail out if the destination is null.
2526   __ Cbz(dst, slow_path->GetEntryLabel());
2527 
2528   if (!length.IsConstant()) {
2529     // Merge the following two comparisons into one:
2530     //   If the length is negative, bail out (delegate to libcore's native implementation).
2531     //   If the length > 32 then (currently) prefer libcore's native implementation.
2532     __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
2533     __ B(slow_path->GetEntryLabel(), hi);
2534   } else {
2535     // We have already checked in the LocationsBuilder for the constant case.
2536     DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
2537     DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
2538   }
2539 
2540   Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
2541   Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
2542   Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
2543 
2544   CheckSystemArrayCopyPosition(masm,
2545                                src_pos,
2546                                src,
2547                                length,
2548                                slow_path,
2549                                src_curr_addr,
2550                                false);
2551 
2552   CheckSystemArrayCopyPosition(masm,
2553                                dst_pos,
2554                                dst,
2555                                length,
2556                                slow_path,
2557                                src_curr_addr,
2558                                false);
2559 
2560   src_curr_addr = src_curr_addr.X();
2561   dst_curr_addr = dst_curr_addr.X();
2562   src_stop_addr = src_stop_addr.X();
2563 
2564   GenSystemArrayCopyAddresses(masm,
2565                               DataType::Type::kUint16,
2566                               src,
2567                               src_pos,
2568                               dst,
2569                               dst_pos,
2570                               length,
2571                               src_curr_addr,
2572                               dst_curr_addr,
2573                               src_stop_addr);
2574 
2575   // Iterate over the arrays and do a raw copy of the chars.
2576   const int32_t char_size = DataType::Size(DataType::Type::kUint16);
2577   UseScratchRegisterScope temps(masm);
2578   Register tmp = temps.AcquireW();
2579   vixl::aarch64::Label loop, done;
2580   __ Bind(&loop);
2581   __ Cmp(src_curr_addr, src_stop_addr);
2582   __ B(&done, eq);
2583   __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
2584   __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
2585   __ B(&loop);
2586   __ Bind(&done);
2587 
2588   __ Bind(slow_path->GetExitLabel());
2589 }
2590 
2591 // At or above this threshold we prefer libcore's native implementation.
2592 static constexpr int32_t kSystemArrayCopyThreshold = 128;
2593 
2594 // CodeGenerator::CreateSystemArrayCopyLocationSummary uses three temporary registers.
2595 // We want to use only two temporary registers in order to reduce the register pressure
2596 // on arm64, so we do not use CodeGenerator::CreateSystemArrayCopyLocationSummary.
2597 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2598   // The only read barrier implementation supporting the
2599   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2600   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2601     return;
2602   }
2603 
2604   // Check to see if we have known failures that will cause us to have to bail out
2605   // to the runtime, and just generate the runtime call directly.
2606   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2607   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2608 
2609   // The positions must be non-negative.
2610   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2611       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
2612     // We will have to fail anyway.
2613     return;
2614   }
2615 
2616   // The length must be >= 0.
2617   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2618   if (length != nullptr) {
2619     int32_t len = length->GetValue();
2620     if (len < 0 || len >= kSystemArrayCopyThreshold) {
2621       // Just call as normal.
2622       return;
2623     }
2624   }
2625 
2626   SystemArrayCopyOptimizations optimizations(invoke);
2627 
2628   if (optimizations.GetDestinationIsSource()) {
2629     if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
2630       // We only support backward copying if source and destination are the same.
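      // (With src == dest and src_pos < dest_pos the regions may overlap such that an
      // ascending element-by-element copy would overwrite source elements before
      // reading them.)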
2631       return;
2632     }
2633   }
2634 
2635   if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
2636     // We currently don't intrinsify primitive copying.
2637     return;
2638   }
2639 
2640   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2641   LocationSummary* locations =
2642       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
2643   // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
2644   locations->SetInAt(0, Location::RequiresRegister());
2645   SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2646   locations->SetInAt(2, Location::RequiresRegister());
2647   SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2648   SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2649 
2650   locations->AddTemp(Location::RequiresRegister());
2651   locations->AddTemp(Location::RequiresRegister());
2652   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2653     // Temporary register IP0, obtained from the VIXL scratch register
2654     // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
2655     // (because that register is clobbered by ReadBarrierMarkRegX
2656     // entry points). It cannot be used in calls to
2657     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
2658     // either. For these reasons, get a third extra temporary register
2659     // from the register allocator.
2660     locations->AddTemp(Location::RequiresRegister());
2661   } else {
2662     // Cases other than Baker read barriers: the third temporary will
2663     // be acquired from the VIXL scratch register pool.
2664   }
2665 }
2666 
2667 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2668   // The only read barrier implementation supporting the
2669   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2670   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2671 
2672   MacroAssembler* masm = GetVIXLAssembler();
2673   LocationSummary* locations = invoke->GetLocations();
2674 
2675   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2676   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2677   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2678   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2679   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2680 
2681   Register src = XRegisterFrom(locations->InAt(0));
2682   Location src_pos = locations->InAt(1);
2683   Register dest = XRegisterFrom(locations->InAt(2));
2684   Location dest_pos = locations->InAt(3);
2685   Location length = locations->InAt(4);
2686   Register temp1 = WRegisterFrom(locations->GetTemp(0));
2687   Location temp1_loc = LocationFrom(temp1);
2688   Register temp2 = WRegisterFrom(locations->GetTemp(1));
2689   Location temp2_loc = LocationFrom(temp2);
2690 
2691   SlowPathCodeARM64* intrinsic_slow_path =
2692       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2693   codegen_->AddSlowPath(intrinsic_slow_path);
2694 
2695   vixl::aarch64::Label conditions_on_positions_validated;
2696   SystemArrayCopyOptimizations optimizations(invoke);
2697 
2698   // If source and destination are the same, we go to slow path if we need to do
2699   // forward copying.
2700   if (src_pos.IsConstant()) {
2701     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2702     if (dest_pos.IsConstant()) {
2703       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2704       if (optimizations.GetDestinationIsSource()) {
2705         // Checked when building locations.
2706         DCHECK_GE(src_pos_constant, dest_pos_constant);
2707       } else if (src_pos_constant < dest_pos_constant) {
2708         __ Cmp(src, dest);
2709         __ B(intrinsic_slow_path->GetEntryLabel(), eq);
2710       }
2711       // Checked when building locations.
2712       DCHECK(!optimizations.GetDestinationIsSource()
2713              || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
2714     } else {
2715       if (!optimizations.GetDestinationIsSource()) {
2716         __ Cmp(src, dest);
2717         __ B(&conditions_on_positions_validated, ne);
2718       }
2719       __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
2720       __ B(intrinsic_slow_path->GetEntryLabel(), gt);
2721     }
2722   } else {
2723     if (!optimizations.GetDestinationIsSource()) {
2724       __ Cmp(src, dest);
2725       __ B(&conditions_on_positions_validated, ne);
2726     }
2727     __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
2728            OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
2729     __ B(intrinsic_slow_path->GetEntryLabel(), lt);
2730   }
2731 
2732   __ Bind(&conditions_on_positions_validated);
2733 
2734   if (!optimizations.GetSourceIsNotNull()) {
2735     // Bail out if the source is null.
2736     __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
2737   }
2738 
2739   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2740     // Bail out if the destination is null.
2741     __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
2742   }
2743 
2744   // We have already checked in the LocationsBuilder for the constant case.
2745   if (!length.IsConstant() &&
2746       !optimizations.GetCountIsSourceLength() &&
2747       !optimizations.GetCountIsDestinationLength()) {
2748     // Merge the following two comparisons into one:
2749     //   If the length is negative, bail out (delegate to libcore's native implementation).
2750     //   If the length >= 128 then (currently) prefer native implementation.
2751     __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
2752     __ B(intrinsic_slow_path->GetEntryLabel(), hs);
2753   }
2754   // Validity checks: source.
2755   CheckSystemArrayCopyPosition(masm,
2756                                src_pos,
2757                                src,
2758                                length,
2759                                intrinsic_slow_path,
2760                                temp1,
2761                                optimizations.GetCountIsSourceLength());
2762 
2763   // Validity checks: dest.
2764   CheckSystemArrayCopyPosition(masm,
2765                                dest_pos,
2766                                dest,
2767                                length,
2768                                intrinsic_slow_path,
2769                                temp1,
2770                                optimizations.GetCountIsDestinationLength());
2771   {
2772     // We use a block to end the scratch scope before the write barrier, thus
2773     // freeing the temporary registers so they can be used in `MarkGCCard`.
2774     UseScratchRegisterScope temps(masm);
2775     Location temp3_loc;  // Used only for Baker read barrier.
2776     Register temp3;
2777     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2778       temp3_loc = locations->GetTemp(2);
2779       temp3 = WRegisterFrom(temp3_loc);
2780     } else {
2781       temp3 = temps.AcquireW();
2782     }
2783 
2784     if (!optimizations.GetDoesNotNeedTypeCheck()) {
2785       // Check whether all elements of the source array are assignable to the component
2786       // type of the destination array. We do two checks: the classes are the same,
2787       // or the destination is Object[]. If none of these checks succeed, we go to the
2788       // slow path.
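      // Roughly: the copy may proceed without per-element checks when
      //   dest->klass_ == src->klass_ (both being non-primitive arrays), or
      //   dest is Object[] (its component type has a null super class).
      // Anything else is handled by the runtime, which performs the full
      // assignability checks.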
2789 
2790       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2791         if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2792           // /* HeapReference<Class> */ temp1 = src->klass_
2793           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2794                                                           temp1_loc,
2795                                                           src.W(),
2796                                                           class_offset,
2797                                                           temp3_loc,
2798                                                           /* needs_null_check= */ false,
2799                                                           /* use_load_acquire= */ false);
2800           // Bail out if the source is not a non primitive array.
2801           // /* HeapReference<Class> */ temp1 = temp1->component_type_
2802           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2803                                                           temp1_loc,
2804                                                           temp1,
2805                                                           component_offset,
2806                                                           temp3_loc,
2807                                                           /* needs_null_check= */ false,
2808                                                           /* use_load_acquire= */ false);
2809           __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
2810           // If heap poisoning is enabled, `temp1` has been unpoisoned
2811           // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2812           // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
2813           __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
2814           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2815           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2816         }
2817 
2818         // /* HeapReference<Class> */ temp1 = dest->klass_
2819         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2820                                                         temp1_loc,
2821                                                         dest.W(),
2822                                                         class_offset,
2823                                                         temp3_loc,
2824                                                         /* needs_null_check= */ false,
2825                                                         /* use_load_acquire= */ false);
2826 
2827         if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2828           // Bail out if the destination is not a non primitive array.
2829           //
2830           // Register `temp1` is not trashed by the read barrier emitted
2831           // by GenerateFieldLoadWithBakerReadBarrier below, as that
2832           // method produces a call to a ReadBarrierMarkRegX entry point,
2833           // which saves all potentially live registers, including
2834           // temporaries such as `temp1`.
2835           // /* HeapReference<Class> */ temp2 = temp1->component_type_
2836           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2837                                                           temp2_loc,
2838                                                           temp1,
2839                                                           component_offset,
2840                                                           temp3_loc,
2841                                                           /* needs_null_check= */ false,
2842                                                           /* use_load_acquire= */ false);
2843           __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2844           // If heap poisoning is enabled, `temp2` has been unpoisoned
2845           // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2846           // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2847           __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2848           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2849           __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2850         }
2851 
2852         // For the same reason given earlier, `temp1` is not trashed by the
2853         // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2854         // /* HeapReference<Class> */ temp2 = src->klass_
2855         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2856                                                         temp2_loc,
2857                                                         src.W(),
2858                                                         class_offset,
2859                                                         temp3_loc,
2860                                                         /* needs_null_check= */ false,
2861                                                         /* use_load_acquire= */ false);
2862         // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2863         __ Cmp(temp1, temp2);
2864 
2865         if (optimizations.GetDestinationIsTypedObjectArray()) {
2866           vixl::aarch64::Label do_copy;
2867           __ B(&do_copy, eq);
2868           // /* HeapReference<Class> */ temp1 = temp1->component_type_
2869           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2870                                                           temp1_loc,
2871                                                           temp1,
2872                                                           component_offset,
2873                                                           temp3_loc,
2874                                                           /* needs_null_check= */ false,
2875                                                           /* use_load_acquire= */ false);
2876           // /* HeapReference<Class> */ temp1 = temp1->super_class_
2877           // We do not need to emit a read barrier for the following
2878           // heap reference load, as `temp1` is only used in a
2879           // comparison with null below, and this reference is not
2880           // kept afterwards.
2881           __ Ldr(temp1, HeapOperand(temp1, super_offset));
2882           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2883           __ Bind(&do_copy);
2884         } else {
2885           __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2886         }
2887       } else {
2888         // Non read barrier code.
2889 
2890         // /* HeapReference<Class> */ temp1 = dest->klass_
2891         __ Ldr(temp1, MemOperand(dest, class_offset));
2892         // /* HeapReference<Class> */ temp2 = src->klass_
2893         __ Ldr(temp2, MemOperand(src, class_offset));
2894         bool did_unpoison = false;
2895         if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2896             !optimizations.GetSourceIsNonPrimitiveArray()) {
2897           // One or two of the references need to be unpoisoned. Unpoison them
2898           // both to make the identity check valid.
2899           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2900           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2901           did_unpoison = true;
2902         }
2903 
2904         if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2905           // Bail out if the destination is not a non primitive array.
2906           // /* HeapReference<Class> */ temp3 = temp1->component_type_
2907           __ Ldr(temp3, HeapOperand(temp1, component_offset));
2908           __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2909           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2910           // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2911           __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2912           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2913           __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2914         }
2915 
2916         if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2917           // Bail out if the source is not a non primitive array.
2918           // /* HeapReference<Class> */ temp3 = temp2->component_type_
2919           __ Ldr(temp3, HeapOperand(temp2, component_offset));
2920           __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2921           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2922           // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2923           __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2924           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2925           __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2926         }
2927 
2928         __ Cmp(temp1, temp2);
2929 
2930         if (optimizations.GetDestinationIsTypedObjectArray()) {
2931           vixl::aarch64::Label do_copy;
2932           __ B(&do_copy, eq);
2933           if (!did_unpoison) {
2934             codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2935           }
2936           // /* HeapReference<Class> */ temp1 = temp1->component_type_
2937           __ Ldr(temp1, HeapOperand(temp1, component_offset));
2938           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2939           // /* HeapReference<Class> */ temp1 = temp1->super_class_
2940           __ Ldr(temp1, HeapOperand(temp1, super_offset));
2941           // No need to unpoison the result, we're comparing against null.
2942           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2943           __ Bind(&do_copy);
2944         } else {
2945           __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2946         }
2947       }
2948     } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2949       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2950       // Bail out if the source is not a non primitive array.
2951       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2952         // /* HeapReference<Class> */ temp1 = src->klass_
2953         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2954                                                         temp1_loc,
2955                                                         src.W(),
2956                                                         class_offset,
2957                                                         temp3_loc,
2958                                                         /* needs_null_check= */ false,
2959                                                         /* use_load_acquire= */ false);
2960         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2961         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2962                                                         temp2_loc,
2963                                                         temp1,
2964                                                         component_offset,
2965                                                         temp3_loc,
2966                                                         /* needs_null_check= */ false,
2967                                                         /* use_load_acquire= */ false);
2968         __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2969         // If heap poisoning is enabled, `temp2` has been unpoisoned
2970         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2971       } else {
2972         // /* HeapReference<Class> */ temp1 = src->klass_
2973         __ Ldr(temp1, HeapOperand(src.W(), class_offset));
2974         codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2975         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2976         __ Ldr(temp2, HeapOperand(temp1, component_offset));
2977         __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2978         codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2979       }
2980       // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2981       __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2982       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2983       __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2984     }
2985 
2986     if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
2987       // Constant length of zero: no need to emit the loop code at all.
2988     } else {
2989       Register src_curr_addr = temp1.X();
2990       Register dst_curr_addr = temp2.X();
2991       Register src_stop_addr = temp3.X();
2992       vixl::aarch64::Label done;
2993       const DataType::Type type = DataType::Type::kReference;
2994       const int32_t element_size = DataType::Size(type);
2995 
2996       if (length.IsRegister()) {
2997         // Don't enter the copy loop if the length is zero.
2998         __ Cbz(WRegisterFrom(length), &done);
2999       }
3000 
3001       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3002         // TODO: Also convert this intrinsic to the IsGcMarking strategy?
3003 
3004         // SystemArrayCopy implementation for Baker read barriers (see
3005         // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
3006         //
3007         //   uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
3008         //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
3009         //   bool is_gray = (rb_state == ReadBarrier::GrayState());
3010         //   if (is_gray) {
3011         //     // Slow-path copy.
3012         //     do {
3013         //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
3014         //     } while (src_ptr != end_ptr)
3015         //   } else {
3016         //     // Fast-path copy.
3017         //     do {
3018         //       *dest_ptr++ = *src_ptr++;
3019         //     } while (src_ptr != end_ptr)
3020         //   }
3021 
3022         // Make sure `tmp` is not IP0, as it is clobbered by
3023         // ReadBarrierMarkRegX entry points in
3024         // ReadBarrierSystemArrayCopySlowPathARM64.
3025         DCHECK(temps.IsAvailable(ip0));
3026         temps.Exclude(ip0);
3027         Register tmp = temps.AcquireW();
3028         DCHECK_NE(LocationFrom(tmp).reg(), IP0);
3029         // Put IP0 back in the pool so that VIXL has at least one
3030         // scratch register available to emit macro-instructions (note
3031         // that IP1 is already used for `tmp`). Indeed some
3032         // macro-instructions used in GenSystemArrayCopyAddresses
3033         // (invoked hereunder) may require a scratch register (for
3034         // instance to emit a load with a large constant offset).
3035         temps.Include(ip0);
3036 
3037         // /* int32_t */ monitor = src->monitor_
3038         __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
3039         // /* LockWord */ lock_word = LockWord(monitor)
3040         static_assert(sizeof(LockWord) == sizeof(int32_t),
3041                       "art::LockWord and int32_t have different sizes.");
3042 
3043         // Introduce a dependency on the lock_word including rb_state,
3044         // to prevent load-load reordering, and without using
3045         // a memory barrier (which would be more expensive).
3046         // `src` is unchanged by this operation, but its value now depends
3047         // on `tmp`.
3048         __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
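        // (The lock word was loaded into a W register, so the upper 32 bits of
        // `tmp.X()` are zero and the LSR #32 operand is 0: `src` keeps its value
        // but now carries a register dependency on the lock word load.)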
3049 
3050         // Compute base source address, base destination address, and end
3051         // source address for System.arraycopy* intrinsics in `src_base`,
3052         // `dst_base` and `src_end` respectively.
3053         // Note that `src_curr_addr` is computed from `src` (and
3054         // `src_pos`) here, and thus honors the artificial dependency
3055         // of `src` on `tmp`.
3056         GenSystemArrayCopyAddresses(masm,
3057                                     type,
3058                                     src,
3059                                     src_pos,
3060                                     dest,
3061                                     dest_pos,
3062                                     length,
3063                                     src_curr_addr,
3064                                     dst_curr_addr,
3065                                     src_stop_addr);
3066 
3067         // Slow path used to copy array when `src` is gray.
3068         SlowPathCodeARM64* read_barrier_slow_path =
3069             new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(
3070                 invoke, LocationFrom(tmp));
3071         codegen_->AddSlowPath(read_barrier_slow_path);
3072 
3073         // Given the numeric representation, it's enough to check the low bit of the rb_state.
3074         static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
3075         static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
3076         __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
3077 
3078         // Fast-path copy.
3079         // Iterate over the arrays and do a raw copy of the objects. We don't need to
3080         // poison/unpoison.
3081         vixl::aarch64::Label loop;
3082         __ Bind(&loop);
3083         __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
3084         __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
3085         __ Cmp(src_curr_addr, src_stop_addr);
3086         __ B(&loop, ne);
3087 
3088         __ Bind(read_barrier_slow_path->GetExitLabel());
3089       } else {
3090         // Non read barrier code.
3091         // Compute base source address, base destination address, and end
3092         // source address for System.arraycopy* intrinsics in `src_base`,
3093         // `dst_base` and `src_end` respectively.
3094         GenSystemArrayCopyAddresses(masm,
3095                                     type,
3096                                     src,
3097                                     src_pos,
3098                                     dest,
3099                                     dest_pos,
3100                                     length,
3101                                     src_curr_addr,
3102                                     dst_curr_addr,
3103                                     src_stop_addr);
3104         // Iterate over the arrays and do a raw copy of the objects. We don't need to
3105         // poison/unpoison.
3106         vixl::aarch64::Label loop;
3107         __ Bind(&loop);
3108         {
3109           Register tmp = temps.AcquireW();
3110           __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
3111           __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
3112         }
3113         __ Cmp(src_curr_addr, src_stop_addr);
3114         __ B(&loop, ne);
3115       }
3116       __ Bind(&done);
3117     }
3118   }
3119 
3120   // We only need one card marking on the destination array.
3121   codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null= */ false);
3122 
3123   __ Bind(intrinsic_slow_path->GetExitLabel());
3124 }
3125 
3126 static void GenIsInfinite(LocationSummary* locations,
3127                           bool is64bit,
3128                           MacroAssembler* masm) {
3129   Operand infinity(0);
3130   Operand tst_mask(0);
3131   Register out;
3132 
3133   if (is64bit) {
3134     infinity = Operand(kPositiveInfinityDouble);
3135     tst_mask = MaskLeastSignificant<uint64_t>(63);
3136     out = XRegisterFrom(locations->Out());
3137   } else {
3138     infinity = Operand(kPositiveInfinityFloat);
3139     tst_mask = MaskLeastSignificant<uint32_t>(31);
3140     out = WRegisterFrom(locations->Out());
3141   }
3142 
3143   MoveFPToInt(locations, is64bit, masm);
3144   // Checks whether exponent bits are all 1 and fraction bits are all 0.
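  // Bit-level view for the float case (the double case is analogous):
  //   bits(x) ^ 0x7f800000 leaves at most the sign bit set when x is +/-Inf,
  //   and the Tst mask 0x7fffffff ignores the sign bit, so `eq` holds exactly
  //   when x is positive or negative infinity.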
3145   __ Eor(out, out, infinity);
3146   // TST bitmask is used to mask out the sign bit: either 0x7fffffff or 0x7fffffffffffffff
3147   // depending on is64bit.
3148   __ Tst(out, tst_mask);
3149   __ Cset(out, eq);
3150 }
3151 
3152 void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
3153   CreateFPToIntLocations(allocator_, invoke);
3154 }
3155 
3156 void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
3157   GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
3158 }
3159 
3160 void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
3161   CreateFPToIntLocations(allocator_, invoke);
3162 }
3163 
3164 void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
3165   GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
3166 }
3167 
3168 void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
3169   InvokeRuntimeCallingConvention calling_convention;
3170   IntrinsicVisitor::ComputeIntegerValueOfLocations(
3171       invoke,
3172       codegen_,
3173       calling_convention.GetReturnLocation(DataType::Type::kReference),
3174       Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3175 }
3176 
3177 void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
3178   IntrinsicVisitor::IntegerValueOfInfo info =
3179       IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
3180   LocationSummary* locations = invoke->GetLocations();
3181   MacroAssembler* masm = GetVIXLAssembler();
3182 
3183   Register out = RegisterFrom(locations->Out(), DataType::Type::kReference);
3184   UseScratchRegisterScope temps(masm);
3185   Register temp = temps.AcquireW();
3186   auto allocate_instance = [&]() {
3187     DCHECK(out.X().Is(InvokeRuntimeCallingConvention().GetRegisterAt(0)));
3188     codegen_->LoadIntrinsicDeclaringClass(out, invoke);
3189     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3190     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3191   };
3192   if (invoke->InputAt(0)->IsConstant()) {
3193     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3194     if (static_cast<uint32_t>(value - info.low) < info.length) {
3195       // Just embed the j.l.Integer in the code.
3196       DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
3197       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
3198     } else {
3199       DCHECK(locations->CanCall());
3200       // Allocate and initialize a new j.l.Integer.
3201       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3202       // JIT object table.
3203       allocate_instance();
3204       __ Mov(temp.W(), value);
3205       __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
3206       // Class pointer and `value` final field stores require a barrier before publication.
3207       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3208     }
3209   } else {
3210     DCHECK(locations->CanCall());
3211     Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32);
3212     // Check bounds of our cache.
3213     __ Add(out.W(), in.W(), -info.low);
3214     __ Cmp(out.W(), info.length);
3215     vixl::aarch64::Label allocate, done;
3216     __ B(&allocate, hs);
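    // (A single unsigned comparison of (in - info.low) against info.length covers
    // both in < info.low and in >= info.low + info.length, mirroring the
    // constant-input check above.)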
3217     // If the value is within the bounds, load the j.l.Integer directly from the array.
3218     codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
3219     MemOperand source = HeapOperand(
3220         temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference));
3221     codegen_->Load(DataType::Type::kReference, out, source);
3222     codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
3223     __ B(&done);
3224     __ Bind(&allocate);
3225     // Otherwise allocate and initialize a new j.l.Integer.
3226     allocate_instance();
3227     __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
3228     // Class pointer and `value` final field stores require a barrier before publication.
3229     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3230     __ Bind(&done);
3231   }
3232 }
3233 
3234 void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) {
3235   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3236 
3237   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && invoke->GetLocations() != nullptr) {
3238     invoke->GetLocations()->AddTemp(Location::RequiresRegister());
3239   }
3240 }
3241 
3242 void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
3243   MacroAssembler* masm = GetVIXLAssembler();
3244   LocationSummary* locations = invoke->GetLocations();
3245 
3246   Location obj = locations->InAt(0);
3247   Location out = locations->Out();
3248 
3249   SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
3250   codegen_->AddSlowPath(slow_path);
3251 
3252   if (kEmitCompilerReadBarrier) {
3253     // Check self->GetWeakRefAccessEnabled().
3254     UseScratchRegisterScope temps(masm);
3255     Register temp = temps.AcquireW();
3256     __ Ldr(temp,
3257            MemOperand(tr, Thread::WeakRefAccessEnabledOffset<kArm64PointerSize>().Uint32Value()));
3258     __ Cbz(temp, slow_path->GetEntryLabel());
3259   }
3260 
3261   {
3262     // Load the java.lang.ref.Reference class.
3263     UseScratchRegisterScope temps(masm);
3264     Register temp = temps.AcquireW();
3265     codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
3266 
3267     // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3268     MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3269     DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3270     DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3271               IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
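    // The two byte-sized fields are adjacent, so one halfword load reads them together;
    // any non-zero value means at least one of them is set and we take the slow path.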
3272     __ Ldrh(temp, HeapOperand(temp, disable_intrinsic_offset.Uint32Value()));
3273     __ Cbnz(temp, slow_path->GetEntryLabel());
3274   }
3275 
3276   // Load the value from the field.
3277   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3278   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3279     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3280                                                     out,
3281                                                     WRegisterFrom(obj),
3282                                                     referent_offset,
3283                                                     /*maybe_temp=*/ locations->GetTemp(0),
3284                                                     /*needs_null_check=*/ true,
3285                                                     /*use_load_acquire=*/ true);
3286   } else {
3287     MemOperand field = HeapOperand(WRegisterFrom(obj), referent_offset);
3288     codegen_->LoadAcquire(
3289         invoke, DataType::Type::kReference, WRegisterFrom(out), field, /*needs_null_check=*/ true);
3290     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3291   }
3292   __ Bind(slow_path->GetExitLabel());
3293 }
3294 
3295 void IntrinsicLocationsBuilderARM64::VisitReferenceRefersTo(HInvoke* invoke) {
3296   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
3297 }
3298 
3299 void IntrinsicCodeGeneratorARM64::VisitReferenceRefersTo(HInvoke* invoke) {
3300   LocationSummary* locations = invoke->GetLocations();
3301   MacroAssembler* masm = codegen_->GetVIXLAssembler();
3302   UseScratchRegisterScope temps(masm);
3303 
3304   Register obj = WRegisterFrom(locations->InAt(0));
3305   Register other = WRegisterFrom(locations->InAt(1));
3306   Register out = WRegisterFrom(locations->Out());
3307   Register tmp = temps.AcquireW();
3308 
3309   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3310   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3311 
3312   MemOperand field = HeapOperand(obj, referent_offset);
3313   codegen_->LoadAcquire(invoke, DataType::Type::kReference, tmp, field, /*needs_null_check=*/ true);
3314   codegen_->GetAssembler()->MaybeUnpoisonHeapReference(tmp);
3315 
3316   __ Cmp(tmp, other);
3317 
3318   if (kEmitCompilerReadBarrier) {
3319     DCHECK(kUseBakerReadBarrier);
3320 
3321     vixl::aarch64::Label calculate_result;
3322 
3323     // If the GC is not marking, the comparison result is final.
3324     __ Cbz(mr, &calculate_result);
3325 
3326     __ B(&calculate_result, eq);  // ZF set if taken.
3327 
3328     // Check if the loaded reference is null.
3329     __ Cbz(tmp, &calculate_result);  // ZF clear if taken.
3330 
3331     // For correct memory visibility, we need a barrier before loading the lock word.
3332     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3333 
3334     // Load the lockword and check if it is a forwarding address.
3335     static_assert(LockWord::kStateShift == 30u);
3336     static_assert(LockWord::kStateForwardingAddress == 3u);
3337     __ Ldr(tmp, HeapOperand(tmp, monitor_offset));
3338     __ Cmp(tmp, Operand(0xc0000000));
3339     __ B(&calculate_result, lo);   // ZF clear if taken.
3340 
3341     // Extract the forwarding address and compare with `other`.
3342     __ Cmp(other, Operand(tmp, LSL, LockWord::kForwardingAddressShift));
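    // (The state bits live in the top two bits of the lock word; they are shifted out
    // of the 32-bit result, so the shifted compare above effectively reconstructs the
    // forwarded reference from the stored, pre-shifted address.)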
3343 
3344     __ Bind(&calculate_result);
3345   }
3346 
3347   // Convert ZF into the Boolean result.
3348   __ Cset(out, eq);
3349 }
3350 
3351 void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
3352   LocationSummary* locations =
3353       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3354   locations->SetOut(Location::RequiresRegister());
3355 }
3356 
3357 void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
3358   MacroAssembler* masm = GetVIXLAssembler();
3359   Register out = RegisterFrom(invoke->GetLocations()->Out(), DataType::Type::kInt32);
3360   UseScratchRegisterScope temps(masm);
3361   Register temp = temps.AcquireX();
3362 
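  // Load-acquire the thread's interrupted flag; if it is set, clear it with a
  // store-release of wzr.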
3363   __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
3364   __ Ldar(out.W(), MemOperand(temp));
3365 
3366   vixl::aarch64::Label done;
3367   __ Cbz(out.W(), &done);
3368   __ Stlr(wzr, MemOperand(temp));
3369   __ Bind(&done);
3370 }
3371 
3372 void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
3373   LocationSummary* locations =
3374       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3375   locations->SetInAt(0, Location::Any());
3376 }
3377 
3378 void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3379 
3380 void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) {
3381   if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3382     return;
3383   }
3384 
3385   LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3386                                                                 LocationSummary::kNoCall,
3387                                                                 kIntrinsified);
3388 
3389   locations->SetInAt(0, Location::RequiresRegister());
3390   locations->SetInAt(1, Location::RequiresRegister());
3391   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3392 }
3393 
3394 // Lower the invoke of CRC32.update(int crc, int b).
3395 void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) {
3396   DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3397 
3398   MacroAssembler* masm = GetVIXLAssembler();
3399 
3400   Register crc = InputRegisterAt(invoke, 0);
3401   Register val = InputRegisterAt(invoke, 1);
3402   Register out = OutputRegister(invoke);
3403 
3404   // The general algorithm of the CRC32 calculation is:
3405   //   crc = ~crc
3406   //   result = crc32_for_byte(crc, b)
3407   //   crc = ~result
3408   // It is directly lowered to three instructions.
3409 
3410   UseScratchRegisterScope temps(masm);
3411   Register tmp = temps.AcquireSameSizeAs(out);
3412 
3413   __ Mvn(tmp, crc);
3414   __ Crc32b(tmp, tmp, val);
3415   __ Mvn(out, tmp);
3416 }
3417 
3418 // Generate code using CRC32 instructions to calculate
3419 // the CRC32 checksum of a sequence of bytes.
3420 //
3421 // Parameters:
3422 //   masm   - VIXL macro assembler
3423 //   crc    - a register holding an initial CRC value
3424 //   ptr    - a register holding a memory address of bytes
3425 //   length - a register holding a number of bytes to process
3426 //   out    - a register to receive the result of the calculation
3427 static void GenerateCodeForCalculationCRC32ValueOfBytes(MacroAssembler* masm,
3428                                                         const Register& crc,
3429                                                         const Register& ptr,
3430                                                         const Register& length,
3431                                                         const Register& out) {
3432   // The algorithm of CRC32 of bytes is:
3433   //   crc = ~crc
3434   //   process the first few bytes to make the pointer 8-byte aligned
3435   //   while array has 8 bytes do:
3436   //     crc = crc32_of_8bytes(crc, 8_bytes(array))
3437   //   if array has 4 bytes:
3438   //     crc = crc32_of_4bytes(crc, 4_bytes(array))
3439   //   if array has 2 bytes:
3440   //     crc = crc32_of_2bytes(crc, 2_bytes(array))
3441   //   if array has a byte:
3442   //     crc = crc32_of_byte(crc, 1_byte(array))
3443   //   crc = ~crc
3444 
3445   vixl::aarch64::Label loop, done;
3446   vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte;
3447   vixl::aarch64::Label aligned2, aligned4, aligned8;
3448 
3449   // Use VIXL scratch registers as the VIXL macro assembler won't use them in
3450   // instructions below.
3451   UseScratchRegisterScope temps(masm);
3452   Register len = temps.AcquireW();
3453   Register array_elem = temps.AcquireW();
3454 
3455   __ Mvn(out, crc);
3456   __ Mov(len, length);
3457 
3458   __ Tbz(ptr, 0, &aligned2);
3459   __ Subs(len, len, 1);
3460   __ B(&done, lo);
3461   __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex));
3462   __ Crc32b(out, out, array_elem);
3463 
3464   __ Bind(&aligned2);
3465   __ Tbz(ptr, 1, &aligned4);
3466   __ Subs(len, len, 2);
3467   __ B(&process_1byte, lo);
3468   __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3469   __ Crc32h(out, out, array_elem);
3470 
3471   __ Bind(&aligned4);
3472   __ Tbz(ptr, 2, &aligned8);
3473   __ Subs(len, len, 4);
3474   __ B(&process_2bytes, lo);
3475   __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3476   __ Crc32w(out, out, array_elem);
3477 
3478   __ Bind(&aligned8);
3479   __ Subs(len, len, 8);
3480   // If len < 8, process the remaining data by 4 bytes, 2 bytes and a byte.
3481   __ B(&process_4bytes, lo);
3482 
3483   // The main loop processing data by 8 bytes.
3484   __ Bind(&loop);
3485   __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex));
3486   __ Subs(len, len, 8);
3487   __ Crc32x(out, out, array_elem.X());
3488   // if len >= 8, process the next 8 bytes.
3489   __ B(&loop, hs);
3490 
3491   // Process the data which is less than 8 bytes.
3492   // The code generated below works with values of len
3493   // which come in the range [-8, 0].
3494   // The first three bits are used to detect whether 4 bytes or 2 bytes or
3495   // a byte can be processed.
3496   // The checking order is from bit 2 to bit 0:
3497   //  bit 2 is set: at least 4 bytes available
3498   //  bit 1 is set: at least 2 bytes available
3499   //  bit 0 is set: at least a byte available
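  // For example, with 3 bytes left, len == 3 - 8 == -5 == 0b...11011: bit 2 is clear
  // (no 4-byte load) while bits 1 and 0 are set (a 2-byte load followed by a 1-byte load).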
3500   __ Bind(&process_4bytes);
3501   // Go to process_2bytes if fewer than four bytes are available
3502   __ Tbz(len, 2, &process_2bytes);
3503   __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3504   __ Crc32w(out, out, array_elem);
3505 
3506   __ Bind(&process_2bytes);
3507   // Go to process_1byte if fewer than two bytes are available
3508   __ Tbz(len, 1, &process_1byte);
3509   __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3510   __ Crc32h(out, out, array_elem);
3511 
3512   __ Bind(&process_1byte);
3513   // Go to done if no bytes are available
3514   __ Tbz(len, 0, &done);
3515   __ Ldrb(array_elem, MemOperand(ptr));
3516   __ Crc32b(out, out, array_elem);
3517 
3518   __ Bind(&done);
3519   __ Mvn(out, out);
3520 }
3521 
3522 // The array-size threshold above which the library-provided implementation
3523 // of CRC32.updateBytes is used instead of the intrinsic.
3524 static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024;
3525 
3526 void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3527   if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3528     return;
3529   }
3530 
3531   LocationSummary* locations =
3532       new (allocator_) LocationSummary(invoke,
3533                                        LocationSummary::kCallOnSlowPath,
3534                                        kIntrinsified);
3535 
3536   locations->SetInAt(0, Location::RequiresRegister());
3537   locations->SetInAt(1, Location::RequiresRegister());
3538   locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
3539   locations->SetInAt(3, Location::RequiresRegister());
3540   locations->AddTemp(Location::RequiresRegister());
3541   locations->SetOut(Location::RequiresRegister());
3542 }
3543 
3544 // Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len)
3545 //
3546 // Note: The intrinsic is not used if len exceeds a threshold.
3547 void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3548   DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3549 
3550   MacroAssembler* masm = GetVIXLAssembler();
3551   LocationSummary* locations = invoke->GetLocations();
3552 
3553   SlowPathCodeARM64* slow_path =
3554       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
3555   codegen_->AddSlowPath(slow_path);
3556 
3557   Register length = WRegisterFrom(locations->InAt(3));
3558   __ Cmp(length, kCRC32UpdateBytesThreshold);
3559   __ B(slow_path->GetEntryLabel(), hi);
3560 
3561   const uint32_t array_data_offset =
3562       mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value();
3563   Register ptr = XRegisterFrom(locations->GetTemp(0));
3564   Register array = XRegisterFrom(locations->InAt(1));
3565   Location offset = locations->InAt(2);
3566   if (offset.IsConstant()) {
3567     int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue();
3568     __ Add(ptr, array, array_data_offset + offset_value);
3569   } else {
3570     __ Add(ptr, array, array_data_offset);
3571     __ Add(ptr, ptr, XRegisterFrom(offset));
3572   }
3573 
3574   Register crc = WRegisterFrom(locations->InAt(0));
3575   Register out = WRegisterFrom(locations->Out());
3576 
3577   GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
3578 
3579   __ Bind(slow_path->GetExitLabel());
3580 }
3581 
3582 void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3583   if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3584     return;
3585   }
3586 
3587   LocationSummary* locations =
3588       new (allocator_) LocationSummary(invoke,
3589                                        LocationSummary::kNoCall,
3590                                        kIntrinsified);
3591 
3592   locations->SetInAt(0, Location::RequiresRegister());
3593   locations->SetInAt(1, Location::RequiresRegister());
3594   locations->SetInAt(2, Location::RequiresRegister());
3595   locations->SetInAt(3, Location::RequiresRegister());
3596   locations->AddTemp(Location::RequiresRegister());
3597   locations->SetOut(Location::RequiresRegister());
3598 }
3599 
3600 // Lower the invoke of CRC32.updateByteBuffer(int crc, long addr, int off, int len)
3601 //
3602 // There is no need to generate code checking whether addr is 0:
3603 // updateByteBuffer is a private method of java.util.zip.CRC32, so it is never
3604 // called from outside the CRC32 class, and the address passed to it always
3605 // comes from a DirectBuffer.
3606 // An empty DirectBuffer implementation may use a zero address, but then its
3607 // length must also be zero, and the generated code below handles a zero
3608 // length correctly.
3609 void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3610   DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3611 
3612   MacroAssembler* masm = GetVIXLAssembler();
3613   LocationSummary* locations = invoke->GetLocations();
3614 
3615   Register addr = XRegisterFrom(locations->InAt(1));
3616   Register ptr = XRegisterFrom(locations->GetTemp(0));
3617   __ Add(ptr, addr, XRegisterFrom(locations->InAt(2)));
3618 
3619   Register crc = WRegisterFrom(locations->InAt(0));
3620   Register length = WRegisterFrom(locations->InAt(3));
3621   Register out = WRegisterFrom(locations->Out());
3622   GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
3623 }
3624 
3625 void IntrinsicLocationsBuilderARM64::VisitFP16ToFloat(HInvoke* invoke) {
3626   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3627     return;
3628   }
3629 
3630   LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3631                                                                 LocationSummary::kNoCall,
3632                                                                 kIntrinsified);
3633   locations->SetInAt(0, Location::RequiresRegister());
3634   locations->SetOut(Location::RequiresFpuRegister());
3635 }
3636 
3637 void IntrinsicCodeGeneratorARM64::VisitFP16ToFloat(HInvoke* invoke) {
3638   DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3639   MacroAssembler* masm = GetVIXLAssembler();
3640   UseScratchRegisterScope scratch_scope(masm);
3641   Register bits = InputRegisterAt(invoke, 0);
3642   VRegister out = SRegisterFrom(invoke->GetLocations()->Out());
3643   VRegister half = scratch_scope.AcquireH();
3644   __ Fmov(half, bits);  // ARMv8.2
3645   __ Fcvt(out, half);
3646 }
3647 
3648 void IntrinsicLocationsBuilderARM64::VisitFP16ToHalf(HInvoke* invoke) {
3649   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3650     return;
3651   }
3652 
3653   LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3654                                                                 LocationSummary::kNoCall,
3655                                                                 kIntrinsified);
3656   locations->SetInAt(0, Location::RequiresFpuRegister());
3657   locations->SetOut(Location::RequiresRegister());
3658 }
3659 
3660 void IntrinsicCodeGeneratorARM64::VisitFP16ToHalf(HInvoke* invoke) {
3661   DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3662   MacroAssembler* masm = GetVIXLAssembler();
3663   UseScratchRegisterScope scratch_scope(masm);
3664   VRegister in = SRegisterFrom(invoke->GetLocations()->InAt(0));
3665   VRegister half = scratch_scope.AcquireH();
3666   Register out = WRegisterFrom(invoke->GetLocations()->Out());
3667   __ Fcvt(half, in);
3668   __ Fmov(out, half);
3669   __ Sxth(out, out);  // sign extend due to returning a short type.
3670 }
3671 
3672 template<typename OP>
3673 void GenerateFP16Round(HInvoke* invoke,
3674                        CodeGeneratorARM64* const codegen_,
3675                        MacroAssembler* masm,
3676                        const OP roundOp) {
3677   DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3678   LocationSummary* locations = invoke->GetLocations();
3679   UseScratchRegisterScope scratch_scope(masm);
3680   Register out = WRegisterFrom(locations->Out());
3681   VRegister half = scratch_scope.AcquireH();
3682   __ Fmov(half, WRegisterFrom(locations->InAt(0)));
3683   roundOp(half, half);
3684   __ Fmov(out, half);
3685   __ Sxth(out, out);
3686 }
3687 
3688 void IntrinsicLocationsBuilderARM64::VisitFP16Floor(HInvoke* invoke) {
3689   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3690     return;
3691   }
3692 
3693   CreateIntToIntLocations(allocator_, invoke);
3694 }
3695 
3696 void IntrinsicCodeGeneratorARM64::VisitFP16Floor(HInvoke* invoke) {
3697   MacroAssembler* masm = GetVIXLAssembler();
3698   auto roundOp = [masm](const VRegister& out, const VRegister& in) {
3699     __ Frintm(out, in);  // Round towards Minus infinity
3700   };
3701   GenerateFP16Round(invoke, codegen_, masm, roundOp);
3702 }
3703 
3704 void IntrinsicLocationsBuilderARM64::VisitFP16Ceil(HInvoke* invoke) {
3705   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3706     return;
3707   }
3708 
3709   CreateIntToIntLocations(allocator_, invoke);
3710 }
3711 
3712 void IntrinsicCodeGeneratorARM64::VisitFP16Ceil(HInvoke* invoke) {
3713   MacroAssembler* masm = GetVIXLAssembler();
3714   auto roundOp = [masm](const VRegister& out, const VRegister& in) {
3715     __ Frintp(out, in);  // Round towards Plus infinity
3716   };
3717   GenerateFP16Round(invoke, codegen_, masm, roundOp);
3718 }
3719 
3720 void IntrinsicLocationsBuilderARM64::VisitFP16Rint(HInvoke* invoke) {
3721   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3722     return;
3723   }
3724 
3725   CreateIntToIntLocations(allocator_, invoke);
3726 }
3727 
3728 void IntrinsicCodeGeneratorARM64::VisitFP16Rint(HInvoke* invoke) {
3729   MacroAssembler* masm = GetVIXLAssembler();
3730   auto roundOp = [masm](const VRegister& out, const VRegister& in) {
3731     __ Frintn(out, in);  // Round to nearest, with ties to even
3732   };
3733   GenerateFP16Round(invoke, codegen_, masm, roundOp);
3734 }
3735 
3736 template<typename OP>
3737 void GenerateFP16Compare(HInvoke* invoke,
3738                          CodeGeneratorARM64* codegen,
3739                          MacroAssembler* masm,
3740                          const OP compareOp) {
3741   DCHECK(codegen->GetInstructionSetFeatures().HasFP16());
3742   LocationSummary* locations = invoke->GetLocations();
3743   Register out = WRegisterFrom(locations->Out());
3744   VRegister half0 = HRegisterFrom(locations->GetTemp(0));
3745   VRegister half1 = HRegisterFrom(locations->GetTemp(1));
3746   __ Fmov(half0, WRegisterFrom(locations->InAt(0)));
3747   __ Fmov(half1, WRegisterFrom(locations->InAt(1)));
3748   compareOp(out, half0, half1);
3749 }
3750 
3751 static inline void GenerateFP16Compare(HInvoke* invoke,
3752                                        CodeGeneratorARM64* codegen,
3753                                        MacroAssembler* masm,
3754                                        vixl::aarch64::Condition cond) {
3755   auto compareOp = [masm, cond](const Register out, const VRegister& in0, const VRegister& in1) {
3756     __ Fcmp(in0, in1);
3757     __ Cset(out, cond);
3758   };
3759   GenerateFP16Compare(invoke, codegen, masm, compareOp);
3760 }
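// Note on the conditions used by the callers below: after Fcmp, an unordered result (a NaN
// input) sets C and V but clears N and Z, so `mi` and `ls` (unlike `lt` and `le`) are false
// for NaN, as are `gt` and `ge`. This matches the expected "comparisons with NaN are false"
// behaviour of the FP16 comparison intrinsics.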
3761 
3762 void IntrinsicLocationsBuilderARM64::VisitFP16Greater(HInvoke* invoke) {
3763   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3764     return;
3765   }
3766 
3767   CreateIntIntToIntLocations(allocator_, invoke);
3768   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3769   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3770 }
3771 
3772 void IntrinsicCodeGeneratorARM64::VisitFP16Greater(HInvoke* invoke) {
3773   MacroAssembler* masm = GetVIXLAssembler();
3774   GenerateFP16Compare(invoke, codegen_, masm, gt);
3775 }
3776 
3777 void IntrinsicLocationsBuilderARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
3778   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3779     return;
3780   }
3781 
3782   CreateIntIntToIntLocations(allocator_, invoke);
3783   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3784   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3785 }
3786 
3787 void IntrinsicCodeGeneratorARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
3788   MacroAssembler* masm = GetVIXLAssembler();
3789   GenerateFP16Compare(invoke, codegen_, masm, ge);
3790 }
3791 
3792 void IntrinsicLocationsBuilderARM64::VisitFP16Less(HInvoke* invoke) {
3793   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3794     return;
3795   }
3796 
3797   CreateIntIntToIntLocations(allocator_, invoke);
3798   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3799   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3800 }
3801 
3802 void IntrinsicCodeGeneratorARM64::VisitFP16Less(HInvoke* invoke) {
3803   MacroAssembler* masm = GetVIXLAssembler();
3804   GenerateFP16Compare(invoke, codegen_, masm, mi);
3805 }
3806 
3807 void IntrinsicLocationsBuilderARM64::VisitFP16LessEquals(HInvoke* invoke) {
3808   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3809     return;
3810   }
3811 
3812   CreateIntIntToIntLocations(allocator_, invoke);
3813   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3814   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3815 }
3816 
3817 void IntrinsicCodeGeneratorARM64::VisitFP16LessEquals(HInvoke* invoke) {
3818   MacroAssembler* masm = GetVIXLAssembler();
3819   GenerateFP16Compare(invoke, codegen_, masm, ls);
3820 }
3821 
3822 static void GenerateDivideUnsigned(HInvoke* invoke, CodeGeneratorARM64* codegen) {
3823   LocationSummary* locations = invoke->GetLocations();
3824   MacroAssembler* masm = codegen->GetVIXLAssembler();
3825   DataType::Type type = invoke->GetType();
3826   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
3827 
3828   Register dividend = RegisterFrom(locations->InAt(0), type);
3829   Register divisor = RegisterFrom(locations->InAt(1), type);
3830   Register out = RegisterFrom(locations->Out(), type);
3831 
3832   // Check if divisor is zero, bail to managed implementation to handle.
3833   SlowPathCodeARM64* slow_path =
3834       new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
3835   codegen->AddSlowPath(slow_path);
3836   __ Cbz(divisor, slow_path->GetEntryLabel());
3837 
3838   __ Udiv(out, dividend, divisor);
3839 
3840   __ Bind(slow_path->GetExitLabel());
3841 }
3842 
3843 void IntrinsicLocationsBuilderARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3844   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
3845 }
3846 
3847 void IntrinsicCodeGeneratorARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3848   GenerateDivideUnsigned(invoke, codegen_);
3849 }
3850 
3851 void IntrinsicLocationsBuilderARM64::VisitLongDivideUnsigned(HInvoke* invoke) {
3852   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
3853 }
3854 
3855 void IntrinsicCodeGeneratorARM64::VisitLongDivideUnsigned(HInvoke* invoke) {
3856   GenerateDivideUnsigned(invoke, codegen_);
3857 }
3858 
3859 void IntrinsicLocationsBuilderARM64::VisitMathMultiplyHigh(HInvoke* invoke) {
3860   CreateIntIntToIntLocations(allocator_, invoke);
3861 }
3862 
3863 void IntrinsicCodeGeneratorARM64::VisitMathMultiplyHigh(HInvoke* invoke) {
3864   LocationSummary* locations = invoke->GetLocations();
3865   MacroAssembler* masm = codegen_->GetVIXLAssembler();
3866   DataType::Type type = invoke->GetType();
3867   DCHECK(type == DataType::Type::kInt64);
3868 
3869   Register x = RegisterFrom(locations->InAt(0), type);
3870   Register y = RegisterFrom(locations->InAt(1), type);
3871   Register out = RegisterFrom(locations->Out(), type);
3872 
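  // SMULH yields the upper 64 bits of the full 128-bit signed product,
  // which is exactly Math.multiplyHigh(x, y).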
3873   __ Smulh(out, x, y);
3874 }
3875 
3876 class VarHandleSlowPathARM64 : public IntrinsicSlowPathARM64 {
3877  public:
3878   VarHandleSlowPathARM64(HInvoke* invoke, std::memory_order order)
3879       : IntrinsicSlowPathARM64(invoke),
3880         order_(order),
3881         return_success_(false),
3882         strong_(false),
3883         get_and_update_op_(GetAndUpdateOp::kAdd) {
3884   }
3885 
3886   vixl::aarch64::Label* GetByteArrayViewCheckLabel() {
3887     return &byte_array_view_check_label_;
3888   }
3889 
3890   vixl::aarch64::Label* GetNativeByteOrderLabel() {
3891     return &native_byte_order_label_;
3892   }
3893 
3894   void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
3895     if (return_success) {
3896       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
3897     } else {
3898       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
3899     }
3900     return_success_ = return_success;
3901     strong_ = strong;
3902   }
3903 
3904   void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3905     DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
3906     get_and_update_op_ = get_and_update_op;
3907   }
3908 
3909   void EmitNativeCode(CodeGenerator* codegen_in) override {
3910     if (GetByteArrayViewCheckLabel()->IsLinked()) {
3911       EmitByteArrayViewCode(codegen_in);
3912     }
3913     IntrinsicSlowPathARM64::EmitNativeCode(codegen_in);
3914   }
3915 
3916  private:
3917   HInvoke* GetInvoke() const {
3918     return GetInstruction()->AsInvoke();
3919   }
3920 
3921   mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
3922     return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
3923   }
3924 
3925   void EmitByteArrayViewCode(CodeGenerator* codegen_in);
3926 
3927   vixl::aarch64::Label byte_array_view_check_label_;
3928   vixl::aarch64::Label native_byte_order_label_;
3929   // Shared parameter for all VarHandle intrinsics.
3930   std::memory_order order_;
3931   // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
3932   bool return_success_;
3933   bool strong_;
3934   // Extra argument for GenerateVarHandleGetAndUpdate().
3935   GetAndUpdateOp get_and_update_op_;
3936 };
3937 
3938 // Generate subtype check without read barriers.
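// Illustrative pseudocode for the check below (not the generated code itself):
//   klass = object->klass;
//   while (klass != type) {
//     klass = klass->super_class;
//     if (klass == null) goto slow_path;  // Possible false negative without read barriers.
//   }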
3939 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARM64* codegen,
3940                                                     SlowPathCodeARM64* slow_path,
3941                                                     Register object,
3942                                                     Register type,
3943                                                     bool object_can_be_null = true) {
3944   MacroAssembler* masm = codegen->GetVIXLAssembler();
3945 
3946   const MemberOffset class_offset = mirror::Object::ClassOffset();
3947   const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
3948 
3949   vixl::aarch64::Label success;
3950   if (object_can_be_null) {
3951     __ Cbz(object, &success);
3952   }
3953 
3954   UseScratchRegisterScope temps(masm);
3955   Register temp = temps.AcquireW();
3956 
3957   __ Ldr(temp, HeapOperand(object, class_offset.Int32Value()));
3958   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3959   vixl::aarch64::Label loop;
3960   __ Bind(&loop);
3961   __ Cmp(type, temp);
3962   __ B(&success, eq);
3963   __ Ldr(temp, HeapOperand(temp, super_class_offset.Int32Value()));
3964   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3965   __ Cbz(temp, slow_path->GetEntryLabel());
3966   __ B(&loop);
3967   __ Bind(&success);
3968 }
3969 
3970 // Check access mode and the primitive type from VarHandle.varType.
3971 // Check reference arguments against the VarHandle.varType; for references this is a subclass
3972 // check without read barrier, so it can have false negatives which we handle in the slow path.
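// Illustrative pseudocode of these fast-path checks (details are in the code below):
//   if (!varhandle.accessModesBitMask[access_mode]) goto slow_path;
//   if (varhandle.varType.primitiveType != expected_primitive_type) goto slow_path;
//   for each reference argument: subtype-check it against varhandle.varType (no read barrier).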
3973 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
3974                                                         CodeGeneratorARM64* codegen,
3975                                                         SlowPathCodeARM64* slow_path,
3976                                                         DataType::Type type) {
3977   mirror::VarHandle::AccessMode access_mode =
3978       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3979   Primitive::Type primitive_type = DataTypeToPrimitive(type);
3980 
3981   MacroAssembler* masm = codegen->GetVIXLAssembler();
3982   Register varhandle = InputRegisterAt(invoke, 0);
3983 
3984   const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
3985   const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
3986   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3987 
3988   UseScratchRegisterScope temps(masm);
3989   Register var_type_no_rb = temps.AcquireW();
3990   Register temp2 = temps.AcquireW();
3991 
3992   // Check that the operation is permitted and check the primitive type of varhandle.varType.
3993   // No read barrier is needed when the loaded reference is only used to read a constant
3994   // primitive field through that reference. Use LDP to load the two fields together.
3995   DCHECK_EQ(var_type_offset.Int32Value() + 4, access_mode_bit_mask_offset.Int32Value());
3996   __ Ldp(var_type_no_rb, temp2, HeapOperand(varhandle, var_type_offset.Int32Value()));
3997   codegen->GetAssembler()->MaybeUnpoisonHeapReference(var_type_no_rb);
3998   __ Tbz(temp2, static_cast<uint32_t>(access_mode), slow_path->GetEntryLabel());
3999   __ Ldrh(temp2, HeapOperand(var_type_no_rb, primitive_type_offset.Int32Value()));
4000   if (primitive_type == Primitive::kPrimNot) {
4001     static_assert(Primitive::kPrimNot == 0);
4002     __ Cbnz(temp2, slow_path->GetEntryLabel());
4003   } else {
4004     __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4005     __ B(slow_path->GetEntryLabel(), ne);
4006   }
4007 
4008   temps.Release(temp2);
4009 
4010   if (type == DataType::Type::kReference) {
4011     // Check reference arguments against the varType.
4012     // False negatives due to varType being an interface or array type
4013     // or due to the missing read barrier are handled by the slow path.
4014     size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4015     uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4016     uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4017     for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4018       HInstruction* arg = invoke->InputAt(arg_index);
4019       DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
4020       if (!arg->IsNullConstant()) {
4021         Register arg_reg = WRegisterFrom(invoke->GetLocations()->InAt(arg_index));
4022         GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, var_type_no_rb);
4023       }
4024     }
4025   }
4026 }
4027 
4028 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
4029                                               CodeGeneratorARM64* codegen,
4030                                               SlowPathCodeARM64* slow_path) {
4031   MacroAssembler* masm = codegen->GetVIXLAssembler();
4032   Register varhandle = InputRegisterAt(invoke, 0);
4033 
4034   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4035 
4036   UseScratchRegisterScope temps(masm);
4037   Register temp = temps.AcquireW();
4038 
4039   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
4040   // Do not emit read barrier (or unpoison the reference) for comparing to null.
4041   __ Ldr(temp, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4042   __ Cbnz(temp, slow_path->GetEntryLabel());
4043 }
4044 
4045 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
4046                                                  CodeGeneratorARM64* codegen,
4047                                                  SlowPathCodeARM64* slow_path) {
4048   MacroAssembler* masm = codegen->GetVIXLAssembler();
4049   Register varhandle = InputRegisterAt(invoke, 0);
4050   Register object = InputRegisterAt(invoke, 1);
4051 
4052   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4053   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4054 
4055   // Null-check the object.
4056   __ Cbz(object, slow_path->GetEntryLabel());
4057 
4058   UseScratchRegisterScope temps(masm);
4059   Register temp = temps.AcquireW();
4060   Register temp2 = temps.AcquireW();
4061 
4062   // Check that the VarHandle references an instance field by checking that
4063   // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
4064   // type compatibility check with the source object's type, which will fail for null.
4065   DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4066   __ Ldp(temp, temp2, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4067   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4068   // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4069   __ Cbnz(temp2, slow_path->GetEntryLabel());
4070 
4071   // Check that the object has the correct type.
4072   // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
4073   temps.Release(temp2);  // Needed by GenerateSubTypeObjectCheckNoReadBarrier().
4074   GenerateSubTypeObjectCheckNoReadBarrier(
4075       codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
4076 }
4077 
4078 static DataType::Type GetVarHandleExpectedValueType(HInvoke* invoke,
4079                                                     size_t expected_coordinates_count) {
4080   DCHECK_EQ(expected_coordinates_count, GetExpectedVarHandleCoordinatesCount(invoke));
4081   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4082   DCHECK_GE(number_of_arguments, /* VarHandle object */ 1u + expected_coordinates_count);
4083   if (number_of_arguments == /* VarHandle object */ 1u + expected_coordinates_count) {
4084     return invoke->GetType();
4085   } else {
4086     return GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4087   }
4088 }
4089 
4090 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
4091                                          CodeGeneratorARM64* codegen,
4092                                          VarHandleSlowPathARM64* slow_path) {
4093   MacroAssembler* masm = codegen->GetVIXLAssembler();
4094   Register varhandle = InputRegisterAt(invoke, 0);
4095   Register object = InputRegisterAt(invoke, 1);
4096   Register index = InputRegisterAt(invoke, 2);
4097   DataType::Type value_type =
4098       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4099   Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
4100 
4101   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4102   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4103   const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
4104   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4105   const MemberOffset class_offset = mirror::Object::ClassOffset();
4106   const MemberOffset array_length_offset = mirror::Array::LengthOffset();
4107 
4108   // Null-check the object.
4109   __ Cbz(object, slow_path->GetEntryLabel());
4110 
4111   UseScratchRegisterScope temps(masm);
4112   Register temp = temps.AcquireW();
4113   Register temp2 = temps.AcquireW();
4114 
4115   // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
4116   // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
4117   // coordinateType0 shall not be null but we do not explicitly verify that.
4118   DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4119   __ Ldp(temp, temp2, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4120   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4121   // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4122   __ Cbz(temp2, slow_path->GetEntryLabel());
4123 
4124   // Check object class against componentType0.
4125   //
4126   // This is an exact check and we defer other cases to the runtime. This includes
4127   // conversion to array of superclass references, which is valid but subsequently
4128   // requires all update operations to check that the value can indeed be stored.
4129   // We do not want to perform such extra checks in the intrinsified code.
4130   //
4131   // We do this check without read barrier, so there can be false negatives which we
4132   // defer to the slow path. There shall be no false negatives for array classes in the
4133   // boot image (including Object[] and primitive arrays) because they are non-movable.
4134   __ Ldr(temp2, HeapOperand(object, class_offset.Int32Value()));
4135   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4136   __ Cmp(temp, temp2);
4137   __ B(slow_path->GetEntryLabel(), ne);
4138 
4139   // Check that the coordinateType0 is an array type. We do not need a read barrier
4140   // for loading constant reference fields (or chains of them) for comparison with null,
4141   // nor for finally loading a constant primitive field (primitive type) below.
4142   __ Ldr(temp2, HeapOperand(temp, component_type_offset.Int32Value()));
4143   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4144   __ Cbz(temp2, slow_path->GetEntryLabel());
4145 
4146   // Check that the array component type matches the primitive type.
4147   __ Ldrh(temp2, HeapOperand(temp2, primitive_type_offset.Int32Value()));
4148   if (primitive_type == Primitive::kPrimNot) {
4149     static_assert(Primitive::kPrimNot == 0);
4150     __ Cbnz(temp2, slow_path->GetEntryLabel());
4151   } else {
4152     // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
4153     // we shall check for a byte array view in the slow path.
4154     // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
4155     // so we cannot emit that if we're JITting without boot image.
4156     bool boot_image_available =
4157         codegen->GetCompilerOptions().IsBootImage() ||
4158         !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
4159     DCHECK(boot_image_available || codegen->GetCompilerOptions().IsJitCompiler());
4160     size_t can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
4161     vixl::aarch64::Label* slow_path_label =
4162         can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
4163     __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4164     __ B(slow_path_label, ne);
4165   }
4166 
4167   // Check for array index out of bounds.
4168   __ Ldr(temp, HeapOperand(object, array_length_offset.Int32Value()));
4169   __ Cmp(index, temp);
4170   __ B(slow_path->GetEntryLabel(), hs);
4171 }
4172 
4173 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
4174                                               CodeGeneratorARM64* codegen,
4175                                               VarHandleSlowPathARM64* slow_path) {
4176   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4177   if (expected_coordinates_count == 0u) {
4178     GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
4179   } else if (expected_coordinates_count == 1u) {
4180     GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
4181   } else {
4182     DCHECK_EQ(expected_coordinates_count, 2u);
4183     GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4184   }
4185 }
4186 
4187 static VarHandleSlowPathARM64* GenerateVarHandleChecks(HInvoke* invoke,
4188                                                        CodeGeneratorARM64* codegen,
4189                                                        std::memory_order order,
4190                                                        DataType::Type type) {
4191   VarHandleSlowPathARM64* slow_path =
4192       new (codegen->GetScopedAllocator()) VarHandleSlowPathARM64(invoke, order);
4193   codegen->AddSlowPath(slow_path);
4194 
4195   GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4196   GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4197 
4198   return slow_path;
4199 }
4200 
4201 struct VarHandleTarget {
4202   Register object;  // The object holding the value to operate on.
4203   Register offset;  // The offset of the value to operate on.
4204 };
4205 
4206 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4207   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4208   LocationSummary* locations = invoke->GetLocations();
4209 
4210   VarHandleTarget target;
4211   // The temporary allocated for loading the offset.
4212   target.offset = WRegisterFrom(locations->GetTemp(0u));
4213   // The reference to the object that holds the value to operate on.
4214   target.object = (expected_coordinates_count == 0u)
4215       ? WRegisterFrom(locations->GetTemp(1u))
4216       : InputRegisterAt(invoke, 1);
4217   return target;
4218 }
4219 
4220 static void GenerateVarHandleTarget(HInvoke* invoke,
4221                                     const VarHandleTarget& target,
4222                                     CodeGeneratorARM64* codegen) {
4223   MacroAssembler* masm = codegen->GetVIXLAssembler();
4224   Register varhandle = InputRegisterAt(invoke, 0);
4225   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4226 
4227   if (expected_coordinates_count <= 1u) {
4228     // For static fields, we need to fill the `target.object` with the declaring class,
4229     // so we can use `target.object` as temporary for the `ArtMethod*`. For instance fields,
4230     // we do not need the declaring class, so we can forget the `ArtMethod*` when
4231     // we load the `target.offset`, so use the `target.offset` to hold the `ArtMethod*`.
4232     Register method = (expected_coordinates_count == 0) ? target.object : target.offset;
4233 
4234     const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4235     const MemberOffset offset_offset = ArtField::OffsetOffset();
4236 
4237     // Load the ArtField, the offset and, if needed, declaring class.
4238     __ Ldr(method.X(), HeapOperand(varhandle, art_field_offset.Int32Value()));
4239     __ Ldr(target.offset, MemOperand(method.X(), offset_offset.Int32Value()));
4240     if (expected_coordinates_count == 0u) {
4241       codegen->GenerateGcRootFieldLoad(invoke,
4242                                        LocationFrom(target.object),
4243                                        method.X(),
4244                                        ArtField::DeclaringClassOffset().Int32Value(),
4245                                        /*fixup_label=*/ nullptr,
4246                                        kCompilerReadBarrierOption);
4247     }
4248   } else {
4249     DCHECK_EQ(expected_coordinates_count, 2u);
4250     DataType::Type value_type =
4251         GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4252     size_t size_shift = DataType::SizeShift(value_type);
4253     MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4254 
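    // The offset of element `index` is data_offset + (index << size_shift); for example,
    // for an int[] (size_shift == 2), element i lives at data_offset + 4 * i.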
4255     Register index = InputRegisterAt(invoke, 2);
4256     Register shifted_index = index;
4257     if (size_shift != 0u) {
4258       shifted_index = target.offset;
4259       __ Lsl(shifted_index, index, size_shift);
4260     }
4261     __ Add(target.offset, shifted_index, data_offset.Int32Value());
4262   }
4263 }
4264 
4265 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
4266   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4267   if (expected_coordinates_count > 2u) {
4268     // Invalid coordinate count. This invoke shall throw at runtime.
4269     return false;
4270   }
4271   if (expected_coordinates_count != 0u &&
4272       invoke->InputAt(1)->GetType() != DataType::Type::kReference) {
4273     // Except for static fields (no coordinates), the first coordinate must be a reference.
4274     return false;
4275   }
4276   if (expected_coordinates_count == 2u) {
4277     // For arrays and views, the second coordinate must be convertible to `int`.
4278     // In this context, `boolean` is not convertible but we have to look at the shorty
4279     // as compiler transformations can give the invoke a valid boolean input.
4280     DataType::Type index_type = GetDataTypeFromShorty(invoke, 2);
4281     if (index_type == DataType::Type::kBool ||
4282         DataType::Kind(index_type) != DataType::Type::kInt32) {
4283       return false;
4284     }
4285   }
4286 
4287   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4288   DataType::Type return_type = invoke->GetType();
4289   mirror::VarHandle::AccessModeTemplate access_mode_template =
4290       mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4291   switch (access_mode_template) {
4292     case mirror::VarHandle::AccessModeTemplate::kGet:
4293       // The return type should be the same as varType, so it shouldn't be void.
4294       if (return_type == DataType::Type::kVoid) {
4295         return false;
4296       }
4297       break;
4298     case mirror::VarHandle::AccessModeTemplate::kSet:
4299       if (return_type != DataType::Type::kVoid) {
4300         return false;
4301       }
4302       break;
4303     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet: {
4304       if (return_type != DataType::Type::kBool) {
4305         return false;
4306       }
4307       uint32_t expected_value_index = number_of_arguments - 2;
4308       uint32_t new_value_index = number_of_arguments - 1;
4309       DataType::Type expected_value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4310       DataType::Type new_value_type = GetDataTypeFromShorty(invoke, new_value_index);
4311       if (expected_value_type != new_value_type) {
4312         return false;
4313       }
4314       break;
4315     }
4316     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: {
4317       uint32_t expected_value_index = number_of_arguments - 2;
4318       uint32_t new_value_index = number_of_arguments - 1;
4319       DataType::Type expected_value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4320       DataType::Type new_value_type = GetDataTypeFromShorty(invoke, new_value_index);
4321       if (expected_value_type != new_value_type || return_type != expected_value_type) {
4322         return false;
4323       }
4324       break;
4325     }
4326     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
4327       DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1);
4328       if (IsVarHandleGetAndAdd(invoke) &&
4329           (value_type == DataType::Type::kReference || value_type == DataType::Type::kBool)) {
4330         // We should only add numerical types.
4331         return false;
4332       } else if (IsVarHandleGetAndBitwiseOp(invoke) && !DataType::IsIntegralType(value_type)) {
4333         // We can only apply operators to bitwise integral types.
4334         // Note that bitwise VarHandle operations accept a non-integral boolean type and
4335         // perform the appropriate logical operation. However, the result is the same as
4336         // using the bitwise operation on our boolean representation and this fits well
4337         // with DataType::IsIntegralType() treating the compiler type kBool as integral.
4338         return false;
4339       }
4340       if (value_type != return_type) {
4341         return false;
4342       }
4343       break;
4344     }
4345   }
4346 
4347   return true;
4348 }
4349 
4350 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
4351   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4352   DataType::Type return_type = invoke->GetType();
4353 
4354   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4355   LocationSummary* locations =
4356       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4357   locations->SetInAt(0, Location::RequiresRegister());
4358   // Require coordinates in registers. These are the object holding the value
4359   // to operate on (except for static fields) and index (for arrays and views).
4360   for (size_t i = 0; i != expected_coordinates_count; ++i) {
4361     locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4362   }
4363   if (return_type != DataType::Type::kVoid) {
4364     if (DataType::IsFloatingPointType(return_type)) {
4365       locations->SetOut(Location::RequiresFpuRegister());
4366     } else {
4367       locations->SetOut(Location::RequiresRegister());
4368     }
4369   }
4370   uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4371   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4372   for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4373     HInstruction* arg = invoke->InputAt(arg_index);
4374     if (IsConstantZeroBitPattern(arg)) {
4375       locations->SetInAt(arg_index, Location::ConstantLocation(arg->AsConstant()));
4376     } else if (DataType::IsFloatingPointType(arg->GetType())) {
4377       locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4378     } else {
4379       locations->SetInAt(arg_index, Location::RequiresRegister());
4380     }
4381   }
4382 
4383   // Add a temporary for offset.
4384   if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4385       GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {  // For static fields.
4386     // To preserve the offset value across the non-Baker read barrier slow path
4387     // for loading the declaring class, use a fixed callee-save register.
4388     constexpr int first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
4389     locations->AddTemp(Location::RegisterLocation(first_callee_save));
4390   } else {
4391     locations->AddTemp(Location::RequiresRegister());
4392   }
4393   if (expected_coordinates_count == 0u) {
4394     // Add a temporary to hold the declaring class.
4395     locations->AddTemp(Location::RequiresRegister());
4396   }
4397 
4398   return locations;
4399 }
4400 
4401 static void CreateVarHandleGetLocations(HInvoke* invoke) {
4402   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4403     return;
4404   }
4405 
4406   if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4407       invoke->GetType() == DataType::Type::kReference &&
4408       invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
4409       invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
4410     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4411     // the passed reference and reloads it from the field. This gets the memory visibility
4412     // wrong for Acquire/Volatile operations. b/173104084
4413     return;
4414   }
4415 
4416   CreateVarHandleCommonLocations(invoke);
4417 }
4418 
4419 static void GenerateVarHandleGet(HInvoke* invoke,
4420                                  CodeGeneratorARM64* codegen,
4421                                  std::memory_order order,
4422                                  bool byte_swap = false) {
4423   DataType::Type type = invoke->GetType();
4424   DCHECK_NE(type, DataType::Type::kVoid);
4425 
4426   LocationSummary* locations = invoke->GetLocations();
4427   MacroAssembler* masm = codegen->GetVIXLAssembler();
4428   CPURegister out = helpers::OutputCPURegister(invoke);
4429 
4430   VarHandleTarget target = GetVarHandleTarget(invoke);
4431   VarHandleSlowPathARM64* slow_path = nullptr;
4432   if (!byte_swap) {
4433     slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
4434     GenerateVarHandleTarget(invoke, target, codegen);
4435     __ Bind(slow_path->GetNativeByteOrderLabel());
4436   }
4437 
4438   // ARM64 load-acquire instructions are implicitly sequentially consistent.
4439   bool use_load_acquire =
4440       (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
4441   DCHECK(use_load_acquire || order == std::memory_order_relaxed);
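  // Thus plain get and getOpaque use a normal load, while getAcquire and getVolatile use a
  // load-acquire (Ldar and friends), which also provides the ordering needed for volatile
  // (seq_cst) accesses on ARM64.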
4442 
4443   // Load the value from the target location.
4444   if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4445     // Piggy-back on the field load path using introspection for the Baker read barrier.
4446     // The `target.offset` is a temporary, use it for field address.
4447     Register tmp_ptr = target.offset.X();
4448     __ Add(tmp_ptr, target.object.X(), target.offset.X());
4449     codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
4450                                                    locations->Out(),
4451                                                    target.object,
4452                                                    MemOperand(tmp_ptr),
4453                                                    /*needs_null_check=*/ false,
4454                                                    use_load_acquire);
4455     DCHECK(!byte_swap);
4456   } else {
4457     MemOperand address(target.object.X(), target.offset.X());
4458     CPURegister load_reg = out;
4459     DataType::Type load_type = type;
4460     UseScratchRegisterScope temps(masm);
4461     if (byte_swap) {
4462       if (type == DataType::Type::kInt16) {
4463         // Avoid unnecessary sign extension before REV16.
4464         load_type = DataType::Type::kUint16;
4465       } else if (type == DataType::Type::kFloat32) {
4466         load_type = DataType::Type::kInt32;
4467         load_reg = target.offset.W();
4468       } else if (type == DataType::Type::kFloat64) {
4469         load_type = DataType::Type::kInt64;
4470         load_reg = target.offset.X();
4471       }
4472     }
4473     if (use_load_acquire) {
4474       codegen->LoadAcquire(invoke, load_type, load_reg, address, /*needs_null_check=*/ false);
4475     } else {
4476       codegen->Load(load_type, load_reg, address);
4477     }
4478     if (type == DataType::Type::kReference) {
4479       DCHECK(!byte_swap);
4480       DCHECK(out.IsW());
4481       Location out_loc = locations->Out();
4482       Location object_loc = LocationFrom(target.object);
4483       Location offset_loc = LocationFrom(target.offset);
4484       codegen->MaybeGenerateReadBarrierSlow(invoke, out_loc, out_loc, object_loc, 0u, offset_loc);
4485     } else if (byte_swap) {
4486       GenerateReverseBytes(masm, type, load_reg, out);
4487     }
4488   }
4489 
4490   if (!byte_swap) {
4491     __ Bind(slow_path->GetExitLabel());
4492   }
4493 }
4494 
4495 void IntrinsicLocationsBuilderARM64::VisitVarHandleGet(HInvoke* invoke) {
4496   CreateVarHandleGetLocations(invoke);
4497 }
4498 
4499 void IntrinsicCodeGeneratorARM64::VisitVarHandleGet(HInvoke* invoke) {
4500   GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
4501 }
4502 
4503 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4504   CreateVarHandleGetLocations(invoke);
4505 }
4506 
4507 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4508   GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
4509 }
4510 
4511 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4512   CreateVarHandleGetLocations(invoke);
4513 }
4514 
4515 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4516   GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire);
4517 }
4518 
4519 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4520   CreateVarHandleGetLocations(invoke);
4521 }
4522 
4523 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4524   GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst);
4525 }
4526 
4527 static void CreateVarHandleSetLocations(HInvoke* invoke) {
4528   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4529     return;
4530   }
4531 
4532   CreateVarHandleCommonLocations(invoke);
4533 }
4534 
4535 static void GenerateVarHandleSet(HInvoke* invoke,
4536                                  CodeGeneratorARM64* codegen,
4537                                  std::memory_order order,
4538                                  bool byte_swap = false) {
4539   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4540   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4541 
4542   MacroAssembler* masm = codegen->GetVIXLAssembler();
4543   CPURegister value = InputCPURegisterOrZeroRegAt(invoke, value_index);
4544 
4545   VarHandleTarget target = GetVarHandleTarget(invoke);
4546   VarHandleSlowPathARM64* slow_path = nullptr;
4547   if (!byte_swap) {
4548     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4549     GenerateVarHandleTarget(invoke, target, codegen);
4550     __ Bind(slow_path->GetNativeByteOrderLabel());
4551   }
4552 
4553   // ARM64 store-release instructions are implicitly sequentially consistent.
4554   bool use_store_release =
4555       (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
4556   DCHECK(use_store_release || order == std::memory_order_relaxed);
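  // Illustrative instruction shapes (assuming a plain int field, no byte swap and no reference
  // poisoning; the exact sequence is up to the codegen Store()/StoreRelease() helpers):
  //   relaxed (set, setOpaque):        str  w<value>, [x<object>, x<offset>]
  //   release/seq_cst (setRelease,
  //                    setVolatile):   add  x<tmp>, x<object>, x<offset>
  //                                    stlr w<value>, [x<tmp>]
  // STLR has no register-offset addressing mode, so the store-release needs the absolute address.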
4557 
4558   // Store the value to the target location.
4559   {
4560     CPURegister source = value;
4561     UseScratchRegisterScope temps(masm);
4562     if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
4563       DCHECK(value.IsW());
4564       Register temp = temps.AcquireW();
4565       __ Mov(temp, value.W());
4566       codegen->GetAssembler()->PoisonHeapReference(temp);
4567       source = temp;
4568     }
4569     if (byte_swap) {
4570       DCHECK(!source.IsZero());  // We use the main path for zero as it does not need a byte swap.
4571       Register temp = source.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
4572       if (value_type == DataType::Type::kInt16) {
4573         // Avoid unnecessary sign extension before storing.
4574         value_type = DataType::Type::kUint16;
4575       } else if (DataType::IsFloatingPointType(value_type)) {
4576         __ Fmov(temp, source.Is64Bits() ? source.D() : source.S());
4577         value_type = source.Is64Bits() ? DataType::Type::kInt64 : DataType::Type::kInt32;
4578         source = temp;  // Source for the `GenerateReverseBytes()` below.
4579       }
4580       GenerateReverseBytes(masm, value_type, source, temp);
4581       source = temp;
4582     }
4583     MemOperand address(target.object.X(), target.offset.X());
4584     if (use_store_release) {
4585       codegen->StoreRelease(invoke, value_type, source, address, /*needs_null_check=*/ false);
4586     } else {
4587       codegen->Store(value_type, source, address);
4588     }
4589   }
4590 
4591   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
4592     codegen->MarkGCCard(target.object, Register(value), /*value_can_be_null=*/ true);
4593   }
4594 
4595   if (!byte_swap) {
4596     __ Bind(slow_path->GetExitLabel());
4597   }
4598 }
4599 
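// Illustrative summary (derived from the Visit methods below): Set-style accessors call
// GenerateVarHandleSet() with these memory orders:
//   VarHandle.set()         -> std::memory_order_relaxed
//   VarHandle.setOpaque()   -> std::memory_order_relaxed
//   VarHandle.setRelease()  -> std::memory_order_release
//   VarHandle.setVolatile() -> std::memory_order_seq_cst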
VisitVarHandleSet(HInvoke * invoke)4600 void IntrinsicLocationsBuilderARM64::VisitVarHandleSet(HInvoke* invoke) {
4601   CreateVarHandleSetLocations(invoke);
4602 }
4603 
VisitVarHandleSet(HInvoke * invoke)4604 void IntrinsicCodeGeneratorARM64::VisitVarHandleSet(HInvoke* invoke) {
4605   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
4606 }
4607 
VisitVarHandleSetOpaque(HInvoke * invoke)4608 void IntrinsicLocationsBuilderARM64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4609   CreateVarHandleSetLocations(invoke);
4610 }
4611 
VisitVarHandleSetOpaque(HInvoke * invoke)4612 void IntrinsicCodeGeneratorARM64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4613   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
4614 }
4615 
VisitVarHandleSetRelease(HInvoke * invoke)4616 void IntrinsicLocationsBuilderARM64::VisitVarHandleSetRelease(HInvoke* invoke) {
4617   CreateVarHandleSetLocations(invoke);
4618 }
4619 
VisitVarHandleSetRelease(HInvoke * invoke)4620 void IntrinsicCodeGeneratorARM64::VisitVarHandleSetRelease(HInvoke* invoke) {
4621   GenerateVarHandleSet(invoke, codegen_, std::memory_order_release);
4622 }
4623 
VisitVarHandleSetVolatile(HInvoke * invoke)4624 void IntrinsicLocationsBuilderARM64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4625   CreateVarHandleSetLocations(invoke);
4626 }
4627 
VisitVarHandleSetVolatile(HInvoke * invoke)4628 void IntrinsicCodeGeneratorARM64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4629   GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst);
4630 }
4631 
CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke * invoke,bool return_success)4632 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, bool return_success) {
4633   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4634     return;
4635   }
4636 
4637   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4638   DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4639   if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4640       value_type == DataType::Type::kReference) {
4641     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4642     // the passed reference and reloads it from the field. This breaks the read barriers
4643     // in the slow path in different ways. The marked old value may not actually be a to-space
4644     // reference to the same object as `old_value`, breaking the slow path's assumptions. And
4645     // for CompareAndExchange, marking the old value after comparison failure may actually
4646     // return the reference to `expected`, erroneously indicating success even though we
4647     // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
4648     return;
4649   }
4650 
4651   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4652 
4653   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
4654     // We need callee-save registers for both the class object and offset instead of
4655     // the temporaries reserved in CreateVarHandleCommonLocations().
4656     static_assert(POPCOUNT(kArm64CalleeSaveRefSpills) >= 2u);
4657     uint32_t first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
4658     uint32_t second_callee_save = CTZ(kArm64CalleeSaveRefSpills ^ (1u << first_callee_save));
4659     if (GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {  // For static fields.
4660       DCHECK_EQ(locations->GetTempCount(), 2u);
4661       DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4662       DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
4663       locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
4664     } else {
4665       DCHECK_EQ(locations->GetTempCount(), 1u);
4666       DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4667       locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
4668     }
4669   }
4670   size_t old_temp_count = locations->GetTempCount();
4671   DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4672   if (!return_success) {
4673     if (DataType::IsFloatingPointType(value_type)) {
4674       // Add a temporary for old value and exclusive store result if floating point
4675       // `expected` and/or `new_value` take scratch registers.
4676       size_t available_scratch_registers =
4677           (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) ? 1u : 0u) +
4678           (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) ? 1u : 0u);
4679       size_t temps_needed = /* pointer, old value, store result */ 3u - available_scratch_registers;
4680       // We can reuse the declaring class (if present) and offset temporary.
4681       if (temps_needed > old_temp_count) {
4682         locations->AddRegisterTemps(temps_needed - old_temp_count);
4683       }
4684     } else if ((value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) &&
4685                !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) &&
4686                !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) &&
4687                GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
4688       // Allocate a normal temporary for store result in the non-native byte order path
4689       // because scratch registers are used by the byte-swapped `expected` and `new_value`.
4690       DCHECK_EQ(old_temp_count, 1u);
4691       locations->AddTemp(Location::RequiresRegister());
4692     }
4693   }
4694   if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
4695     // Add a temporary for the `old_value_temp` in slow path.
4696     locations->AddTemp(Location::RequiresRegister());
4697   }
4698 }
4699 
MoveToTempIfFpRegister(const CPURegister & cpu_reg,DataType::Type type,MacroAssembler * masm,UseScratchRegisterScope * temps)4700 static Register MoveToTempIfFpRegister(const CPURegister& cpu_reg,
4701                                        DataType::Type type,
4702                                        MacroAssembler* masm,
4703                                        UseScratchRegisterScope* temps) {
4704   if (cpu_reg.IsS()) {
4705     DCHECK_EQ(type, DataType::Type::kFloat32);
4706     Register reg = temps->AcquireW();
4707     __ Fmov(reg, cpu_reg.S());
4708     return reg;
4709   } else if (cpu_reg.IsD()) {
4710     DCHECK_EQ(type, DataType::Type::kFloat64);
4711     Register reg = temps->AcquireX();
4712     __ Fmov(reg, cpu_reg.D());
4713     return reg;
4714   } else {
4715     return DataType::Is64BitType(type) ? cpu_reg.X() : cpu_reg.W();
4716   }
4717 }
4718 
GenerateVarHandleCompareAndSetOrExchange(HInvoke * invoke,CodeGeneratorARM64 * codegen,std::memory_order order,bool return_success,bool strong,bool byte_swap=false)4719 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4720                                                      CodeGeneratorARM64* codegen,
4721                                                      std::memory_order order,
4722                                                      bool return_success,
4723                                                      bool strong,
4724                                                      bool byte_swap = false) {
4725   DCHECK(return_success || strong);
4726 
4727   uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
4728   uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
4729   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4730   DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
4731 
4732   MacroAssembler* masm = codegen->GetVIXLAssembler();
4733   LocationSummary* locations = invoke->GetLocations();
4734   CPURegister expected = InputCPURegisterOrZeroRegAt(invoke, expected_index);
4735   CPURegister new_value = InputCPURegisterOrZeroRegAt(invoke, new_value_index);
4736   CPURegister out = helpers::OutputCPURegister(invoke);
4737 
4738   VarHandleTarget target = GetVarHandleTarget(invoke);
4739   VarHandleSlowPathARM64* slow_path = nullptr;
4740   if (!byte_swap) {
4741     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4742     slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
4743     GenerateVarHandleTarget(invoke, target, codegen);
4744     __ Bind(slow_path->GetNativeByteOrderLabel());
4745   }
4746 
4747   // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
4748   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
4749     // Mark card for object assuming new value is stored.
4750     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
4751     codegen->MarkGCCard(target.object, new_value.W(), new_value_can_be_null);
4752   }
4753 
4754   // Reuse the `offset` temporary for the pointer to the target location,
4755   // except for references that need the offset for the read barrier.
4756   UseScratchRegisterScope temps(masm);
4757   Register tmp_ptr = target.offset.X();
4758   if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
4759     tmp_ptr = temps.AcquireX();
4760   }
4761   __ Add(tmp_ptr, target.object.X(), target.offset.X());
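  // Note: the exclusive accesses emitted by GenerateCompareAndSet() (LDAXR/STLXR-style) accept
  // only a base register with no offset, which is why the absolute address `object + offset`
  // is materialized in `tmp_ptr` up front.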
4762 
4763   // Move floating point values to scratch registers.
4764   // Note that float/double CAS uses bitwise comparison, rather than the operator==.
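  // (Illustrative background: FMOV between an S/D register and a W/X register copies the raw bit
  // pattern, so e.g. +0.0f and -0.0f do not match while two NaNs with identical bits do. This is
  // consistent with the VarHandle specification, which compares float/double bitwise, as if by
  // Float.floatToRawIntBits / Double.doubleToRawLongBits.)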
4765   Register expected_reg = MoveToTempIfFpRegister(expected, value_type, masm, &temps);
4766   Register new_value_reg = MoveToTempIfFpRegister(new_value, value_type, masm, &temps);
4767   bool is_fp = DataType::IsFloatingPointType(value_type);
4768   DataType::Type cas_type = is_fp
4769       ? ((value_type == DataType::Type::kFloat64) ? DataType::Type::kInt64 : DataType::Type::kInt32)
4770       : value_type;
4771   // Avoid sign extension in the CAS loop by zero-extending `expected` before the loop. This adds
4772   // one instruction for CompareAndExchange as we shall need to sign-extend the returned value.
4773   if (value_type == DataType::Type::kInt16 && !expected.IsZero()) {
4774     Register temp = temps.AcquireW();
4775     __ Uxth(temp, expected_reg);
4776     expected_reg = temp;
4777     cas_type = DataType::Type::kUint16;
4778   } else if (value_type == DataType::Type::kInt8 && !expected.IsZero()) {
4779     Register temp = temps.AcquireW();
4780     __ Uxtb(temp, expected_reg);
4781     expected_reg = temp;
4782     cas_type = DataType::Type::kUint8;
4783   }
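  // Worked example (illustrative): for an Int16 field holding -1, `expected` arrives sign-extended
  // as 0xffffffff in a W register, while an exclusive halfword load in the CAS loop zero-extends
  // the loaded value to 0x0000ffff. The UXTH above makes the two representations compare equal,
  // so no extension is needed inside the loop; CompareAndExchange sign-extends the returned old
  // value after the loop instead.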
4784 
4785   if (byte_swap) {
4786     // Do the byte swap and move values to scratch registers if needed.
4787     // Non-zero FP values and non-zero `expected` for `kInt16` are already in scratch registers.
4788     DCHECK_NE(value_type, DataType::Type::kInt8);
4789     if (!expected.IsZero()) {
4790       bool is_scratch = is_fp || (value_type == DataType::Type::kInt16);
4791       Register temp = is_scratch ? expected_reg : temps.AcquireSameSizeAs(expected_reg);
4792       GenerateReverseBytes(masm, cas_type, expected_reg, temp);
4793       expected_reg = temp;
4794     }
4795     if (!new_value.IsZero()) {
4796       Register temp = is_fp ? new_value_reg : temps.AcquireSameSizeAs(new_value_reg);
4797       GenerateReverseBytes(masm, cas_type, new_value_reg, temp);
4798       new_value_reg = temp;
4799     }
4800   }
4801 
4802   // Prepare registers for old value and the result of the exclusive store.
4803   Register old_value;
4804   Register store_result;
4805   if (return_success) {
4806     // Use the output register for both old value and exclusive store result.
4807     old_value = (cas_type == DataType::Type::kInt64) ? out.X() : out.W();
4808     store_result = out.W();
4809   } else if (DataType::IsFloatingPointType(value_type)) {
4810     // We need two temporary registers, but we have already used scratch registers for
4811     // holding the expected and new values unless they are the zero bit pattern (+0.0f or
4812     // +0.0). We have allocated sufficient normal temporaries to handle that.
4813     size_t next_temp = 1u;
4814     if (expected.IsZero()) {
4815       old_value = (cas_type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
4816     } else {
4817       Location temp = locations->GetTemp(next_temp);
4818       ++next_temp;
4819       old_value = (cas_type == DataType::Type::kInt64) ? XRegisterFrom(temp) : WRegisterFrom(temp);
4820     }
4821     store_result =
4822         new_value.IsZero() ? temps.AcquireW() : WRegisterFrom(locations->GetTemp(next_temp));
4823     DCHECK(!old_value.Is(tmp_ptr));
4824     DCHECK(!store_result.Is(tmp_ptr));
4825   } else {
4826     // Use the output register for the old value.
4827     old_value = (cas_type == DataType::Type::kInt64) ? out.X() : out.W();
4828     // Use scratch register for the store result, except when we have used up
4829     // scratch registers for byte-swapped `expected` and `new_value`.
4830     // In that case, we have allocated a normal temporary.
4831     store_result = (byte_swap && !expected.IsZero() && !new_value.IsZero())
4832         ? WRegisterFrom(locations->GetTemp(1))
4833         : temps.AcquireW();
4834     DCHECK(!store_result.Is(tmp_ptr));
4835   }
4836 
4837   vixl::aarch64::Label exit_loop_label;
4838   vixl::aarch64::Label* exit_loop = &exit_loop_label;
4839   vixl::aarch64::Label* cmp_failure = &exit_loop_label;
4840 
4841   if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
4842     // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
4843     // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
4844     size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4845     Register old_value_temp =
4846         WRegisterFrom(locations->GetTemp((expected_coordinates_count == 0u) ? 2u : 1u));
4847     // For strong CAS, use a scratch register for the store result in slow path.
4848     // For weak CAS, we need to check the store result, so store it in `store_result`.
4849     Register slow_path_store_result = strong ? Register() : store_result;
4850     ReadBarrierCasSlowPathARM64* rb_slow_path =
4851         new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARM64(
4852             invoke,
4853             order,
4854             strong,
4855             target.object,
4856             target.offset.X(),
4857             expected_reg,
4858             new_value_reg,
4859             old_value,
4860             old_value_temp,
4861             slow_path_store_result,
4862             /*update_old_value=*/ !return_success,
4863             codegen);
4864     codegen->AddSlowPath(rb_slow_path);
4865     exit_loop = rb_slow_path->GetExitLabel();
4866     cmp_failure = rb_slow_path->GetEntryLabel();
4867   }
4868 
4869   GenerateCompareAndSet(codegen,
4870                         cas_type,
4871                         order,
4872                         strong,
4873                         cmp_failure,
4874                         tmp_ptr,
4875                         new_value_reg,
4876                         old_value,
4877                         store_result,
4878                         expected_reg);
4879   __ Bind(exit_loop);
4880 
4881   if (return_success) {
4882     if (strong) {
4883       __ Cset(out.W(), eq);
4884     } else {
4885       // On success, the Z flag is set and the store result is 1, see GenerateCompareAndSet().
4886       // On failure, either the Z flag is clear or the store result is 0.
4887       // Determine the final success value with a CSEL.
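      // Illustratively:
      //   Z set,   store result == 1  -> CSEL selects the store result (1): success.
      //   Z set,   store result == 0  -> the exclusive store did not succeed (allowed for a weak
      //                                  CAS even when the comparison matched): report failure.
      //   Z clear                     -> the comparison failed: CSEL selects wzr, report failure.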
4888       __ Csel(out.W(), store_result, wzr, eq);
4889     }
4890   } else if (byte_swap) {
4891     // Also handles moving to FP registers.
4892     GenerateReverseBytes(masm, value_type, old_value, out);
4893   } else if (DataType::IsFloatingPointType(value_type)) {
4894     __ Fmov((value_type == DataType::Type::kFloat64) ? out.D() : out.S(), old_value);
4895   } else if (value_type == DataType::Type::kInt8) {
4896     __ Sxtb(out.W(), old_value);
4897   } else if (value_type == DataType::Type::kInt16) {
4898     __ Sxth(out.W(), old_value);
4899   }
4900 
4901   if (!byte_swap) {
4902     __ Bind(slow_path->GetExitLabel());
4903   }
4904 }
4905 
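// Illustrative summary (derived from the Visit methods below): the CAS-family accessors call
// GenerateVarHandleCompareAndSetOrExchange() as follows:
//   compareAndExchange{,Acquire,Release}  -> return_success=false, strong=true,
//                                            seq_cst / acquire / release
//   compareAndSet                         -> return_success=true,  strong=true,  seq_cst
//   weakCompareAndSet{,Acquire,Release}   -> return_success=true,  strong=false,
//                                            seq_cst / acquire / release
//   weakCompareAndSetPlain                -> return_success=true,  strong=false, relaxed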
VisitVarHandleCompareAndExchange(HInvoke * invoke)4906 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4907   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
4908 }
4909 
VisitVarHandleCompareAndExchange(HInvoke * invoke)4910 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4911   GenerateVarHandleCompareAndSetOrExchange(
4912       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
4913 }
4914 
VisitVarHandleCompareAndExchangeAcquire(HInvoke * invoke)4915 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4916   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
4917 }
4918 
VisitVarHandleCompareAndExchangeAcquire(HInvoke * invoke)4919 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4920   GenerateVarHandleCompareAndSetOrExchange(
4921       invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
4922 }
4923 
VisitVarHandleCompareAndExchangeRelease(HInvoke * invoke)4924 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4925   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
4926 }
4927 
VisitVarHandleCompareAndExchangeRelease(HInvoke * invoke)4928 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4929   GenerateVarHandleCompareAndSetOrExchange(
4930       invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
4931 }
4932 
VisitVarHandleCompareAndSet(HInvoke * invoke)4933 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4934   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
4935 }
4936 
VisitVarHandleCompareAndSet(HInvoke * invoke)4937 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4938   GenerateVarHandleCompareAndSetOrExchange(
4939       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
4940 }
4941 
VisitVarHandleWeakCompareAndSet(HInvoke * invoke)4942 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4943   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
4944 }
4945 
VisitVarHandleWeakCompareAndSet(HInvoke * invoke)4946 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4947   GenerateVarHandleCompareAndSetOrExchange(
4948       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
4949 }
4950 
VisitVarHandleWeakCompareAndSetAcquire(HInvoke * invoke)4951 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4952   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
4953 }
4954 
VisitVarHandleWeakCompareAndSetAcquire(HInvoke * invoke)4955 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4956   GenerateVarHandleCompareAndSetOrExchange(
4957       invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
4958 }
4959 
VisitVarHandleWeakCompareAndSetPlain(HInvoke * invoke)4960 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4961   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
4962 }
4963 
VisitVarHandleWeakCompareAndSetPlain(HInvoke * invoke)4964 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4965   GenerateVarHandleCompareAndSetOrExchange(
4966       invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
4967 }
4968 
VisitVarHandleWeakCompareAndSetRelease(HInvoke * invoke)4969 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4970   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
4971 }
4972 
VisitVarHandleWeakCompareAndSetRelease(HInvoke * invoke)4973 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4974   GenerateVarHandleCompareAndSetOrExchange(
4975       invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
4976 }
4977 
CreateVarHandleGetAndUpdateLocations(HInvoke * invoke,GetAndUpdateOp get_and_update_op)4978 static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
4979                                                  GetAndUpdateOp get_and_update_op) {
4980   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4981     return;
4982   }
4983 
4984   if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4985       invoke->GetType() == DataType::Type::kReference) {
4986     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4987     // the passed reference and reloads it from the field, thus seeing the new value
4988     // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
4989     return;
4990   }
4991 
4992   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4993 
4994   size_t old_temp_count = locations->GetTempCount();
4995   DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4996   if (DataType::IsFloatingPointType(invoke->GetType())) {
4997     if (get_and_update_op == GetAndUpdateOp::kAdd) {
4998       // For ADD, do not use ZR for zero bit pattern (+0.0f or +0.0).
4999       locations->SetInAt(invoke->GetNumberOfArguments() - 1u, Location::RequiresFpuRegister());
5000     } else {
5001       DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
5002       // We can reuse the declaring class temporary if present.
5003       if (old_temp_count == 1u &&
5004           !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5005         // Add a temporary for `old_value` if floating point `new_value` takes a scratch register.
5006         locations->AddTemp(Location::RequiresRegister());
5007       }
5008     }
5009   }
5010   // We need a temporary for the byte-swap path of bitwise operations, unless the argument is
5011   // zero, which does not need a byte swap. We can reuse the declaring class temporary if present.
5012   if (old_temp_count == 1u &&
5013       (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd) &&
5014       GetExpectedVarHandleCoordinatesCount(invoke) == 2u &&
5015       !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5016     DataType::Type value_type =
5017         GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5018     if (value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) {
5019       locations->AddTemp(Location::RequiresRegister());
5020     }
5021   }
5022 }
5023 
GenerateVarHandleGetAndUpdate(HInvoke * invoke,CodeGeneratorARM64 * codegen,GetAndUpdateOp get_and_update_op,std::memory_order order,bool byte_swap=false)5024 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
5025                                           CodeGeneratorARM64* codegen,
5026                                           GetAndUpdateOp get_and_update_op,
5027                                           std::memory_order order,
5028                                           bool byte_swap = false) {
5029   uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
5030   DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
5031 
5032   MacroAssembler* masm = codegen->GetVIXLAssembler();
5033   LocationSummary* locations = invoke->GetLocations();
5034   CPURegister arg = InputCPURegisterOrZeroRegAt(invoke, arg_index);
5035   CPURegister out = helpers::OutputCPURegister(invoke);
5036 
5037   VarHandleTarget target = GetVarHandleTarget(invoke);
5038   VarHandleSlowPathARM64* slow_path = nullptr;
5039   if (!byte_swap) {
5040     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
5041     slow_path->SetGetAndUpdateOp(get_and_update_op);
5042     GenerateVarHandleTarget(invoke, target, codegen);
5043     __ Bind(slow_path->GetNativeByteOrderLabel());
5044   }
5045 
5046   // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
5047   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
5048     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
5049     // Mark card for the object; for GetAndSet the new value is always stored.
5050     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
5051     codegen->MarkGCCard(target.object, arg.W(), new_value_can_be_null);
5052   }
5053 
5054   // Reuse the `target.offset` temporary for the pointer to the target location,
5055   // except for references that need the offset for the non-Baker read barrier.
5056   UseScratchRegisterScope temps(masm);
5057   Register tmp_ptr = target.offset.X();
5058   if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
5059       value_type == DataType::Type::kReference) {
5060     tmp_ptr = temps.AcquireX();
5061   }
5062   __ Add(tmp_ptr, target.object.X(), target.offset.X());
5063 
5064   // The load/store type is never floating point.
5065   bool is_fp = DataType::IsFloatingPointType(value_type);
5066   DataType::Type load_store_type = is_fp
5067       ? ((value_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64)
5068       : value_type;
5069   // Avoid sign extension in the CAS loop. Sign-extend after the loop.
5070   // Note: Using unsigned values yields the same value to store (we do not store higher bits).
5071   if (value_type == DataType::Type::kInt8) {
5072     load_store_type = DataType::Type::kUint8;
5073   } else if (value_type == DataType::Type::kInt16) {
5074     load_store_type = DataType::Type::kUint16;
5075   }
5076 
5077   // Prepare register for old value.
5078   CPURegister old_value = out;
5079   if (get_and_update_op == GetAndUpdateOp::kSet) {
5080     // For floating point GetAndSet, do the GenerateGetAndUpdate() with core registers,
5081     // rather than moving between core and FP registers in the loop.
5082     arg = MoveToTempIfFpRegister(arg, value_type, masm, &temps);
5083     if (DataType::IsFloatingPointType(value_type) && !arg.IsZero()) {
5084       // We need a temporary register for the old value: the FP new value has already taken a
5085       // scratch register (unless it is the zero bit pattern, +0.0f or +0.0) and another one is
5086       // needed inside GenerateGetAndUpdate(), so a normal temporary was allocated for this case.
5087       old_value = CPURegisterFrom(locations->GetTemp(1u), load_store_type);
5088     } else if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) &&
5089                value_type == DataType::Type::kReference) {
5090       // Load the old value initially to a scratch register.
5091       // We shall move it to `out` later with a read barrier.
5092       old_value = temps.AcquireW();
5093     }
5094   }
5095 
5096   if (byte_swap) {
5097     DCHECK_NE(value_type, DataType::Type::kReference);
5098     DCHECK_NE(DataType::Size(value_type), 1u);
5099     if (get_and_update_op == GetAndUpdateOp::kAdd) {
5100       // We need to do the byte swapping in the CAS loop for GetAndAdd.
5101       get_and_update_op = GetAndUpdateOp::kAddWithByteSwap;
5102     } else if (!arg.IsZero()) {
5103       // For other operations, avoid byte swap inside the CAS loop by providing an adjusted `arg`.
5104       // For GetAndSet use a scratch register; FP argument is already in a scratch register.
5105       // For bitwise operations GenerateGetAndUpdate() needs both scratch registers;
5106       // we have allocated a normal temporary to handle that.
5107       CPURegister temp = (get_and_update_op == GetAndUpdateOp::kSet)
5108           ? (is_fp ? arg : (arg.Is64Bits() ? temps.AcquireX() : temps.AcquireW()))
5109           : CPURegisterFrom(locations->GetTemp(1u), load_store_type);
5110       GenerateReverseBytes(masm, load_store_type, arg, temp);
5111       arg = temp;
5112     }
5113   }
5114 
5115   GenerateGetAndUpdate(codegen, get_and_update_op, load_store_type, order, tmp_ptr, arg, old_value);
5116 
5117   if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) {
5118     // The only adjustment needed is sign-extension for `kInt16`.
5119     // Everything else has been done by the `GenerateGetAndUpdate()`.
5120     DCHECK(byte_swap);
5121     if (value_type == DataType::Type::kInt16) {
5122       DCHECK_EQ(load_store_type, DataType::Type::kUint16);
5123       __ Sxth(out.W(), old_value.W());
5124     }
5125   } else if (byte_swap) {
5126     // Also handles moving to FP registers.
5127     GenerateReverseBytes(masm, value_type, old_value, out);
5128   } else if (get_and_update_op == GetAndUpdateOp::kSet && value_type == DataType::Type::kFloat64) {
5129     __ Fmov(out.D(), old_value.X());
5130   } else if (get_and_update_op == GetAndUpdateOp::kSet && value_type == DataType::Type::kFloat32) {
5131     __ Fmov(out.S(), old_value.W());
5132   } else if (value_type == DataType::Type::kInt8) {
5133     __ Sxtb(out.W(), old_value.W());
5134   } else if (value_type == DataType::Type::kInt16) {
5135     __ Sxth(out.W(), old_value.W());
5136   } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
5137     if (kUseBakerReadBarrier) {
5138       codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(out.W(), old_value.W());
5139     } else {
5140       codegen->GenerateReadBarrierSlow(
5141           invoke,
5142           Location::RegisterLocation(out.GetCode()),
5143           Location::RegisterLocation(old_value.GetCode()),
5144           Location::RegisterLocation(target.object.GetCode()),
5145           /*offset=*/ 0u,
5146           /*index=*/ Location::RegisterLocation(target.offset.GetCode()));
5147     }
5148   }
5149 
5150   if (!byte_swap) {
5151     __ Bind(slow_path->GetExitLabel());
5152   }
5153 }
5154 
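// Illustrative summary (derived from the Visit methods below): getAndSet maps to
// GetAndUpdateOp::kSet, getAndAdd to kAdd, and getAndBitwise{And,Or,Xor} to kAnd/kOr/kXor;
// the plain, "...Acquire" and "...Release" variants use seq_cst, acquire and release order
// respectively.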
VisitVarHandleGetAndSet(HInvoke * invoke)5155 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
5156   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
5157 }
5158 
VisitVarHandleGetAndSet(HInvoke * invoke)5159 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
5160   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
5161 }
5162 
VisitVarHandleGetAndSetAcquire(HInvoke * invoke)5163 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5164   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
5165 }
5166 
VisitVarHandleGetAndSetAcquire(HInvoke * invoke)5167 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5168   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
5169 }
5170 
VisitVarHandleGetAndSetRelease(HInvoke * invoke)5171 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5172   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
5173 }
5174 
VisitVarHandleGetAndSetRelease(HInvoke * invoke)5175 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5176   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
5177 }
5178 
VisitVarHandleGetAndAdd(HInvoke * invoke)5179 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5180   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
5181 }
5182 
VisitVarHandleGetAndAdd(HInvoke * invoke)5183 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5184   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
5185 }
5186 
VisitVarHandleGetAndAddAcquire(HInvoke * invoke)5187 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5188   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
5189 }
5190 
VisitVarHandleGetAndAddAcquire(HInvoke * invoke)5191 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5192   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
5193 }
5194 
VisitVarHandleGetAndAddRelease(HInvoke * invoke)5195 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5196   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
5197 }
5198 
VisitVarHandleGetAndAddRelease(HInvoke * invoke)5199 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5200   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
5201 }
5202 
VisitVarHandleGetAndBitwiseAnd(HInvoke * invoke)5203 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5204   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
5205 }
5206 
VisitVarHandleGetAndBitwiseAnd(HInvoke * invoke)5207 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5208   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
5209 }
5210 
VisitVarHandleGetAndBitwiseAndAcquire(HInvoke * invoke)5211 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5212   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
5213 }
5214 
VisitVarHandleGetAndBitwiseAndAcquire(HInvoke * invoke)5215 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5216   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
5217 }
5218 
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)5219 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5220   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
5221 }
5222 
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)5223 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5224   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
5225 }
5226 
VisitVarHandleGetAndBitwiseOr(HInvoke * invoke)5227 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5228   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
5229 }
5230 
VisitVarHandleGetAndBitwiseOr(HInvoke * invoke)5231 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5232   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
5233 }
5234 
VisitVarHandleGetAndBitwiseOrAcquire(HInvoke * invoke)5235 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5236   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
5237 }
5238 
VisitVarHandleGetAndBitwiseOrAcquire(HInvoke * invoke)5239 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5240   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
5241 }
5242 
VisitVarHandleGetAndBitwiseOrRelease(HInvoke * invoke)5243 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5244   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
5245 }
5246 
VisitVarHandleGetAndBitwiseOrRelease(HInvoke * invoke)5247 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5248   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
5249 }
5250 
VisitVarHandleGetAndBitwiseXor(HInvoke * invoke)5251 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5252   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
5253 }
5254 
VisitVarHandleGetAndBitwiseXor(HInvoke * invoke)5255 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5256   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
5257 }
5258 
VisitVarHandleGetAndBitwiseXorAcquire(HInvoke * invoke)5259 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5260   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
5261 }
5262 
VisitVarHandleGetAndBitwiseXorAcquire(HInvoke * invoke)5263 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5264   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
5265 }
5266 
VisitVarHandleGetAndBitwiseXorRelease(HInvoke * invoke)5267 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5268   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
5269 }
5270 
VisitVarHandleGetAndBitwiseXorRelease(HInvoke * invoke)5271 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5272   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
5273 }
5274 
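// Background (illustrative): the byte-array-view code below serves VarHandles created with
// MethodHandles.byteArrayViewVarHandle(T[].class, ByteOrder), which view a byte[] as a wider
// primitive type. When the handle's ByteOrder differs from the native byte order, the
// byte_swap=true paths of the generators above swap bytes inline instead of going to the runtime.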
EmitByteArrayViewCode(CodeGenerator * codegen_in)5275 void VarHandleSlowPathARM64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
5276   DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
5277   CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
5278   MacroAssembler* masm = codegen->GetVIXLAssembler();
5279   HInvoke* invoke = GetInvoke();
5280   mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
5281   DataType::Type value_type =
5282       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5283   DCHECK_NE(value_type, DataType::Type::kReference);
5284   size_t size = DataType::Size(value_type);
5285   DCHECK_GT(size, 1u);
5286   Register varhandle = InputRegisterAt(invoke, 0);
5287   Register object = InputRegisterAt(invoke, 1);
5288   Register index = InputRegisterAt(invoke, 2);
5289 
5290   MemberOffset class_offset = mirror::Object::ClassOffset();
5291   MemberOffset array_length_offset = mirror::Array::LengthOffset();
5292   MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
5293   MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
5294 
5295   __ Bind(GetByteArrayViewCheckLabel());
5296 
5297   VarHandleTarget target = GetVarHandleTarget(invoke);
5298   {
5299     UseScratchRegisterScope temps(masm);
5300     Register temp = temps.AcquireW();
5301     Register temp2 = temps.AcquireW();
5302 
5303     // The main path checked that coordinateType0 is an array class that matches
5304     // the class of the actual coordinate argument, but it does not match the value type.
5305     // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
5306     __ Ldr(temp, HeapOperand(varhandle, class_offset.Int32Value()));
5307     codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
5308     __ Cmp(temp, temp2);
5309     __ B(GetEntryLabel(), ne);
5310 
5311     // Check for array index out of bounds.
5312     __ Ldr(temp, HeapOperand(object, array_length_offset.Int32Value()));
5313     __ Subs(temp, temp, index);
5314     __ Ccmp(temp, size, NoFlag, hs);  // If SUBS yields LO (C=false), keep the C flag clear.
5315     __ B(GetEntryLabel(), lo);
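    // Taken together, SUBS + CCMP + B.LO branch to the slow path unless (using unsigned
    // comparisons) index <= length and length - index >= size, i.e. unless the accessed bytes
    // [index, index + size) fit in the array; this avoids computing `index + size`, which could
    // overflow.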
5316 
5317     // Construct the target.
5318     __ Add(target.offset, index, data_offset.Int32Value());
5319 
5320     // Alignment check. For unaligned access, go to the runtime.
5321     DCHECK(IsPowerOfTwo(size));
5322     if (size == 2u) {
5323       __ Tbnz(target.offset, 0, GetEntryLabel());
5324     } else {
5325       __ Tst(target.offset, size - 1u);
5326       __ B(GetEntryLabel(), ne);
5327     }
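    // For example (illustrative), a 4-byte view requires `offset % 4 == 0`: TST against
    // `size - 1` tests the low bits, while the 2-byte case only needs bit 0, hence TBNZ.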
5328 
5329     // Byte order check. For native byte order return to the main path.
5330     if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
5331         IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5332       // There is no reason to differentiate between native byte order and byte-swap
5333       // for setting a zero bit pattern. Just return to the main path.
5334       __ B(GetNativeByteOrderLabel());
5335       return;
5336     }
5337     __ Ldr(temp, HeapOperand(varhandle, native_byte_order_offset.Int32Value()));
5338     __ Cbnz(temp, GetNativeByteOrderLabel());
5339   }
5340 
5341   switch (access_mode_template) {
5342     case mirror::VarHandle::AccessModeTemplate::kGet:
5343       GenerateVarHandleGet(invoke, codegen, order_, /*byte_swap=*/ true);
5344       break;
5345     case mirror::VarHandle::AccessModeTemplate::kSet:
5346       GenerateVarHandleSet(invoke, codegen, order_, /*byte_swap=*/ true);
5347       break;
5348     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
5349     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
5350       GenerateVarHandleCompareAndSetOrExchange(
5351           invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
5352       break;
5353     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
5354       GenerateVarHandleGetAndUpdate(
5355           invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
5356       break;
5357   }
5358   __ B(GetExitLabel());
5359 }
5360 
5361 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
5362 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
5363 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
5364 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
5365 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
5366 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendObject);
5367 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendString);
5368 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharSequence);
5369 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharArray);
5370 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendBoolean);
5371 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendChar);
5372 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendInt);
5373 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendLong);
5374 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendFloat);
5375 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendDouble);
5376 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
5377 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
5378 
5379 // 1.8.
5380 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
5381 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
5382 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
5383 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
5384 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)
5385 
5386 UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvokeExact)
5387 UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvoke)
5388 
5389 UNREACHABLE_INTRINSICS(ARM64)
5390 
5391 #undef __
5392 
5393 }  // namespace arm64
5394 }  // namespace art
5395