/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm64.h"

#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "code_generator_arm64.h"
#include "common_arm64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-inl.h"
#include "utils/arm64/assembler_arm64.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop

namespace art {

namespace arm64 {

using helpers::DRegisterFrom;
using helpers::FPRegisterFrom;
using helpers::HeapOperand;
using helpers::LocationFrom;
using helpers::OperandFrom;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
using helpers::InputRegisterAt;
using helpers::OutputRegister;

namespace {

ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
  return MemOperand(XRegisterFrom(location), offset);
}

}  // namespace

MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
  return codegen_->GetVIXLAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

#define __ codegen->GetVIXLAssembler()->

static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorARM64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  DCHECK_NE(type, Primitive::kPrimVoid);

  if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) {
    Register trg_reg = RegisterFrom(trg, type);
    Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
    __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
  } else {
    FPRegister trg_reg = FPRegisterFrom(trg, type);
    FPRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type);
    __ Fmov(trg_reg, res_reg);
  }
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) {
  InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit IntrinsicSlowPathARM64(HInvoke* invoke)
      : SlowPathCodeARM64(invoke), invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen);

    {
      // Ensure that between the BLR (emitted by Generate*Call) and RecordPcInfo there
      // are no pools emitted.
      vixl::EmissionCheckScope guard(codegen->GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
      if (invoke_->IsInvokeStaticOrDirect()) {
        codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
                                            LocationFrom(kArtMethodRegister));
      } else {
        codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister));
      }
      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathARM64"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64);
};

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
      : SlowPathCodeARM64(instruction), tmp_(tmp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);

    Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
    Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
    Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
    Register tmp_reg = WRegisterFrom(tmp_);

    __ Bind(GetEntryLabel());
    vixl::aarch64::Label slow_copy_loop;
    __ Bind(&slow_copy_loop);
    __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
    codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp_reg = ReadBarrier::Mark(tmp_reg);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(tmp_.reg(), LR);
    DCHECK_NE(tmp_.reg(), WSP);
    DCHECK_NE(tmp_.reg(), WZR);
    // IP0 is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved).  It thus cannot be used by
    // any live register in this slow path.
    DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
    DCHECK_NE(tmp_.reg(), IP0);
    DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
    // This runtime call does not require a stack map.
    codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
    __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(&slow_copy_loop, ne);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; }

 private:
  Location tmp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
};
#undef __

bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

#define __ masm->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
          is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
          is64bit ? XRegisterFrom(input) : WRegisterFrom(input));
}

void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type type,
                            MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location out = locations->Out();

  switch (type) {
    case Primitive::kPrimShort:
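      // Byte-swap the 16-bit value, then sign-extend it to produce the int-typed result.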
      __ Rev16(WRegisterFrom(out), WRegisterFrom(in));
      __ Sxth(WRegisterFrom(out), WRegisterFrom(out));
      break;
    case Primitive::kPrimInt:
    case Primitive::kPrimLong:
      __ Rev(RegisterFrom(out, type), RegisterFrom(in, type));
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << type;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetVIXLAssembler());
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    Primitive::Type type,
                                    MacroAssembler* masm) {
  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
}

static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     Primitive::Type type,
                                     MacroAssembler* masm) {
  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);

  Location in = locations->InAt(0);
  Location out = locations->Out();

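  // AArch64 has no count-trailing-zeros instruction, so reverse the bits and count leading zeros.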
  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
  __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
}

static void GenReverse(LocationSummary* locations,
                       Primitive::Type type,
                       MacroAssembler* masm) {
  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);

  Location in = locations->InAt(0);
  Location out = locations->Out();

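  // RBIT reverses the bit order of the register, which is exactly Integer/Long.reverse().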
  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
}

static void GenBitCount(HInvoke* instr, Primitive::Type type, MacroAssembler* masm) {
  DCHECK(Primitive::IsIntOrLongType(type)) << type;
  DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
  DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(instr, 0);
  Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
  FPRegister fpr = (type == Primitive::kPrimLong) ? temps.AcquireD() : temps.AcquireS();

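  // There is no general-purpose popcount instruction on ARMv8, so move the value to a SIMD
  // register, count the set bits per byte with CNT, then sum the byte counts with ADDV.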
  __ Fmov(fpr, src);
  __ Cnt(fpr.V8B(), fpr.V8B());
  __ Addv(fpr.B(), fpr.V8B());
  __ Fmov(dst, fpr);
}

void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(invoke, Primitive::kPrimLong, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(invoke, Primitive::kPrimInt, GetVIXLAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location out = locations->Out();

  FPRegister in_reg = is64bit ? DRegisterFrom(in) : SRegisterFrom(in);
  FPRegister out_reg = is64bit ? DRegisterFrom(out) : SRegisterFrom(out);

  __ Fabs(out_reg, in_reg);
}

void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
}

static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void GenAbsInteger(LocationSummary* locations,
                          bool is64bit,
                          MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location output = locations->Out();

  Register in_reg = is64bit ? XRegisterFrom(in) : WRegisterFrom(in);
  Register out_reg = is64bit ? XRegisterFrom(output) : WRegisterFrom(output);

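  // abs(x): compare against zero and conditionally negate the input when it is negative.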
  __ Cmp(in_reg, Operand(0));
  __ Cneg(out_reg, in_reg, lt);
}

void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToInt(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToInt(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        MacroAssembler* masm) {
  Location op1 = locations->InAt(0);
  Location op2 = locations->InAt(1);
  Location out = locations->Out();

  FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1);
  FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2);
  FPRegister out_reg = is_double ? DRegisterFrom(out) : SRegisterFrom(out);
  if (is_min) {
    __ Fmin(out_reg, op1_reg, op2_reg);
  } else {
    __ Fmax(out_reg, op1_reg, op2_reg);
  }
}

static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler());
}

static void GenMinMax(LocationSummary* locations,
                      bool is_min,
                      bool is_long,
                      MacroAssembler* masm) {
  Location op1 = locations->InAt(0);
  Location op2 = locations->InAt(1);
  Location out = locations->Out();

  Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
  Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2);
  Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out);

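  // Compare once; CSEL then selects op1 on lt (min) or gt (max) and op2 otherwise.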
  __ Cmp(op1_reg, op2_reg);
  __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt);
}

void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
  // Java 8 API definition for Math.round():
  // Return the closest long or int to the argument, with ties rounding to positive infinity.
  //
  // There is no single instruction in ARMv8 that can support the above definition.
  // We use FCVTAS here because it has the closest semantics.
  // FCVTAS performs rounding to nearest integer, ties away from zero.
  // For most inputs (positive values, zero or NaN), this instruction is enough.
  // We only need a little extra handling after FCVTAS when the input is a negative tie value.
  //
  // We did not choose FCVTPS because, although it rounds toward positive infinity,
  // it does not round to nearest. For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
  // Using it would require extra handling code for most inputs.
  LocationSummary* l = invoke->GetLocations();
  FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
  FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
  vixl::aarch64::Label done;

  // Round to nearest integer, ties away from zero.
  __ Fcvtas(out_reg, in_reg);

  // For positive values, zero or NaN inputs, rounding is done.
  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, out_reg += 1.
  __ Frinta(tmp_fp, in_reg);
  __ Fsub(tmp_fp, in_reg, tmp_fp);
  __ Fcmp(tmp_fp, 0.5);
  __ Cinc(out_reg, out_reg, eq);

  __ Bind(&done);
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  codegen_->Load(Primitive::kPrimNot, WRegisterFrom(invoke->GetLocations()->Out()),
                 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         Primitive::Type type,
                         bool is_volatile,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK((type == Primitive::kPrimInt) ||
         (type == Primitive::kPrimLong) ||
         (type == Primitive::kPrimNot));
  Location base_loc = locations->InAt(1);
  Register base = WRegisterFrom(base_loc);      // Object pointer.
  Location offset_loc = locations->InAt(2);
  Register offset = XRegisterFrom(offset_loc);  // Long offset.
  Location trg_loc = locations->Out();
  Register trg = RegisterFrom(trg_loc, type);

  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
    Register temp = WRegisterFrom(locations->GetTemp(0));
    codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
                                                       trg_loc,
                                                       base,
                                                       /* offset */ 0u,
                                                       /* index */ offset_loc,
                                                       /* scale_factor */ 0u,
                                                       temp,
                                                       /* needs_null_check */ false,
                                                       is_volatile);
  } else {
    // Other cases.
    MemOperand mem_op(base.X(), offset);
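    // Volatile gets are implemented with a load-acquire; non-volatile gets use a plain load.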
    if (is_volatile) {
      codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
    } else {
      codegen->Load(type, trg, mem_op);
    }

    if (type == Primitive::kPrimNot) {
      DCHECK(trg.IsW());
      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
    }
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
    // We need a temporary register for the read barrier marking slow
    // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}

void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}

static void GenUnsafePut(HInvoke* invoke,
                         Primitive::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = codegen->GetVIXLAssembler();

  Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
  Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
  Register value = RegisterFrom(locations->InAt(3), type);
  Register source = value;
  MemOperand mem_op(base.X(), offset);

  {
    // We use a block to end the scratch scope before the write barrier, thus
    // freeing the temporary registers so they can be used in `MarkGCCard`.
    UseScratchRegisterScope temps(masm);

    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
      DCHECK(value.IsW());
      Register temp = temps.AcquireW();
      __ Mov(temp.W(), value.W());
      codegen->GetAssembler()->PoisonHeapReference(temp.W());
      source = temp;
    }

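    // Volatile and ordered puts are both implemented with a store-release; plain puts use a
    // normal store.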
1022     if (is_volatile || is_ordered) {
1023       codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check */ false);
1024     } else {
1025       codegen->Store(type, source, mem_op);
1026     }
1027   }
1028 
1029   if (type == Primitive::kPrimNot) {
1030     bool value_can_be_null = true;  // TODO: Worth finding out this information?
1031     codegen->MarkGCCard(base, value, value_can_be_null);
1032   }
1033 }
1034 
VisitUnsafePut(HInvoke * invoke)1035 void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
1036   GenUnsafePut(invoke,
1037                Primitive::kPrimInt,
1038                /* is_volatile */ false,
1039                /* is_ordered */ false,
1040                codegen_);
1041 }
VisitUnsafePutOrdered(HInvoke * invoke)1042 void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
1043   GenUnsafePut(invoke,
1044                Primitive::kPrimInt,
1045                /* is_volatile */ false,
1046                /* is_ordered */ true,
1047                codegen_);
1048 }
VisitUnsafePutVolatile(HInvoke * invoke)1049 void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
1050   GenUnsafePut(invoke,
1051                Primitive::kPrimInt,
1052                /* is_volatile */ true,
1053                /* is_ordered */ false,
1054                codegen_);
1055 }
VisitUnsafePutObject(HInvoke * invoke)1056 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
1057   GenUnsafePut(invoke,
1058                Primitive::kPrimNot,
1059                /* is_volatile */ false,
1060                /* is_ordered */ false,
1061                codegen_);
1062 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)1063 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1064   GenUnsafePut(invoke,
1065                Primitive::kPrimNot,
1066                /* is_volatile */ false,
1067                /* is_ordered */ true,
1068                codegen_);
1069 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)1070 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1071   GenUnsafePut(invoke,
1072                Primitive::kPrimNot,
1073                /* is_volatile */ true,
1074                /* is_ordered */ false,
1075                codegen_);
1076 }
VisitUnsafePutLong(HInvoke * invoke)1077 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
1078   GenUnsafePut(invoke,
1079                Primitive::kPrimLong,
1080                /* is_volatile */ false,
1081                /* is_ordered */ false,
1082                codegen_);
1083 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1084 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1085   GenUnsafePut(invoke,
1086                Primitive::kPrimLong,
1087                /* is_volatile */ false,
1088                /* is_ordered */ true,
1089                codegen_);
1090 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1091 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1092   GenUnsafePut(invoke,
1093                Primitive::kPrimLong,
1094                /* is_volatile */ true,
1095                /* is_ordered */ false,
1096                codegen_);
1097 }
1098 
CreateIntIntIntIntIntToInt(ArenaAllocator * arena,HInvoke * invoke,Primitive::Type type)1099 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
1100                                        HInvoke* invoke,
1101                                        Primitive::Type type) {
1102   bool can_call = kEmitCompilerReadBarrier &&
1103       kUseBakerReadBarrier &&
1104       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
1105   LocationSummary* locations = new (arena) LocationSummary(invoke,
1106                                                            (can_call
1107                                                                 ? LocationSummary::kCallOnSlowPath
1108                                                                 : LocationSummary::kNoCall),
1109                                                            kIntrinsified);
1110   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1111   locations->SetInAt(1, Location::RequiresRegister());
1112   locations->SetInAt(2, Location::RequiresRegister());
1113   locations->SetInAt(3, Location::RequiresRegister());
1114   locations->SetInAt(4, Location::RequiresRegister());
1115 
1116   // If heap poisoning is enabled, we don't want the unpoisoning
1117   // operations to potentially clobber the output. Likewise when
1118   // emitting a (Baker) read barrier, which may call.
1119   Location::OutputOverlap overlaps =
1120       ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
1121       ? Location::kOutputOverlap
1122       : Location::kNoOutputOverlap;
1123   locations->SetOut(Location::RequiresRegister(), overlaps);
1124   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1125     // Temporary register for (Baker) read barrier.
1126     locations->AddTemp(Location::RequiresRegister());
1127   }
1128 }
1129 
GenCas(HInvoke * invoke,Primitive::Type type,CodeGeneratorARM64 * codegen)1130 static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* codegen) {
1131   MacroAssembler* masm = codegen->GetVIXLAssembler();
1132   LocationSummary* locations = invoke->GetLocations();
1133 
1134   Location out_loc = locations->Out();
1135   Register out = WRegisterFrom(out_loc);                           // Boolean result.
1136 
1137   Register base = WRegisterFrom(locations->InAt(1));               // Object pointer.
1138   Location offset_loc = locations->InAt(2);
1139   Register offset = XRegisterFrom(offset_loc);                     // Long offset.
1140   Register expected = RegisterFrom(locations->InAt(3), type);      // Expected.
1141   Register value = RegisterFrom(locations->InAt(4), type);         // Value.
1142 
1143   // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
1144   if (type == Primitive::kPrimNot) {
1145     // Mark card for object assuming new value is stored.
1146     bool value_can_be_null = true;  // TODO: Worth finding out this information?
1147     codegen->MarkGCCard(base, value, value_can_be_null);
1148 
1149     // The only read barrier implementation supporting the
1150     // UnsafeCASObject intrinsic is the Baker-style read barriers.
1151     DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1152 
1153     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1154       Register temp = WRegisterFrom(locations->GetTemp(0));
1155       // Need to make sure the reference stored in the field is a to-space
1156       // one before attempting the CAS or the CAS could fail incorrectly.
1157       codegen->GenerateReferenceLoadWithBakerReadBarrier(
1158           invoke,
1159           out_loc,  // Unused, used only as a "temporary" within the read barrier.
1160           base,
1161           /* offset */ 0u,
1162           /* index */ offset_loc,
1163           /* scale_factor */ 0u,
1164           temp,
1165           /* needs_null_check */ false,
1166           /* use_load_acquire */ false,
1167           /* always_update_field */ true);
1168     }
1169   }
1170 
1171   UseScratchRegisterScope temps(masm);
1172   Register tmp_ptr = temps.AcquireX();                             // Pointer to actual memory.
1173   Register tmp_value = temps.AcquireSameSizeAs(value);             // Value in memory.
1174 
1175   Register tmp_32 = tmp_value.W();
1176 
1177   __ Add(tmp_ptr, base.X(), Operand(offset));
1178 
1179   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
1180     codegen->GetAssembler()->PoisonHeapReference(expected);
1181     if (value.Is(expected)) {
1182       // Do not poison `value`, as it is the same register as
1183       // `expected`, which has just been poisoned.
1184     } else {
1185       codegen->GetAssembler()->PoisonHeapReference(value);
1186     }
1187   }
1188 
1189   // do {
1190   //   tmp_value = [tmp_ptr] - expected;
1191   // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
1192   // result = tmp_value != 0;
1193 
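  // A sketch of the exclusive-access loop below: Ldaxr performs a load-acquire exclusive,
  // Stlxr attempts a store-release exclusive and writes a status word to tmp_32 (0 on
  // success, non-zero if the exclusive monitor was lost), and Cbnz retries the sequence
  // until either the store succeeds or the comparison fails.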
1194   vixl::aarch64::Label loop_head, exit_loop;
1195   __ Bind(&loop_head);
1196   __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
1197   __ Cmp(tmp_value, expected);
1198   __ B(&exit_loop, ne);
1199   __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
1200   __ Cbnz(tmp_32, &loop_head);
1201   __ Bind(&exit_loop);
1202   __ Cset(out, eq);
1203 
1204   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
1205     codegen->GetAssembler()->UnpoisonHeapReference(expected);
1206     if (value.Is(expected)) {
1207       // Do not unpoison `value`, as it is the same register as
1208       // `expected`, which has just been unpoisoned.
1209     } else {
1210       codegen->GetAssembler()->UnpoisonHeapReference(value);
1211     }
1212   }
1213 }
1214 
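// Illustrative note (not generated code): the intrinsics below accelerate Java calls of
// the form unsafe.compareAndSwapInt/Long/Object(obj, offset, expected, newValue), each
// returning a boolean success flag.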
1215 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
1216   CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimInt);
1217 }
1218 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
1219   CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimLong);
1220 }
1221 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
1222   // The only read barrier implementation supporting the
1223   // UnsafeCASObject intrinsic is the Baker-style read barriers.
1224   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1225     return;
1226   }
1227 
1228   CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimNot);
1229 }
1230 
1231 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
1232   GenCas(invoke, Primitive::kPrimInt, codegen_);
1233 }
1234 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
1235   GenCas(invoke, Primitive::kPrimLong, codegen_);
1236 }
1237 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
1238   // The only read barrier implementation supporting the
1239   // UnsafeCASObject intrinsic is the Baker-style read barriers.
1240   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1241 
1242   GenCas(invoke, Primitive::kPrimNot, codegen_);
1243 }
1244 
1245 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
1246   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1247                                                             invoke->InputAt(1)->CanBeNull()
1248                                                                 ? LocationSummary::kCallOnSlowPath
1249                                                                 : LocationSummary::kNoCall,
1250                                                             kIntrinsified);
1251   locations->SetInAt(0, Location::RequiresRegister());
1252   locations->SetInAt(1, Location::RequiresRegister());
1253   locations->AddTemp(Location::RequiresRegister());
1254   locations->AddTemp(Location::RequiresRegister());
1255   locations->AddTemp(Location::RequiresRegister());
1256   // Need a temporary register for the String compression feature.
1257   if (mirror::kUseStringCompression) {
1258     locations->AddTemp(Location::RequiresRegister());
1259   }
1260   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1261 }
1262 
1263 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
1264   MacroAssembler* masm = GetVIXLAssembler();
1265   LocationSummary* locations = invoke->GetLocations();
1266 
1267   Register str = InputRegisterAt(invoke, 0);
1268   Register arg = InputRegisterAt(invoke, 1);
1269   DCHECK(str.IsW());
1270   DCHECK(arg.IsW());
1271   Register out = OutputRegister(invoke);
1272 
1273   Register temp0 = WRegisterFrom(locations->GetTemp(0));
1274   Register temp1 = WRegisterFrom(locations->GetTemp(1));
1275   Register temp2 = WRegisterFrom(locations->GetTemp(2));
1276   Register temp3;
1277   if (mirror::kUseStringCompression) {
1278     temp3 = WRegisterFrom(locations->GetTemp(3));
1279   }
1280 
1281   vixl::aarch64::Label loop;
1282   vixl::aarch64::Label find_char_diff;
1283   vixl::aarch64::Label end;
1284   vixl::aarch64::Label different_compression;
1285 
1286   // Get offsets of count and value fields within a string object.
1287   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1288   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1289 
1290   // Note that the null check must have been done earlier.
1291   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1292 
1293   // Take slow path and throw if input can be and is null.
1294   SlowPathCodeARM64* slow_path = nullptr;
1295   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1296   if (can_slow_path) {
1297     slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
1298     codegen_->AddSlowPath(slow_path);
1299     __ Cbz(arg, slow_path->GetEntryLabel());
1300   }
1301 
1302   // Reference equality check, return 0 if same reference.
1303   __ Subs(out, str, arg);
1304   __ B(&end, eq);
1305 
1306   if (mirror::kUseStringCompression) {
1307     // Load `count` fields of this and argument strings.
1308     __ Ldr(temp3, HeapOperand(str, count_offset));
1309     __ Ldr(temp2, HeapOperand(arg, count_offset));
1310     // Clean out compression flag from lengths.
1311     __ Lsr(temp0, temp3, 1u);
1312     __ Lsr(temp1, temp2, 1u);
1313   } else {
1314     // Load lengths of this and argument strings.
1315     __ Ldr(temp0, HeapOperand(str, count_offset));
1316     __ Ldr(temp1, HeapOperand(arg, count_offset));
1317   }
1318   // out = length diff.
1319   __ Subs(out, temp0, temp1);
1320   // temp0 = min(len(str), len(arg)).
1321   __ Csel(temp0, temp1, temp0, ge);
1322   // Shorter string is empty?
1323   __ Cbz(temp0, &end);
1324 
1325   if (mirror::kUseStringCompression) {
1326     // Check if both strings use the same compression style; only then can this comparison loop be used.
1327     __ Eor(temp2, temp2, Operand(temp3));
1328     // Interleave with the compression flag extraction, which is needed for both paths,
1329     // and also set the flags, which are needed only for the different-compression path.
1330     __ Ands(temp3.W(), temp3.W(), Operand(1));
1331     __ Tbnz(temp2, 0, &different_compression);  // Does not use flags.
1332   }
1333   // Store offset of string value in preparation for comparison loop.
1334   __ Mov(temp1, value_offset);
1335   if (mirror::kUseStringCompression) {
1336     // For string compression, calculate the number of bytes to compare (not chars).
1337     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1338     __ Lsl(temp0, temp0, temp3);
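    // temp3 holds the compression flag (0 = compressed, 1 = uncompressed, see the
    // static_assert further down), so this shift yields the byte count: `chars` for a
    // compressed string and `2 * chars` otherwise.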
1339   }
1340 
1341   UseScratchRegisterScope scratch_scope(masm);
1342   Register temp4 = scratch_scope.AcquireX();
1343 
1344   // Assertions that must hold in order to compare strings 8 bytes at a time.
1345   DCHECK_ALIGNED(value_offset, 8);
1346   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1347 
1348   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1349   DCHECK_EQ(char_size, 2u);
1350 
1351   // Promote temp2 to an X reg, ready for LDR.
1352   temp2 = temp2.X();
1353 
1354   // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
1355   __ Bind(&loop);
1356   __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
1357   __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
1358   __ Cmp(temp4, temp2);
1359   __ B(ne, &find_char_diff);
1360   __ Add(temp1, temp1, char_size * 4);
1361   // With string compression, we have compared 8 bytes, otherwise 4 chars.
1362   __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
1363   __ B(&loop, hi);
1364   __ B(&end);
1365 
1366   // Promote temp1 to an X reg, ready for EOR.
1367   temp1 = temp1.X();
1368 
1369   // Find the single character difference.
1370   __ Bind(&find_char_diff);
1371   // Get the bit position of the first character that differs.
1372   __ Eor(temp1, temp2, temp4);
1373   __ Rbit(temp1, temp1);
1374   __ Clz(temp1, temp1);
1375 
1376   // If the number of chars remaining <= the index where the difference occurs (0-3), then
1377   // the difference occurs outside the remaining string data, so just return length diff (out).
1378   // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
1379   // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
1380   // unsigned when string compression is disabled.
1381   // When it's enabled, the comparison must be unsigned.
1382   __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
1383   __ B(ls, &end);
1384 
1385   // Extract the characters and calculate the difference.
1386   if (mirror::kUseStringCompression) {
1387     __ Bic(temp1, temp1, 0x7);
1388     __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
1389   } else {
1390     __ Bic(temp1, temp1, 0xf);
1391   }
1392   __ Lsr(temp2, temp2, temp1);
1393   __ Lsr(temp4, temp4, temp1);
1394   if (mirror::kUseStringCompression) {
1395     // Prioritize the case of compressed strings and calculate such result first.
1396     __ Uxtb(temp1, temp4);
1397     __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
1398     __ Tbz(temp3, 0u, &end);  // If actually compressed, we're done.
1399   }
1400   __ Uxth(temp4, temp4);
1401   __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
1402 
1403   if (mirror::kUseStringCompression) {
1404     __ B(&end);
1405     __ Bind(&different_compression);
1406 
1407     // Comparison for different compression style.
1408     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1409     DCHECK_EQ(c_char_size, 1u);
1410     temp1 = temp1.W();
1411     temp2 = temp2.W();
1412     temp4 = temp4.W();
1413 
1414     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1415     // Note that flags have been set by the `str` compression flag extraction to `temp3`
1416     // before branching to the `different_compression` label.
1417     __ Csel(temp1, str, arg, eq);   // Pointer to the compressed string.
1418     __ Csel(temp2, str, arg, ne);   // Pointer to the uncompressed string.
1419 
1420     // We want to free up the temp3, currently holding `str` compression flag, for comparison.
1421     // So, we move it to the bottom bit of the iteration count `temp0` which we then need to treat
1422     // as unsigned. Start by freeing the bit with a LSL and continue further down by a SUB which
1423     // will allow `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1424     __ Lsl(temp0, temp0, 1u);
1425 
1426     // Adjust temp1 and temp2 from string pointers to data pointers.
1427     __ Add(temp1, temp1, Operand(value_offset));
1428     __ Add(temp2, temp2, Operand(value_offset));
1429 
1430     // Complete the move of the compression flag.
1431     __ Sub(temp0, temp0, Operand(temp3));
1432 
1433     vixl::aarch64::Label different_compression_loop;
1434     vixl::aarch64::Label different_compression_diff;
1435 
1436     __ Bind(&different_compression_loop);
1437     __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
1438     __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
1439     __ Subs(temp4, temp4, Operand(temp3));
1440     __ B(&different_compression_diff, ne);
1441     __ Subs(temp0, temp0, 2);
1442     __ B(&different_compression_loop, hi);
1443     __ B(&end);
1444 
1445     // Calculate the difference.
1446     __ Bind(&different_compression_diff);
1447     __ Tst(temp0, Operand(1));
1448     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1449                   "Expecting 0=compressed, 1=uncompressed");
1450     __ Cneg(out, temp4, ne);
1451   }
1452 
1453   __ Bind(&end);
1454 
1455   if (can_slow_path) {
1456     __ Bind(slow_path->GetExitLabel());
1457   }
1458 }
1459 
1460 // The cut off for unrolling the loop in String.equals() intrinsic for const strings.
1461 // The normal loop plus the pre-header is 9 instructions without string compression and 12
1462 // instructions with string compression. We can compare up to 8 bytes in 4 instructions
1463 // (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up
1464 // to 10 instructions for the unrolled loop.
1465 constexpr size_t kShortConstStringEqualsCutoffInBytes = 32;
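// With a 32-byte cutoff this covers const strings shorter than 16 uncompressed (or 32
// compressed) characters, i.e. at most two of the 16-byte LDP+LDP+CMP+CCMP+BNE steps
// described above.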
1466 
1467 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1468   if (candidate->IsLoadString()) {
1469     HLoadString* load_string = candidate->AsLoadString();
1470     const DexFile& dex_file = load_string->GetDexFile();
1471     return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1472   }
1473   return nullptr;
1474 }
1475 
1476 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
1477   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1478                                                             LocationSummary::kNoCall,
1479                                                             kIntrinsified);
1480   locations->SetInAt(0, Location::RequiresRegister());
1481   locations->SetInAt(1, Location::RequiresRegister());
1482 
1483   // For the generic implementation and for long const strings we need a temporary.
1484   // We do not need it for short const strings, up to 8 bytes, see code generation below.
1485   uint32_t const_string_length = 0u;
1486   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1487   if (const_string == nullptr) {
1488     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1489   }
1490   bool is_compressed =
1491       mirror::kUseStringCompression &&
1492       const_string != nullptr &&
1493       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1494   if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) {
1495     locations->AddTemp(Location::RequiresRegister());
1496   }
1497 
1498   // TODO: If the String.equals() is used only for an immediately following HIf, we can
1499   // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1500   // Then we shall need an extra temporary register instead of the output register.
1501   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1502 }
1503 
1504 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
1505   MacroAssembler* masm = GetVIXLAssembler();
1506   LocationSummary* locations = invoke->GetLocations();
1507 
1508   Register str = WRegisterFrom(locations->InAt(0));
1509   Register arg = WRegisterFrom(locations->InAt(1));
1510   Register out = XRegisterFrom(locations->Out());
1511 
1512   UseScratchRegisterScope scratch_scope(masm);
1513   Register temp = scratch_scope.AcquireW();
1514   Register temp1 = scratch_scope.AcquireW();
1515 
1516   vixl::aarch64::Label loop;
1517   vixl::aarch64::Label end;
1518   vixl::aarch64::Label return_true;
1519   vixl::aarch64::Label return_false;
1520 
1521   // Get offsets of count, value, and class fields within a string object.
1522   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1523   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1524   const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1525 
1526   // Note that the null check must have been done earlier.
1527   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1528 
1529   StringEqualsOptimizations optimizations(invoke);
1530   if (!optimizations.GetArgumentNotNull()) {
1531     // Check if input is null, return false if it is.
1532     __ Cbz(arg, &return_false);
1533   }
1534 
1535   // Reference equality check, return true if same reference.
1536   __ Cmp(str, arg);
1537   __ B(&return_true, eq);
1538 
1539   if (!optimizations.GetArgumentIsString()) {
1540     // Instanceof check for the argument by comparing class fields.
1541     // All string objects must have the same type since String cannot be subclassed.
1542     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1543     // If the argument is a string object, its class field must be equal to receiver's class field.
1544     __ Ldr(temp, MemOperand(str.X(), class_offset));
1545     __ Ldr(temp1, MemOperand(arg.X(), class_offset));
1546     __ Cmp(temp, temp1);
1547     __ B(&return_false, ne);
1548   }
1549 
1550   // Check if one of the inputs is a const string. Do not special-case both strings
1551   // being const; such cases should be handled by constant folding if needed.
1552   uint32_t const_string_length = 0u;
1553   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1554   if (const_string == nullptr) {
1555     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1556     if (const_string != nullptr) {
1557       std::swap(str, arg);  // Make sure the const string is in `str`.
1558     }
1559   }
1560   bool is_compressed =
1561       mirror::kUseStringCompression &&
1562       const_string != nullptr &&
1563       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1564 
1565   if (const_string != nullptr) {
1566     // Load `count` field of the argument string and check if it matches the const string.
1567     // Also compare the compression style; if it differs, return false.
1568     __ Ldr(temp, MemOperand(arg.X(), count_offset));
1569     // Temporarily release temp1 as we may not be able to embed the flagged count in CMP immediate.
1570     scratch_scope.Release(temp1);
1571     __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
1572     temp1 = scratch_scope.AcquireW();
1573     __ B(&return_false, ne);
1574   } else {
1575     // Load `count` fields of this and argument strings.
1576     __ Ldr(temp, MemOperand(str.X(), count_offset));
1577     __ Ldr(temp1, MemOperand(arg.X(), count_offset));
1578     // Check if the `count` fields are equal; return false if they are not.
1579     // Also compare the compression style; if it differs, return false.
1580     __ Cmp(temp, temp1);
1581     __ B(&return_false, ne);
1582   }
1583 
1584   // Assertions that must hold in order to compare strings 8 bytes at a time.
1585   DCHECK_ALIGNED(value_offset, 8);
1586   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1587 
1588   if (const_string != nullptr &&
1589       const_string_length < (is_compressed ? kShortConstStringEqualsCutoffInBytes
1590                                            : kShortConstStringEqualsCutoffInBytes / 2u)) {
1591     // Load and compare the contents. Though we know the contents of the short const string
1592     // at compile time, materializing constants may be more code than loading from memory.
1593     int32_t offset = value_offset;
1594     size_t remaining_bytes =
1595         RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u);
1596     temp = temp.X();
1597     temp1 = temp1.X();
1598     while (remaining_bytes > 8u) {
1599       Register temp2 = XRegisterFrom(locations->GetTemp(0));
1600       __ Ldp(temp, temp1, MemOperand(str.X(), offset));
1601       __ Ldp(temp2, out, MemOperand(arg.X(), offset));
1602       __ Cmp(temp, temp2);
1603       __ Ccmp(temp1, out, NoFlag, eq);
1604       __ B(&return_false, ne);
1605       offset += 2u * sizeof(uint64_t);
1606       remaining_bytes -= 2u * sizeof(uint64_t);
1607     }
1608     if (remaining_bytes != 0u) {
1609       __ Ldr(temp, MemOperand(str.X(), offset));
1610       __ Ldr(temp1, MemOperand(arg.X(), offset));
1611       __ Cmp(temp, temp1);
1612       __ B(&return_false, ne);
1613     }
1614   } else {
1615     // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1616     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1617                   "Expecting 0=compressed, 1=uncompressed");
1618     __ Cbz(temp, &return_true);
1619 
1620     if (mirror::kUseStringCompression) {
1621       // For string compression, calculate the number of bytes to compare (not chars).
1622       // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1623       __ And(temp1, temp, Operand(1));    // Extract compression flag.
1624       __ Lsr(temp, temp, 1u);             // Extract length.
1625       __ Lsl(temp, temp, temp1);          // Calculate number of bytes to compare.
1626     }
1627 
1628     // Store offset of string value in preparation for comparison loop
1629     __ Mov(temp1, value_offset);
1630 
1631     temp1 = temp1.X();
1632     Register temp2 = XRegisterFrom(locations->GetTemp(0));
1633     // Loop to compare strings 8 bytes at a time starting at the front of the string.
1634     // Ok to do this because strings are zero-padded to kObjectAlignment.
1635     __ Bind(&loop);
1636     __ Ldr(out, MemOperand(str.X(), temp1));
1637     __ Ldr(temp2, MemOperand(arg.X(), temp1));
1638     __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
1639     __ Cmp(out, temp2);
1640     __ B(&return_false, ne);
1641     // With string compression, we have compared 8 bytes, otherwise 4 chars.
1642     __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
1643     __ B(&loop, hi);
1644   }
1645 
1646   // Return true and exit the function.
1647   // If loop does not result in returning false, we return true.
1648   __ Bind(&return_true);
1649   __ Mov(out, 1);
1650   __ B(&end);
1651 
1652   // Return false and exit the function.
1653   __ Bind(&return_false);
1654   __ Mov(out, 0);
1655   __ Bind(&end);
1656 }
1657 
1658 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1659                                        MacroAssembler* masm,
1660                                        CodeGeneratorARM64* codegen,
1661                                        ArenaAllocator* allocator,
1662                                        bool start_at_zero) {
1663   LocationSummary* locations = invoke->GetLocations();
1664 
1665   // Note that the null check must have been done earlier.
1666   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1667 
1668   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1669   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
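  // (The runtime IndexOf stub handles only 16-bit char values, so code points above
  // 0xFFFF have to be handled outside the intrinsic.)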
1670   SlowPathCodeARM64* slow_path = nullptr;
1671   HInstruction* code_point = invoke->InputAt(1);
1672   if (code_point->IsIntConstant()) {
1673     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
1674       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1675       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1676       slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
1677       codegen->AddSlowPath(slow_path);
1678       __ B(slow_path->GetEntryLabel());
1679       __ Bind(slow_path->GetExitLabel());
1680       return;
1681     }
1682   } else if (code_point->GetType() != Primitive::kPrimChar) {
1683     Register char_reg = WRegisterFrom(locations->InAt(1));
1684     __ Tst(char_reg, 0xFFFF0000);
1685     slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
1686     codegen->AddSlowPath(slow_path);
1687     __ B(ne, slow_path->GetEntryLabel());
1688   }
1689 
1690   if (start_at_zero) {
1691     // Start-index = 0.
1692     Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
1693     __ Mov(tmp_reg, 0);
1694   }
1695 
1696   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1697   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1698 
1699   if (slow_path != nullptr) {
1700     __ Bind(slow_path->GetExitLabel());
1701   }
1702 }
1703 
1704 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
1705   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1706                                                             LocationSummary::kCallOnMainAndSlowPath,
1707                                                             kIntrinsified);
1708   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1709   // best to align the inputs accordingly.
1710   InvokeRuntimeCallingConvention calling_convention;
1711   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1712   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1713   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
1714 
1715   // Need to send start_index=0.
1716   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1717 }
1718 
1719 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
1720   GenerateVisitStringIndexOf(
1721       invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1722 }
1723 
1724 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1725   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1726                                                             LocationSummary::kCallOnMainAndSlowPath,
1727                                                             kIntrinsified);
1728   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1729   // best to align the inputs accordingly.
1730   InvokeRuntimeCallingConvention calling_convention;
1731   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1732   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1733   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1734   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
1735 }
1736 
1737 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1738   GenerateVisitStringIndexOf(
1739       invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1740 }
1741 
1742 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1743   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1744                                                             LocationSummary::kCallOnMainAndSlowPath,
1745                                                             kIntrinsified);
1746   InvokeRuntimeCallingConvention calling_convention;
1747   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1748   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1749   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1750   locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1751   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
1752 }
1753 
1754 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1755   MacroAssembler* masm = GetVIXLAssembler();
1756   LocationSummary* locations = invoke->GetLocations();
1757 
1758   Register byte_array = WRegisterFrom(locations->InAt(0));
1759   __ Cmp(byte_array, 0);
1760   SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
1761   codegen_->AddSlowPath(slow_path);
1762   __ B(eq, slow_path->GetEntryLabel());
1763 
1764   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1765   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1766   __ Bind(slow_path->GetExitLabel());
1767 }
1768 
1769 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
1770   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1771                                                             LocationSummary::kCallOnMainOnly,
1772                                                             kIntrinsified);
1773   InvokeRuntimeCallingConvention calling_convention;
1774   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1775   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1776   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1777   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
1778 }
1779 
1780 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
1781   // No need to emit code checking whether `locations->InAt(2)` is a null
1782   // pointer, as callers of the native method
1783   //
1784   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1785   //
1786   // all include a null check on `data` before calling that method.
1787   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1788   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1789 }
1790 
1791 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
1792   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1793                                                             LocationSummary::kCallOnMainAndSlowPath,
1794                                                             kIntrinsified);
1795   InvokeRuntimeCallingConvention calling_convention;
1796   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1797   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
1798 }
1799 
1800 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
1801   MacroAssembler* masm = GetVIXLAssembler();
1802   LocationSummary* locations = invoke->GetLocations();
1803 
1804   Register string_to_copy = WRegisterFrom(locations->InAt(0));
1805   __ Cmp(string_to_copy, 0);
1806   SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
1807   codegen_->AddSlowPath(slow_path);
1808   __ B(eq, slow_path->GetEntryLabel());
1809 
1810   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1811   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1812   __ Bind(slow_path->GetExitLabel());
1813 }
1814 
1815 static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
1816   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1817   DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
1818   DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
1819 
1820   LocationSummary* const locations = new (arena) LocationSummary(invoke,
1821                                                                  LocationSummary::kCallOnMainOnly,
1822                                                                  kIntrinsified);
1823   InvokeRuntimeCallingConvention calling_convention;
1824 
1825   locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
1826   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
1827 }
1828 
1829 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
1830   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1831   DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
1832   DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(1)->GetType()));
1833   DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
1834 
1835   LocationSummary* const locations = new (arena) LocationSummary(invoke,
1836                                                                  LocationSummary::kCallOnMainOnly,
1837                                                                  kIntrinsified);
1838   InvokeRuntimeCallingConvention calling_convention;
1839 
1840   locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
1841   locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
1842   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
1843 }
1844 
1845 static void GenFPToFPCall(HInvoke* invoke,
1846                           CodeGeneratorARM64* codegen,
1847                           QuickEntrypointEnum entry) {
1848   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1849 }
1850 
1851 void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
1852   CreateFPToFPCallLocations(arena_, invoke);
1853 }
1854 
1855 void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
1856   GenFPToFPCall(invoke, codegen_, kQuickCos);
1857 }
1858 
1859 void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
1860   CreateFPToFPCallLocations(arena_, invoke);
1861 }
1862 
1863 void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
1864   GenFPToFPCall(invoke, codegen_, kQuickSin);
1865 }
1866 
1867 void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
1868   CreateFPToFPCallLocations(arena_, invoke);
1869 }
1870 
1871 void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
1872   GenFPToFPCall(invoke, codegen_, kQuickAcos);
1873 }
1874 
1875 void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
1876   CreateFPToFPCallLocations(arena_, invoke);
1877 }
1878 
1879 void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
1880   GenFPToFPCall(invoke, codegen_, kQuickAsin);
1881 }
1882 
1883 void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
1884   CreateFPToFPCallLocations(arena_, invoke);
1885 }
1886 
1887 void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
1888   GenFPToFPCall(invoke, codegen_, kQuickAtan);
1889 }
1890 
1891 void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
1892   CreateFPToFPCallLocations(arena_, invoke);
1893 }
1894 
1895 void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
1896   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
1897 }
1898 
1899 void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
1900   CreateFPToFPCallLocations(arena_, invoke);
1901 }
1902 
1903 void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
1904   GenFPToFPCall(invoke, codegen_, kQuickCosh);
1905 }
1906 
1907 void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
1908   CreateFPToFPCallLocations(arena_, invoke);
1909 }
1910 
1911 void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
1912   GenFPToFPCall(invoke, codegen_, kQuickExp);
1913 }
1914 
1915 void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
1916   CreateFPToFPCallLocations(arena_, invoke);
1917 }
1918 
1919 void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
1920   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
1921 }
1922 
1923 void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
1924   CreateFPToFPCallLocations(arena_, invoke);
1925 }
1926 
1927 void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
1928   GenFPToFPCall(invoke, codegen_, kQuickLog);
1929 }
1930 
1931 void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
1932   CreateFPToFPCallLocations(arena_, invoke);
1933 }
1934 
1935 void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
1936   GenFPToFPCall(invoke, codegen_, kQuickLog10);
1937 }
1938 
1939 void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
1940   CreateFPToFPCallLocations(arena_, invoke);
1941 }
1942 
1943 void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
1944   GenFPToFPCall(invoke, codegen_, kQuickSinh);
1945 }
1946 
1947 void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
1948   CreateFPToFPCallLocations(arena_, invoke);
1949 }
1950 
1951 void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
1952   GenFPToFPCall(invoke, codegen_, kQuickTan);
1953 }
1954 
1955 void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
1956   CreateFPToFPCallLocations(arena_, invoke);
1957 }
1958 
1959 void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
1960   GenFPToFPCall(invoke, codegen_, kQuickTanh);
1961 }
1962 
1963 void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
1964   CreateFPFPToFPCallLocations(arena_, invoke);
1965 }
1966 
1967 void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
1968   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
1969 }
1970 
1971 void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
1972   CreateFPFPToFPCallLocations(arena_, invoke);
1973 }
1974 
1975 void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
1976   GenFPToFPCall(invoke, codegen_, kQuickHypot);
1977 }
1978 
1979 void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
1980   CreateFPFPToFPCallLocations(arena_, invoke);
1981 }
1982 
1983 void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
1984   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
1985 }
1986 
1987 void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1988   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1989                                                             LocationSummary::kNoCall,
1990                                                             kIntrinsified);
1991   locations->SetInAt(0, Location::RequiresRegister());
1992   locations->SetInAt(1, Location::RequiresRegister());
1993   locations->SetInAt(2, Location::RequiresRegister());
1994   locations->SetInAt(3, Location::RequiresRegister());
1995   locations->SetInAt(4, Location::RequiresRegister());
1996 
1997   locations->AddTemp(Location::RequiresRegister());
1998   locations->AddTemp(Location::RequiresRegister());
1999   locations->AddTemp(Location::RequiresRegister());
2000 }
2001 
2002 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2003   MacroAssembler* masm = GetVIXLAssembler();
2004   LocationSummary* locations = invoke->GetLocations();
2005 
2006   // Check assumption that sizeof(Char) is 2 (used in scaling below).
2007   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
2008   DCHECK_EQ(char_size, 2u);
2009 
2010   // Location of data in char array buffer.
2011   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2012 
2013   // Location of char array data in string.
2014   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2015 
2016   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2017   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2018   Register srcObj = XRegisterFrom(locations->InAt(0));
2019   Register srcBegin = XRegisterFrom(locations->InAt(1));
2020   Register srcEnd = XRegisterFrom(locations->InAt(2));
2021   Register dstObj = XRegisterFrom(locations->InAt(3));
2022   Register dstBegin = XRegisterFrom(locations->InAt(4));
2023 
2024   Register src_ptr = XRegisterFrom(locations->GetTemp(0));
2025   Register num_chr = XRegisterFrom(locations->GetTemp(1));
2026   Register tmp1 = XRegisterFrom(locations->GetTemp(2));
2027 
2028   UseScratchRegisterScope temps(masm);
2029   Register dst_ptr = temps.AcquireX();
2030   Register tmp2 = temps.AcquireX();
2031 
2032   vixl::aarch64::Label done;
2033   vixl::aarch64::Label compressed_string_loop;
2034   __ Sub(num_chr, srcEnd, srcBegin);
2035   // Early out for valid zero-length retrievals.
2036   __ Cbz(num_chr, &done);
2037 
2038   // dst address start to copy to.
2039   __ Add(dst_ptr, dstObj, Operand(data_offset));
2040   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
2041 
2042   // src address to copy from.
2043   __ Add(src_ptr, srcObj, Operand(value_offset));
2044   vixl::aarch64::Label compressed_string_preloop;
2045   if (mirror::kUseStringCompression) {
2046     // Location of count in string.
2047     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2048     // String's length.
2049     __ Ldr(tmp2, MemOperand(srcObj, count_offset));
2050     __ Tbz(tmp2, 0, &compressed_string_preloop);
2051   }
2052   __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
2053 
2054   // Do the copy.
2055   vixl::aarch64::Label loop;
2056   vixl::aarch64::Label remainder;
2057 
2058   // Save repairing the value of num_chr on the < 8 character path.
2059   __ Subs(tmp1, num_chr, 8);
2060   __ B(lt, &remainder);
2061 
2062   // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
2063   __ Mov(num_chr, tmp1);
2064 
2065   // Main loop used for longer fetches loads and stores 8x16-bit characters at a time.
2066   // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
2067   __ Bind(&loop);
2068   __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
2069   __ Subs(num_chr, num_chr, 8);
2070   __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
2071   __ B(ge, &loop);
2072 
2073   __ Adds(num_chr, num_chr, 8);
2074   __ B(eq, &done);
2075 
2076   // Main loop for < 8 character case and remainder handling. Loads and stores one
2077   // 16-bit Java character at a time.
2078   __ Bind(&remainder);
2079   __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
2080   __ Subs(num_chr, num_chr, 1);
2081   __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2082   __ B(gt, &remainder);
2083   __ B(&done);
2084 
2085   if (mirror::kUseStringCompression) {
2086     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
2087     DCHECK_EQ(c_char_size, 1u);
2088     __ Bind(&compressed_string_preloop);
2089     __ Add(src_ptr, src_ptr, Operand(srcBegin));
2090     // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
2091     __ Bind(&compressed_string_loop);
2092     __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
2093     __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2094     __ Subs(num_chr, num_chr, Operand(1));
2095     __ B(gt, &compressed_string_loop);
2096   }
2097 
2098   __ Bind(&done);
2099 }
2100 
2101 // Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
2102 // implementation there for longer copy lengths.
2103 static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
2104 
2105 static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
2106                                                uint32_t at,
2107                                                HInstruction* input) {
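  // Constants that cannot be encoded as an ADD/SUB immediate must be materialized in a
  // register; everything else may stay as a register-or-constant operand.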
2108   HIntConstant* const_input = input->AsIntConstant();
2109   if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
2110     locations->SetInAt(at, Location::RequiresRegister());
2111   } else {
2112     locations->SetInAt(at, Location::RegisterOrConstant(input));
2113   }
2114 }
2115 
2116 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2117   // Check to see if we have known failures that will cause us to have to bail out
2118   // to the runtime, and just generate the runtime call directly.
2119   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2120   HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
2121 
2122   // The positions must be non-negative.
2123   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2124       (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
2125     // We will have to fail anyways.
2126     return;
2127   }
2128 
2129   // The length must be >= 0 and not so long that we would (currently) prefer libcore's
2130   // native implementation.
2131   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2132   if (length != nullptr) {
2133     int32_t len = length->GetValue();
2134     if (len < 0 || len > kSystemArrayCopyCharThreshold) {
2135       // Just call as normal.
2136       return;
2137     }
2138   }
2139 
2140   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
2141   LocationSummary* locations = new (allocator) LocationSummary(invoke,
2142                                                                LocationSummary::kCallOnSlowPath,
2143                                                                kIntrinsified);
2144   // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
2145   locations->SetInAt(0, Location::RequiresRegister());
2146   SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2147   locations->SetInAt(2, Location::RequiresRegister());
2148   SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2149   SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2150 
2151   locations->AddTemp(Location::RequiresRegister());
2152   locations->AddTemp(Location::RequiresRegister());
2153   locations->AddTemp(Location::RequiresRegister());
2154 }
2155 
2156 static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
2157                                          const Location& pos,
2158                                          const Register& input,
2159                                          const Location& length,
2160                                          SlowPathCodeARM64* slow_path,
2161                                          const Register& temp,
2162                                          bool length_is_input_length = false) {
2163   const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
2164   if (pos.IsConstant()) {
2165     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
2166     if (pos_const == 0) {
2167       if (!length_is_input_length) {
2168         // Check that length(input) >= length.
2169         __ Ldr(temp, MemOperand(input, length_offset));
2170         __ Cmp(temp, OperandFrom(length, Primitive::kPrimInt));
2171         __ B(slow_path->GetEntryLabel(), lt);
2172       }
2173     } else {
2174       // Check that length(input) >= pos.
2175       __ Ldr(temp, MemOperand(input, length_offset));
2176       __ Subs(temp, temp, pos_const);
2177       __ B(slow_path->GetEntryLabel(), lt);
2178 
2179       // Check that (length(input) - pos) >= length.
2180       __ Cmp(temp, OperandFrom(length, Primitive::kPrimInt));
2181       __ B(slow_path->GetEntryLabel(), lt);
2182     }
2183   } else if (length_is_input_length) {
2184     // The only way the copy can succeed is if pos is zero.
2185     __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
2186   } else {
2187     // Check that pos >= 0.
2188     Register pos_reg = WRegisterFrom(pos);
2189     __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
2190 
2191     // Check that pos <= length(input) && (length(input) - pos) >= length.
2192     __ Ldr(temp, MemOperand(input, length_offset));
2193     __ Subs(temp, temp, pos_reg);
2194     // Ccmp if length(input) >= pos, else definitely bail to slow path (N!=V == lt).
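    // That is: if the preceding Subs established length(input) >= pos, compare
    // (length(input) - pos) against `length`; otherwise force NFlag (N=1, V=0) so that
    // the following B.lt is always taken.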
2195     __ Ccmp(temp, OperandFrom(length, Primitive::kPrimInt), NFlag, ge);
2196     __ B(slow_path->GetEntryLabel(), lt);
2197   }
2198 }
2199 
2200 // Compute base source address, base destination address, and end
2201 // source address for System.arraycopy* intrinsics in `src_base`,
2202 // `dst_base` and `src_end` respectively.
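// In effect:
//   src_base = src + data_offset + src_pos * element_size
//   dst_base = dst + data_offset + dst_pos * element_size
//   src_end  = src_base + copy_length * element_size
// where element_size is 2 for char arrays and the heap reference size for object arrays.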
2203 static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
2204                                         Primitive::Type type,
2205                                         const Register& src,
2206                                         const Location& src_pos,
2207                                         const Register& dst,
2208                                         const Location& dst_pos,
2209                                         const Location& copy_length,
2210                                         const Register& src_base,
2211                                         const Register& dst_base,
2212                                         const Register& src_end) {
2213   // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
2214   DCHECK(type == Primitive::kPrimNot || type == Primitive::kPrimChar)
2215       << "Unexpected element type: " << type;
2216   const int32_t element_size = Primitive::ComponentSize(type);
2217   const int32_t element_size_shift = Primitive::ComponentSizeShift(type);
2218   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2219 
2220   if (src_pos.IsConstant()) {
2221     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2222     __ Add(src_base, src, element_size * constant + data_offset);
2223   } else {
2224     __ Add(src_base, src, data_offset);
2225     __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift));
2226   }
2227 
2228   if (dst_pos.IsConstant()) {
2229     int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
2230     __ Add(dst_base, dst, element_size * constant + data_offset);
2231   } else {
2232     __ Add(dst_base, dst, data_offset);
2233     __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
2234   }
2235 
2236   if (copy_length.IsConstant()) {
2237     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2238     __ Add(src_end, src_base, element_size * constant);
2239   } else {
2240     __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
2241   }
2242 }
2243 
2244 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2245   MacroAssembler* masm = GetVIXLAssembler();
2246   LocationSummary* locations = invoke->GetLocations();
2247   Register src = XRegisterFrom(locations->InAt(0));
2248   Location src_pos = locations->InAt(1);
2249   Register dst = XRegisterFrom(locations->InAt(2));
2250   Location dst_pos = locations->InAt(3);
2251   Location length = locations->InAt(4);
2252 
2253   SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
2254   codegen_->AddSlowPath(slow_path);
2255 
2256   // If source and destination are the same, take the slow path. Overlapping copy regions must be
2257   // copied in reverse and we can't know in all cases if it's needed.
2258   __ Cmp(src, dst);
2259   __ B(slow_path->GetEntryLabel(), eq);
2260 
2261   // Bail out if the source is null.
2262   __ Cbz(src, slow_path->GetEntryLabel());
2263 
2264   // Bail out if the destination is null.
2265   __ Cbz(dst, slow_path->GetEntryLabel());
2266 
2267   if (!length.IsConstant()) {
2268     // Merge the following two comparisons into one:
2269     //   If the length is negative, bail out (delegate to libcore's native implementation).
2270     //   If the length > 32 then (currently) prefer libcore's native implementation.
2271     __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
2272     __ B(slow_path->GetEntryLabel(), hi);
2273   } else {
2274     // We have already checked in the LocationsBuilder for the constant case.
2275     DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
2276     DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
2277   }
2278 
2279   Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
2280   Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
2281   Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
2282 
2283   CheckSystemArrayCopyPosition(masm,
2284                                src_pos,
2285                                src,
2286                                length,
2287                                slow_path,
2288                                src_curr_addr,
2289                                false);
2290 
2291   CheckSystemArrayCopyPosition(masm,
2292                                dst_pos,
2293                                dst,
2294                                length,
2295                                slow_path,
2296                                src_curr_addr,
2297                                false);
2298 
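  // The temporaries were acquired as 32-bit W registers (sufficient for the position
  // checks above); switch to their 64-bit X views for the address computations and
  // the copy loop below.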
2299   src_curr_addr = src_curr_addr.X();
2300   dst_curr_addr = dst_curr_addr.X();
2301   src_stop_addr = src_stop_addr.X();
2302 
2303   GenSystemArrayCopyAddresses(masm,
2304                               Primitive::kPrimChar,
2305                               src,
2306                               src_pos,
2307                               dst,
2308                               dst_pos,
2309                               length,
2310                               src_curr_addr,
2311                               dst_curr_addr,
2312                               src_stop_addr);
2313 
2314   // Iterate over the arrays and do a raw copy of the chars.
2315   const int32_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
2316   UseScratchRegisterScope temps(masm);
2317   Register tmp = temps.AcquireW();
2318   vixl::aarch64::Label loop, done;
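  // The post-indexed Ldrh/Strh below advance both addresses by one char per
  // iteration; the loop exits once src_curr_addr reaches src_stop_addr.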
2319   __ Bind(&loop);
2320   __ Cmp(src_curr_addr, src_stop_addr);
2321   __ B(&done, eq);
2322   __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
2323   __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
2324   __ B(&loop);
2325   __ Bind(&done);
2326 
2327   __ Bind(slow_path->GetExitLabel());
2328 }
2329 
2330 // For copy lengths at or above this threshold we prefer the native implementation.
2331 static constexpr int32_t kSystemArrayCopyThreshold = 128;
2332 
2333 // CodeGenerator::CreateSystemArrayCopyLocationSummary uses three temporary registers.
2334 // We want to use two temporary registers in order to reduce register pressure on arm64,
2335 // so we don't use CodeGenerator::CreateSystemArrayCopyLocationSummary.
2336 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2337   // The only read barrier implementation supporting the
2338   // SystemArrayCopy intrinsic is the Baker-style read barrier.
2339   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2340     return;
2341   }
2342 
2343   // Check for known conditions that would force a bail out to the runtime; in that
2344   // case just generate the runtime call directly instead of intrinsifying.
2345   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2346   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2347 
2348   // The positions must be non-negative.
2349   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2350       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
2351     // We will have to fail anyway.
2352     return;
2353   }
2354 
2355   // The length must be >= 0.
2356   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2357   if (length != nullptr) {
2358     int32_t len = length->GetValue();
2359     if (len < 0 || len >= kSystemArrayCopyThreshold) {
2360       // Just call as normal.
2361       return;
2362     }
2363   }
2364 
2365   SystemArrayCopyOptimizations optimizations(invoke);
2366 
2367   if (optimizations.GetDestinationIsSource()) {
2368     if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
2369       // With source == destination, the forward copy is only valid if src_pos >= dest_pos.
2370       return;
2371     }
2372   }
2373 
2374   if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
2375     // We currently don't intrinsify primitive copying.
2376     return;
2377   }
2378 
2379   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
2380   LocationSummary* locations = new (allocator) LocationSummary(invoke,
2381                                                                LocationSummary::kCallOnSlowPath,
2382                                                                kIntrinsified);
2383   // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
2384   locations->SetInAt(0, Location::RequiresRegister());
2385   SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2386   locations->SetInAt(2, Location::RequiresRegister());
2387   SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2388   SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2389 
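  // The first two temporaries are used for the type checks and are then reused as
  // the current source and destination addresses in the copy loop (the end address
  // register comes from the extra temporary below or from the scratch pool).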
2390   locations->AddTemp(Location::RequiresRegister());
2391   locations->AddTemp(Location::RequiresRegister());
2392   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2393     // Temporary register IP0, obtained from the VIXL scratch register
2394     // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
2395     // (because that register is clobbered by ReadBarrierMarkRegX
2396     // entry points). It cannot be used in calls to
2397     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
2398     // either. For these reasons, get a third extra temporary register
2399     // from the register allocator.
2400     locations->AddTemp(Location::RequiresRegister());
2401   } else {
2402     // Cases other than Baker read barriers: the third temporary will
2403     // be acquired from the VIXL scratch register pool.
2404   }
2405 }
2406 
2407 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2408   // The only read barrier implementation supporting the
2409   // SystemArrayCopy intrinsic is the Baker-style read barrier.
2410   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2411 
2412   MacroAssembler* masm = GetVIXLAssembler();
2413   LocationSummary* locations = invoke->GetLocations();
2414 
2415   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2416   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2417   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2418   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2419   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2420 
2421   Register src = XRegisterFrom(locations->InAt(0));
2422   Location src_pos = locations->InAt(1);
2423   Register dest = XRegisterFrom(locations->InAt(2));
2424   Location dest_pos = locations->InAt(3);
2425   Location length = locations->InAt(4);
2426   Register temp1 = WRegisterFrom(locations->GetTemp(0));
2427   Location temp1_loc = LocationFrom(temp1);
2428   Register temp2 = WRegisterFrom(locations->GetTemp(1));
2429   Location temp2_loc = LocationFrom(temp2);
2430 
2431   SlowPathCodeARM64* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
2432   codegen_->AddSlowPath(intrinsic_slow_path);
2433 
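  // The generated code validates the positions and length, then (unless proven
  // unnecessary) checks that the source elements are assignable to the destination's
  // component type, performs the copy, and finally marks the GC card of `dest`.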
2434   vixl::aarch64::Label conditions_on_positions_validated;
2435   SystemArrayCopyOptimizations optimizations(invoke);
2436 
2437   // If source and destination are the same, we go to the slow path when the copy
2438   // regions overlap with dest_pos > src_pos, as that would require a backward copy.
2439   if (src_pos.IsConstant()) {
2440     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2441     if (dest_pos.IsConstant()) {
2442       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2443       if (optimizations.GetDestinationIsSource()) {
2444         // Checked when building locations.
2445         DCHECK_GE(src_pos_constant, dest_pos_constant);
2446       } else if (src_pos_constant < dest_pos_constant) {
2447         __ Cmp(src, dest);
2448         __ B(intrinsic_slow_path->GetEntryLabel(), eq);
2449       }
2450       // Checked when building locations.
2451       DCHECK(!optimizations.GetDestinationIsSource()
2452              || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
2453     } else {
2454       if (!optimizations.GetDestinationIsSource()) {
2455         __ Cmp(src, dest);
2456         __ B(&conditions_on_positions_validated, ne);
2457       }
2458       __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
2459       __ B(intrinsic_slow_path->GetEntryLabel(), gt);
2460     }
2461   } else {
2462     if (!optimizations.GetDestinationIsSource()) {
2463       __ Cmp(src, dest);
2464       __ B(&conditions_on_positions_validated, ne);
2465     }
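    // The arrays may alias at this point: bail out if src_pos < dest_pos, as that
    // would require a backward copy.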
2466     __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
2467            OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
2468     __ B(intrinsic_slow_path->GetEntryLabel(), lt);
2469   }
2470 
2471   __ Bind(&conditions_on_positions_validated);
2472 
2473   if (!optimizations.GetSourceIsNotNull()) {
2474     // Bail out if the source is null.
2475     __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
2476   }
2477 
2478   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2479     // Bail out if the destination is null.
2480     __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
2481   }
2482 
2483   // We have already checked in the LocationsBuilder for the constant case.
2484   if (!length.IsConstant() &&
2485       !optimizations.GetCountIsSourceLength() &&
2486       !optimizations.GetCountIsDestinationLength()) {
2487     // Merge the following two comparisons into one:
2488     //   If the length is negative, bail out (delegate to libcore's native implementation).
2489     //   If the length >= 128 then (currently) prefer the native implementation.
2490     __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
2491     __ B(intrinsic_slow_path->GetEntryLabel(), hs);
2492   }
2493   // Validity checks: source.
2494   CheckSystemArrayCopyPosition(masm,
2495                                src_pos,
2496                                src,
2497                                length,
2498                                intrinsic_slow_path,
2499                                temp1,
2500                                optimizations.GetCountIsSourceLength());
2501 
2502   // Validity checks: dest.
2503   CheckSystemArrayCopyPosition(masm,
2504                                dest_pos,
2505                                dest,
2506                                length,
2507                                intrinsic_slow_path,
2508                                temp1,
2509                                optimizations.GetCountIsDestinationLength());
2510   {
2511     // We use a block to end the scratch scope before the write barrier, thus
2512     // freeing the temporary registers so they can be used in `MarkGCCard`.
2513     UseScratchRegisterScope temps(masm);
2514     Location temp3_loc;  // Used only for Baker read barrier.
2515     Register temp3;
2516     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2517       temp3_loc = locations->GetTemp(2);
2518       temp3 = WRegisterFrom(temp3_loc);
2519     } else {
2520       temp3 = temps.AcquireW();
2521     }
2522 
2523     if (!optimizations.GetDoesNotNeedTypeCheck()) {
2524       // Check whether all elements of the source array are assignable to the component
2525       // type of the destination array. We do two checks: the classes are the same,
2526       // or the destination is Object[]. If none of these checks succeed, we go to the
2527       // slow path.
2528 
2529       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2530         if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2531           // /* HeapReference<Class> */ temp1 = src->klass_
2532           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2533                                                           temp1_loc,
2534                                                           src.W(),
2535                                                           class_offset,
2536                                                           temp3_loc,
2537                                                           /* needs_null_check */ false,
2538                                                           /* use_load_acquire */ false);
2539           // Bail out if the source is not a non primitive array.
2540           // /* HeapReference<Class> */ temp1 = temp1->component_type_
2541           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2542                                                           temp1_loc,
2543                                                           temp1,
2544                                                           component_offset,
2545                                                           temp3_loc,
2546                                                           /* needs_null_check */ false,
2547                                                           /* use_load_acquire */ false);
2548           __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
2549           // If heap poisoning is enabled, `temp1` has been unpoisoned
2550           // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2551           // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
2552           __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
2553           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2554           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2555         }
2556 
2557         // /* HeapReference<Class> */ temp1 = dest->klass_
2558         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2559                                                         temp1_loc,
2560                                                         dest.W(),
2561                                                         class_offset,
2562                                                         temp3_loc,
2563                                                         /* needs_null_check */ false,
2564                                                         /* use_load_acquire */ false);
2565 
2566         if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2567           // Bail out if the destination is not a non primitive array.
2568           //
2569           // Register `temp1` is not trashed by the read barrier emitted
2570           // by GenerateFieldLoadWithBakerReadBarrier below, as that
2571           // method produces a call to a ReadBarrierMarkRegX entry point,
2572           // which saves all potentially live registers, including
2573           // temporaries such as `temp1`.
2574           // /* HeapReference<Class> */ temp2 = temp1->component_type_
2575           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2576                                                           temp2_loc,
2577                                                           temp1,
2578                                                           component_offset,
2579                                                           temp3_loc,
2580                                                           /* needs_null_check */ false,
2581                                                           /* use_load_acquire */ false);
2582           __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2583           // If heap poisoning is enabled, `temp2` has been unpoisoned
2584           // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2585           // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2586           __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2587           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2588           __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2589         }
2590 
2591         // For the same reason given earlier, `temp1` is not trashed by the
2592         // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2593         // /* HeapReference<Class> */ temp2 = src->klass_
2594         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2595                                                         temp2_loc,
2596                                                         src.W(),
2597                                                         class_offset,
2598                                                         temp3_loc,
2599                                                         /* needs_null_check */ false,
2600                                                         /* use_load_acquire */ false);
2601         // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2602         __ Cmp(temp1, temp2);
2603 
2604         if (optimizations.GetDestinationIsTypedObjectArray()) {
2605           vixl::aarch64::Label do_copy;
2606           __ B(&do_copy, eq);
2607           // /* HeapReference<Class> */ temp1 = temp1->component_type_
2608           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2609                                                           temp1_loc,
2610                                                           temp1,
2611                                                           component_offset,
2612                                                           temp3_loc,
2613                                                           /* needs_null_check */ false,
2614                                                           /* use_load_acquire */ false);
2615           // /* HeapReference<Class> */ temp1 = temp1->super_class_
2616           // We do not need to emit a read barrier for the following
2617           // heap reference load, as `temp1` is only used in a
2618           // comparison with null below, and this reference is not
2619           // kept afterwards.
2620           __ Ldr(temp1, HeapOperand(temp1, super_offset));
2621           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2622           __ Bind(&do_copy);
2623         } else {
2624           __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2625         }
2626       } else {
2627         // Non read barrier code.
2628 
2629         // /* HeapReference<Class> */ temp1 = dest->klass_
2630         __ Ldr(temp1, MemOperand(dest, class_offset));
2631         // /* HeapReference<Class> */ temp2 = src->klass_
2632         __ Ldr(temp2, MemOperand(src, class_offset));
2633         bool did_unpoison = false;
2634         if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2635             !optimizations.GetSourceIsNonPrimitiveArray()) {
2636           // One or two of the references need to be unpoisoned. Unpoison them
2637           // both to make the identity check valid.
2638           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2639           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2640           did_unpoison = true;
2641         }
2642 
2643         if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2644           // Bail out if the destination is not a non primitive array.
2645           // /* HeapReference<Class> */ temp3 = temp1->component_type_
2646           __ Ldr(temp3, HeapOperand(temp1, component_offset));
2647           __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2648           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2649           // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2650           __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2651           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2652           __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2653         }
2654 
2655         if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2656           // Bail out if the source is not a non primitive array.
2657           // /* HeapReference<Class> */ temp3 = temp2->component_type_
2658           __ Ldr(temp3, HeapOperand(temp2, component_offset));
2659           __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2660           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2661           // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2662           __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2663           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2664           __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2665         }
2666 
2667         __ Cmp(temp1, temp2);
2668 
2669         if (optimizations.GetDestinationIsTypedObjectArray()) {
2670           vixl::aarch64::Label do_copy;
2671           __ B(&do_copy, eq);
2672           if (!did_unpoison) {
2673             codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2674           }
2675           // /* HeapReference<Class> */ temp1 = temp1->component_type_
2676           __ Ldr(temp1, HeapOperand(temp1, component_offset));
2677           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2678           // /* HeapReference<Class> */ temp1 = temp1->super_class_
2679           __ Ldr(temp1, HeapOperand(temp1, super_offset));
2680           // No need to unpoison the result, we're comparing against null.
2681           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2682           __ Bind(&do_copy);
2683         } else {
2684           __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2685         }
2686       }
2687     } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2688       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2689       // Bail out if the source is not a non primitive array.
2690       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2691         // /* HeapReference<Class> */ temp1 = src->klass_
2692         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2693                                                         temp1_loc,
2694                                                         src.W(),
2695                                                         class_offset,
2696                                                         temp3_loc,
2697                                                         /* needs_null_check */ false,
2698                                                         /* use_load_acquire */ false);
2699         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2700         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2701                                                         temp2_loc,
2702                                                         temp1,
2703                                                         component_offset,
2704                                                         temp3_loc,
2705                                                         /* needs_null_check */ false,
2706                                                         /* use_load_acquire */ false);
2707         __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2708         // If heap poisoning is enabled, `temp2` has been unpoisoned
2709         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2710       } else {
2711         // /* HeapReference<Class> */ temp1 = src->klass_
2712         __ Ldr(temp1, HeapOperand(src.W(), class_offset));
2713         codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2714         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2715         __ Ldr(temp2, HeapOperand(temp1, component_offset));
2716         __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2717         codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2718       }
2719       // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2720       __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2721       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2722       __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2723     }
2724 
2725     if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
2726       // Constant length of zero: no need to emit the loop code at all.
2727     } else {
2728       Register src_curr_addr = temp1.X();
2729       Register dst_curr_addr = temp2.X();
2730       Register src_stop_addr = temp3.X();
2731       vixl::aarch64::Label done;
2732       const Primitive::Type type = Primitive::kPrimNot;
2733       const int32_t element_size = Primitive::ComponentSize(type);
2734 
2735       if (length.IsRegister()) {
2736         // Don't enter the copy loop if the length is zero.
2737         __ Cbz(WRegisterFrom(length), &done);
2738       }
2739 
2740       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2741         // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2742 
2743         // SystemArrayCopy implementation for Baker read barriers (see
2744         // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
2745         //
2746         //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2747         //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
2748         //   bool is_gray = (rb_state == ReadBarrier::GrayState());
2749         //   if (is_gray) {
2750         //     // Slow-path copy.
2751         //     do {
2752         //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2753         //     } while (src_ptr != end_ptr)
2754         //   } else {
2755         //     // Fast-path copy.
2756         //     do {
2757         //       *dest_ptr++ = *src_ptr++;
2758         //     } while (src_ptr != end_ptr)
2759         //   }
2760 
2761         // Make sure `tmp` is not IP0, as it is clobbered by
2762         // ReadBarrierMarkRegX entry points in
2763         // ReadBarrierSystemArrayCopySlowPathARM64.
2764         DCHECK(temps.IsAvailable(ip0));
2765         temps.Exclude(ip0);
2766         Register tmp = temps.AcquireW();
2767         DCHECK_NE(LocationFrom(tmp).reg(), IP0);
2768         // Put IP0 back in the pool so that VIXL has at least one
2769         // scratch register available to emit macro-instructions (note
2770         // that IP1 is already used for `tmp`). Indeed some
2771         // macro-instructions used in GenSystemArrayCopyAddresses
2772         // (invoked hereunder) may require a scratch register (for
2773         // instance to emit a load with a large constant offset).
2774         temps.Include(ip0);
2775 
2776         // /* int32_t */ monitor = src->monitor_
2777         __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
2778         // /* LockWord */ lock_word = LockWord(monitor)
2779         static_assert(sizeof(LockWord) == sizeof(int32_t),
2780                       "art::LockWord and int32_t have different sizes.");
2781 
2782         // Introduce a dependency on the lock_word including rb_state,
2783         // to prevent load-load reordering, and without using
2784         // a memory barrier (which would be more expensive).
2785         // `src` is unchanged by this operation, but its value now depends
2786         // on `tmp`.
2787         __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
2788 
2789         // Compute base source address, base destination address, and end
2790         // source address for System.arraycopy* intrinsics in `src_base`,
2791         // `dst_base` and `src_end` respectively.
2792         // Note that `src_curr_addr` is computed from `src` (and
2793         // `src_pos`) here, and thus honors the artificial dependency
2794         // of `src` on `tmp`.
2795         GenSystemArrayCopyAddresses(masm,
2796                                     type,
2797                                     src,
2798                                     src_pos,
2799                                     dest,
2800                                     dest_pos,
2801                                     length,
2802                                     src_curr_addr,
2803                                     dst_curr_addr,
2804                                     src_stop_addr);
2805 
2806         // Slow path used to copy array when `src` is gray.
2807         SlowPathCodeARM64* read_barrier_slow_path =
2808             new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
2809         codegen_->AddSlowPath(read_barrier_slow_path);
2810 
2811         // Given the numeric representation, it's enough to check the low bit of the rb_state.
2812         static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
2813         static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2814         __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
2815 
2816         // Fast-path copy.
2817         // Iterate over the arrays and do a raw copy of the objects. We don't need to
2818         // poison/unpoison.
2819         vixl::aarch64::Label loop;
2820         __ Bind(&loop);
2821         __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
2822         __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
2823         __ Cmp(src_curr_addr, src_stop_addr);
2824         __ B(&loop, ne);
2825 
2826         __ Bind(read_barrier_slow_path->GetExitLabel());
2827       } else {
2828         // Non read barrier code.
2829         // Compute base source address, base destination address, and end
2830         // source address for System.arraycopy* intrinsics in `src_base`,
2831         // `dst_base` and `src_end` respectively.
2832         GenSystemArrayCopyAddresses(masm,
2833                                     type,
2834                                     src,
2835                                     src_pos,
2836                                     dest,
2837                                     dest_pos,
2838                                     length,
2839                                     src_curr_addr,
2840                                     dst_curr_addr,
2841                                     src_stop_addr);
2842         // Iterate over the arrays and do a raw copy of the objects. We don't need to
2843         // poison/unpoison.
2844         vixl::aarch64::Label loop;
2845         __ Bind(&loop);
2846         {
2847           Register tmp = temps.AcquireW();
2848           __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
2849           __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
2850         }
2851         __ Cmp(src_curr_addr, src_stop_addr);
2852         __ B(&loop, ne);
2853       }
2854       __ Bind(&done);
2855     }
2856   }
2857 
2858   // We only need one card marking on the destination array.
2859   codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
2860 
2861   __ Bind(intrinsic_slow_path->GetExitLabel());
2862 }
2863 
2864 static void GenIsInfinite(LocationSummary* locations,
2865                           bool is64bit,
2866                           MacroAssembler* masm) {
2867   Operand infinity;
2868   Register out;
2869 
2870   if (is64bit) {
2871     infinity = kPositiveInfinityDouble;
2872     out = XRegisterFrom(locations->Out());
2873   } else {
2874     infinity = kPositiveInfinityFloat;
2875     out = WRegisterFrom(locations->Out());
2876   }
2877 
2878   const Register zero = vixl::aarch64::Assembler::AppropriateZeroRegFor(out);
2879 
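  // An infinity differs from the +infinity bit pattern in at most the sign bit, so
  // after the XOR below only the sign bit may remain set; the LSL #1 in the
  // comparison discards it, making the flags "equal" iff the input is infinite.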
2880   MoveFPToInt(locations, is64bit, masm);
2881   __ Eor(out, out, infinity);
2882   // We don't care about the sign bit, so shift left.
2883   __ Cmp(zero, Operand(out, LSL, 1));
2884   __ Cset(out, eq);
2885 }
2886 
2887 void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
2888   CreateFPToIntLocations(arena_, invoke);
2889 }
2890 
2891 void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
2892   GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
2893 }
2894 
2895 void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
2896   CreateFPToIntLocations(arena_, invoke);
2897 }
2898 
2899 void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
2900   GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
2901 }
2902 
2903 void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) {
2904   if (kEmitCompilerReadBarrier) {
2905     // Do not intrinsify this call with the read barrier configuration.
2906     return;
2907   }
2908   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2909                                                             LocationSummary::kCallOnSlowPath,
2910                                                             kIntrinsified);
2911   locations->SetInAt(0, Location::RequiresRegister());
2912   locations->SetOut(Location::SameAsFirstInput());
2913   locations->AddTemp(Location::RequiresRegister());
2914 }
2915 
2916 void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
2917   DCHECK(!kEmitCompilerReadBarrier);
2918   MacroAssembler* masm = GetVIXLAssembler();
2919   LocationSummary* locations = invoke->GetLocations();
2920 
2921   Register obj = InputRegisterAt(invoke, 0);
2922   Register out = OutputRegister(invoke);
2923 
2924   SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
2925   codegen_->AddSlowPath(slow_path);
2926 
2927   // Load ArtMethod first.
2928   HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
2929   DCHECK(invoke_direct != nullptr);
2930   Register temp0 = XRegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall(
2931                                  invoke_direct, locations->GetTemp(0)));
2932 
2933   // Now get declaring class.
2934   __ Ldr(temp0.W(), MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value()));
2935 
2936   uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
2937   uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
2938   DCHECK_NE(slow_path_flag_offset, 0u);
2939   DCHECK_NE(disable_flag_offset, 0u);
2940   DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
2941 
2942   // Check static flags that prevent using the intrinsic.
2943   if (slow_path_flag_offset == disable_flag_offset + 1) {
2944     // Load two adjacent flags in one 64-bit load.
2945     __ Ldr(temp0, MemOperand(temp0, disable_flag_offset));
2946   } else {
2947     UseScratchRegisterScope temps(masm);
2948     Register temp1 = temps.AcquireW();
2949     __ Ldr(temp1.W(), MemOperand(temp0, disable_flag_offset));
2950     __ Ldr(temp0.W(), MemOperand(temp0, slow_path_flag_offset));
2951     __ Orr(temp0, temp1, temp0);
2952   }
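  // A zero `temp0` means both flags are clear; otherwise conservatively take the slow path.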
2953   __ Cbnz(temp0, slow_path->GetEntryLabel());
2954 
2955   {
2956     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2957     vixl::EmissionCheckScope guard(codegen_->GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2958     // Fast path.
2959     __ Ldr(out, HeapOperand(obj, mirror::Reference::ReferentOffset().Int32Value()));
2960     codegen_->MaybeRecordImplicitNullCheck(invoke);
2961   }
2962   codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
2963   __ Bind(slow_path->GetExitLabel());
2964 }
2965 
2966 void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
2967   InvokeRuntimeCallingConvention calling_convention;
2968   IntrinsicVisitor::ComputeIntegerValueOfLocations(
2969       invoke,
2970       codegen_,
2971       calling_convention.GetReturnLocation(Primitive::kPrimNot),
2972       Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
2973 }
2974 
2975 void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
2976   IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
2977   LocationSummary* locations = invoke->GetLocations();
2978   MacroAssembler* masm = GetVIXLAssembler();
2979 
2980   Register out = RegisterFrom(locations->Out(), Primitive::kPrimNot);
2981   UseScratchRegisterScope temps(masm);
2982   Register temp = temps.AcquireW();
2983   InvokeRuntimeCallingConvention calling_convention;
2984   Register argument = calling_convention.GetRegisterAt(0);
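  // For a constant input the boxed Integer is either embedded directly (cache hit)
  // or allocated at run time (cache miss). For a non-constant input the generated
  // code is roughly:
  //
  //   index = in - info.low;
  //   if (index >= (info.high - info.low + 1)) {
  //     out = AllocObjectInitialized(java.lang.Integer);
  //     out.value = in;
  //   } else {
  //     out = info.cache[index];
  //   }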
2985   if (invoke->InputAt(0)->IsConstant()) {
2986     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2987     if (value >= info.low && value <= info.high) {
2988       // Just embed the j.l.Integer in the code.
2989       ScopedObjectAccess soa(Thread::Current());
2990       mirror::Object* boxed = info.cache->Get(value + (-info.low));
2991       DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
2992       uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
2993       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
2994     } else {
2995       // Allocate and initialize a new j.l.Integer.
2996       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
2997       // JIT object table.
2998       uint32_t address =
2999           dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3000       __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
3001       codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3002       CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3003       __ Mov(temp.W(), value);
3004       __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
3005       // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3006       // one.
3007       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3008     }
3009   } else {
3010     Register in = RegisterFrom(locations->InAt(0), Primitive::kPrimInt);
3011     // Check bounds of our cache.
3012     __ Add(out.W(), in.W(), -info.low);
3013     __ Cmp(out.W(), info.high - info.low + 1);
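    // `out` now holds the zero-based cache index; the unsigned comparison sends
    // indices at or above the cache length (including negative ones) to the
    // allocation path.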
3014     vixl::aarch64::Label allocate, done;
3015     __ B(&allocate, hs);
3016     // If the value is within the bounds, load the j.l.Integer directly from the array.
3017     uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
3018     uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
3019     __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
3020     MemOperand source = HeapOperand(
3021         temp, out.X(), LSL, Primitive::ComponentSizeShift(Primitive::kPrimNot));
3022     codegen_->Load(Primitive::kPrimNot, out, source);
3023     codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
3024     __ B(&done);
3025     __ Bind(&allocate);
3026     // Otherwise allocate and initialize a new j.l.Integer.
3027     address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3028     __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
3029     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3030     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3031     __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
3032     // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3033     // one.
3034     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3035     __ Bind(&done);
3036   }
3037 }
3038 
3039 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
3040 UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
3041 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
3042 UNIMPLEMENTED_INTRINSIC(ARM64, LongLowestOneBit)
3043 
3044 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
3045 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
3046 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
3047 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
3048 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
3049 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppend);
3050 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
3051 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
3052 
3053 // 1.8.
3054 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
3055 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
3056 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
3057 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
3058 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)
3059 
3060 UNREACHABLE_INTRINSICS(ARM64)
3061 
3062 #undef __
3063 
3064 }  // namespace arm64
3065 }  // namespace art
3066