/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "arch/arm64/asm_support_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_arm64.h"
#include "linker/arm64/relative_patcher_arm64.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "offsets.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)
using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;
using vixl::EmissionCheckScope;

#ifdef __
#error "ARM64 Codegen VIXL macro-assembler macro already defined."
#endif

namespace art {

template<class MirrorType>
class GcRoot;

namespace arm64 {

using helpers::ARM64EncodableConstantOrRegister;
using helpers::ArtVixlRegCodeCoherentForRegSet;
using helpers::CPURegisterFrom;
using helpers::DRegisterFrom;
using helpers::FPRegisterFrom;
using helpers::HeapOperand;
using helpers::HeapOperandFrom;
using helpers::InputCPURegisterAt;
using helpers::InputCPURegisterOrZeroRegAt;
using helpers::InputFPRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegisterAt;
using helpers::Int64ConstantFrom;
using helpers::IsConstantZeroBitPattern;
using helpers::LocationFrom;
using helpers::OperandFromMemOperand;
using helpers::OutputCPURegister;
using helpers::OutputFPRegister;
using helpers::OutputRegister;
using helpers::QRegisterFrom;
using helpers::RegisterFrom;
using helpers::StackOperandFrom;
using helpers::VIXLRegCodeFromART;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;

// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
// jump table version generates 7 instructions and num_entries literals. The compare/jump sequence
// therefore generates less code/data for a small num_entries.
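// For example, at the threshold of 7 entries the compare/jump sequence costs roughly
// 1.5 * 7 + 3 ~= 14 instructions, while the jump table costs 7 instructions plus
// 7 * 4 = 28 bytes of literal data.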
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;

// A reference load (except for object array loads) uses LDR Wt, [Xn, #offset], which can handle
// offsets < 16KiB. For offsets >= 16KiB, the load must be emitted as two or more instructions.
// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
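// Schematically (a sketch only; the exact emission lives in the Baker read barrier code paths
// of this file), a far field load is split as:
//   Add(temp, obj, offset & ~(kReferenceLoadMinFarOffset - 1));
//   Ldr(ref, HeapOperand(temp, offset & (kReferenceLoadMinFarOffset - 1)));
// so that the offset seen by the link-time thunk stays below 16KiB.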
constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;

// Flags controlling the use of link-time generated thunks for Baker read barriers.
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;

// Some instructions have special requirements for a temporary, for example
// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
// a temp that is not R0 (to avoid an extra move), and Baker read barrier field
// loads with large offsets need a fixed register to limit the number of link-time
// thunks we generate. For these and similar cases, we want to reserve a specific
// register that's neither callee-save nor an argument register. We choose x15.
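// (In the AAPCS64 calling convention, x15 is a caller-saved temporary that is not used for
// argument passing, so reserving it conflicts with neither role.)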
inline Location FixedTempLocation() {
  return Location::RegisterLocation(x15.GetCode());
}

inline Condition ARM64Condition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne;
    case kCondLT: return lt;
    case kCondLE: return le;
    case kCondGT: return gt;
    case kCondGE: return ge;
    case kCondB:  return lo;
    case kCondBE: return ls;
    case kCondA:  return hi;
    case kCondAE: return hs;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
  // The ARM64 condition codes can express all the necessary branches; see the
  // "Meaning (floating-point)" column in table C1-1 of the ARMv8 reference manual.
  // There is no dex instruction or HIR that would need the missing conditions
  // "equal or unordered" or "not equal".
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne /* unordered */;
    case kCondLT: return gt_bias ? cc : lt /* unordered */;
    case kCondLE: return gt_bias ? ls : le /* unordered */;
    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

Location ARM64ReturnLocation(DataType::Type return_type) {
  // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
  // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
  // but we use the exact registers for clarity.
  if (return_type == DataType::Type::kFloat32) {
    return LocationFrom(s0);
  } else if (return_type == DataType::Type::kFloat64) {
    return LocationFrom(d0);
  } else if (return_type == DataType::Type::kInt64) {
    return LocationFrom(x0);
  } else if (return_type == DataType::Type::kVoid) {
    return Location::NoLocation();
  } else {
    return LocationFrom(w0);
  }
}

Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) {
  return ARM64ReturnLocation(return_type);
}

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()

// Calculate the memory operand used for saving/restoring live registers.
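// The core registers are stored as one block starting at `spill_offset`, immediately followed by
// the FP/SIMD registers.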
static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
                                           LocationSummary* locations,
                                           int64_t spill_offset,
                                           bool is_save) {
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
  DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
                                         codegen->GetNumberOfCoreRegisters(),
                                         fp_spills,
                                         codegen->GetNumberOfFloatingPointRegisters()));

  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
  unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
  CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);

  MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);

  Register base = masm->StackPointer();
  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
  int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
  int64_t reg_size = kXRegSizeInBytes;
  int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
  uint32_t ls_access_size = WhichPowerOf2(reg_size);
  if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
      !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
    // If the offset does not fit in the instruction's immediate field, use an alternate register
    // to compute the base address (the base address for the floating point register spills).
    Register new_base = temps.AcquireSameSizeAs(base);
    __ Add(new_base, base, Operand(spill_offset + core_spill_size));
    base = new_base;
    spill_offset = -core_spill_size;
    int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
    DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
    DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
  }

  if (is_save) {
    __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
    __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  } else {
    __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
    __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  }
}


void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
  for (uint32_t i : LowToHighBits(core_spills)) {
    // If the register holds an object, update the stack mask.
    if (locations->RegisterContainsObject(i)) {
      locations->SetStackBit(stack_offset / kVRegSize);
    }
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_core_stack_offsets_[i] = stack_offset;
    stack_offset += kXRegSizeInBytes;
  }

  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
  for (uint32_t i : LowToHighBits(fp_spills)) {
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_fpu_stack_offsets_[i] = stack_offset;
    stack_offset += kDRegSizeInBytes;
  }

  SaveRestoreLiveRegistersHelper(codegen,
                                 locations,
                                 codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */);
}

void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  SaveRestoreLiveRegistersHelper(codegen,
                                 locations,
                                 codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */);
}

class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               LocationFrom(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kInt32,
                               locations->InAt(1),
                               LocationFrom(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kInt32);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
};

class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
};

class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  LoadClassSlowPathARM64(HLoadClass* cls,
                         HInstruction* at,
                         uint32_t dex_pc,
                         bool do_clinit)
      : SlowPathCodeARM64(at),
        cls_(cls),
        dex_pc_(dex_pc),
        do_clinit_(do_clinit) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    dex::TypeIndex type_index = cls_->GetTypeIndex();
    __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
    QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
                                                : kQuickInitializeType;
    arm64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
    if (do_clinit_) {
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
    } else {
      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      DataType::Type type = instruction_->GetType();
      arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
    }
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM64"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  // The dex PC of `at_`.
  const uint32_t dex_pc_;

  // Whether to initialize the class.
  const bool do_clinit_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
};

class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit LoadStringSlowPathARM64(HLoadString* instruction)
      : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
    arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    DataType::Type type = instruction_->GetType();
    arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);

    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
};

class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
};

class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCodeARM64(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves live 128-bit regs for SIMD.
    arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores live 128-bit regs for SIMD.
    if (successor_ == nullptr) {
      __ B(GetReturnLabel());
    } else {
      __ B(arm64_codegen->GetLabelOf(successor_));
    }
  }

  vixl::aarch64::Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; }

 private:
  // If not null, the block to branch to after the suspend check.
  HBasicBlock* const successor_;

  // If `successor_` is null, the label to branch to after the suspend check.
  vixl::aarch64::Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
};

class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
      : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();

    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    uint32_t dex_pc = instruction_->GetDexPc();

    __ Bind(GetEntryLabel());

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               LocationFrom(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kReference,
                               locations->InAt(1),
                               LocationFrom(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
      DataType::Type ret_type = instruction_->GetType();
      Location ret_loc = calling_convention.GetReturnLocation(ret_type);
      arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
    } else {
      DCHECK(instruction_->IsCheckCast());
      arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      RestoreLiveRegisters(codegen, locations);
      __ B(GetExitLabel());
    }
  }

  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; }
  bool IsFatal() const OVERRIDE { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
};

class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
      : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0),
           static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
};

class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        LocationFrom(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        LocationFrom(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        LocationFrom(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
};

void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
  uint32_t num_entries = switch_instr_->GetNumEntries();
  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);

  // We are about to use the assembler to place literals directly. Make sure we have enough
  // space in the underlying code buffer and that the jump table has been generated with the
  // right size.
  EmissionCheckScope scope(codegen->GetVIXLAssembler(),
                           num_entries * sizeof(int32_t),
                           CodeBufferCheckScope::kExactSize);

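  // Each entry in the table is a 32-bit signed offset from the start of the table to the
  // corresponding successor block, emitted as a literal below.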
  __ Bind(&table_start_);
  const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
  for (uint32_t i = 0; i < num_entries; i++) {
    vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
    DCHECK(target_label->IsBound());
    ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
    DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
    DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
    Literal<int32_t> literal(jump_offset);
    __ place(&literal);
  }
}

// Abstract base class for read barrier slow paths marking a reference
// `ref`.
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
 protected:
  ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
      : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; }

  // Generate assembly code calling the read barrier marking runtime
  // entry point (ReadBarrierMarkRegX).
  void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    DCHECK_NE(ref_.reg(), LR);
    DCHECK_NE(ref_.reg(), WSP);
    DCHECK_NE(ref_.reg(), WZR);
    // IP0 is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary, it cannot be the entry point's input/output.
    DCHECK_NE(ref_.reg(), IP0);
    DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in W0):
    //
    //   W0 <- ref
    //   W0 <- ReadBarrierMark(W0)
    //   ref <- W0
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    if (entrypoint_.IsValid()) {
      arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
      __ Blr(XRegisterFrom(entrypoint_));
    } else {
      // Entrypoint is not already loaded, load from the thread.
      int32_t entry_point_offset =
          Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
      // This runtime call does not require a stack map.
      arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    }
  }

  // The location (register) of the marked object reference.
  const Location ref_;

  // The location of the entrypoint if it is already loaded.
  const Location entrypoint_;

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking.
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
 public:
  ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
                               Location ref,
                               Location entrypoint = Location::NoLocation())
      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(ref_.IsRegister()) << ref_;
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    GenerateReadBarrierMarkRuntimeCall(codegen);
    __ B(GetExitLabel());
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
};

// Slow path loading `obj`'s lock word, loading a reference from
// object `*(obj + offset + (index << scale_factor))` into `ref`, and
// marking `ref` if `obj` is gray according to the lock word (Baker
// read barrier). The field `obj.field` in the object `obj` holding
// this reference does not get updated by this slow path after marking
// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
// below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
 public:
  LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
                                                 Location ref,
                                                 Register obj,
                                                 uint32_t offset,
                                                 Location index,
                                                 size_t scale_factor,
                                                 bool needs_null_check,
                                                 bool use_load_acquire,
                                                 Register temp,
                                                 Location entrypoint = Location::NoLocation())
      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
        obj_(obj),
        offset_(offset),
        index_(index),
        scale_factor_(scale_factor),
        needs_null_check_(needs_null_check),
        use_load_acquire_(use_load_acquire),
        temp_(temp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE {
    return "LoadReferenceWithBakerReadBarrierSlowPathARM64";
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(ref_.IsRegister()) << ref_;
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    DCHECK(obj_.IsW());
    DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();
    // The read barrier instrumentation of object ArrayGet
    // instructions does not support the HIntermediateAddress
    // instruction.
    DCHECK(!(instruction_->IsArrayGet() &&
             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));

    // Temporary register `temp_`, used to store the lock word, must
    // not be IP0 nor IP1, as we may use them to emit the reference
    // load (in the call to GenerateRawReferenceLoad below), and we
    // need the lock word to still be in `temp_` after the reference
    // load.
    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
    DCHECK_NE(LocationFrom(temp_).reg(), IP1);

    __ Bind(GetEntryLabel());

    // When using MaybeGenerateReadBarrierSlow, the read barrier call is
    // inserted after the original load. However, in fast path based
    // Baker's read barriers, we need to perform the load of
    // mirror::Object::monitor_ *before* the original reference load.
    // This load-load ordering is required by the read barrier.
    // The slow path (for Baker's algorithm) should look like:
    //
    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
    //   if (is_gray) {
    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
    //   }
    //
    // Note: the original implementation in ReadBarrier::Barrier is
    // slightly more complex as it performs additional checks that we do
    // not do here for performance reasons.

    // /* int32_t */ monitor = obj->monitor_
    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
    __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
    if (needs_null_check_) {
      codegen->MaybeRecordImplicitNullCheck(instruction_);
    }
    // /* LockWord */ lock_word = LockWord(monitor)
    static_assert(sizeof(LockWord) == sizeof(int32_t),
                  "art::LockWord and int32_t have different sizes.");

    // Introduce a dependency on the lock_word including rb_state,
    // to prevent load-load reordering, and without using
    // a memory barrier (which would be more expensive).
    // `obj` is unchanged by this operation, but its value now depends
    // on `temp`.
    __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));

    // The actual reference load.
    // A possible implicit null check has already been handled above.
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->GenerateRawReferenceLoad(instruction_,
                                            ref_,
                                            obj_,
                                            offset_,
                                            index_,
                                            scale_factor_,
                                            /* needs_null_check */ false,
                                            use_load_acquire_);

    // Mark the object `ref` when `obj` is gray.
    //
    //   if (rb_state == ReadBarrier::GrayState())
    //     ref = ReadBarrier::Mark(ref);
    //
    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
    GenerateReadBarrierMarkRuntimeCall(codegen);

    __ B(GetExitLabel());
  }

 private:
  // The register containing the object holding the marked object reference field.
  Register obj_;
  // The offset, index and scale factor to access the reference in `obj_`.
  uint32_t offset_;
  Location index_;
  size_t scale_factor_;
  // Is a null check required?
  bool needs_null_check_;
  // Should this reference load use Load-Acquire semantics?
  bool use_load_acquire_;
  // A temporary register used to hold the lock word of `obj_`.
  Register temp_;

  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64);
};

// Slow path loading `obj`'s lock word, loading a reference from
// object `*(obj + offset + (index << scale_factor))` into `ref`, and
// marking `ref` if `obj` is gray according to the lock word (Baker
// read barrier). If needed, this slow path also atomically updates
// the field `obj.field` in the object `obj` holding this reference
// after marking (contrary to
// LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never
// tries to update `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
//
// Argument `entrypoint` must be a register location holding the read
// barrier marking runtime entry point to be invoked or an empty
// location; in the latter case, the read barrier marking runtime
// entry point will be loaded by the slow path code itself.
class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
    : public ReadBarrierMarkSlowPathBaseARM64 {
 public:
  LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
      HInstruction* instruction,
      Location ref,
      Register obj,
      uint32_t offset,
      Location index,
      size_t scale_factor,
      bool needs_null_check,
      bool use_load_acquire,
      Register temp,
      Location entrypoint = Location::NoLocation())
      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
        obj_(obj),
        offset_(offset),
        index_(index),
        scale_factor_(scale_factor),
        needs_null_check_(needs_null_check),
        use_load_acquire_(use_load_acquire),
        temp_(temp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE {
    return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64";
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Register ref_reg = WRegisterFrom(ref_);
    DCHECK(locations->CanCall());
    DCHECK(ref_.IsRegister()) << ref_;
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
    DCHECK(obj_.IsW());
    DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());

    // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
    DCHECK_EQ(offset_, 0u);
    DCHECK_EQ(scale_factor_, 0u);
    DCHECK_EQ(use_load_acquire_, false);
    // The location of the offset of the marked reference field within `obj_`.
    Location field_offset = index_;
    DCHECK(field_offset.IsRegister()) << field_offset;

    // Temporary register `temp_`, used to store the lock word, must
    // not be IP0 nor IP1, as we may use them to emit the reference
    // load (in the call to GenerateRawReferenceLoad below), and we
    // need the lock word to still be in `temp_` after the reference
    // load.
    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
    DCHECK_NE(LocationFrom(temp_).reg(), IP1);

    __ Bind(GetEntryLabel());

    // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's:
    //
    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
    //   if (is_gray) {
    //     old_ref = ref;
    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
    //     compareAndSwapObject(obj, field_offset, old_ref, ref);
    //   }

    // /* int32_t */ monitor = obj->monitor_
    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
    __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
    if (needs_null_check_) {
      codegen->MaybeRecordImplicitNullCheck(instruction_);
    }
    // /* LockWord */ lock_word = LockWord(monitor)
    static_assert(sizeof(LockWord) == sizeof(int32_t),
                  "art::LockWord and int32_t have different sizes.");

    // Introduce a dependency on the lock_word including rb_state,
    // to prevent load-load reordering, and without using
    // a memory barrier (which would be more expensive).
    // `obj` is unchanged by this operation, but its value now depends
    // on `temp`.
    __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));

    // The actual reference load.
    // A possible implicit null check has already been handled above.
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->GenerateRawReferenceLoad(instruction_,
                                            ref_,
                                            obj_,
                                            offset_,
                                            index_,
                                            scale_factor_,
                                            /* needs_null_check */ false,
                                            use_load_acquire_);

    // Mark the object `ref` when `obj` is gray.
    //
    //   if (rb_state == ReadBarrier::GrayState())
    //     ref = ReadBarrier::Mark(ref);
    //
    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());

    // Save the old value of the reference before marking it.
    // Note that we cannot use IP to save the old reference, as IP is
    // used internally by the ReadBarrierMarkRegX entry point, and we
    // need the old reference after the call to that entry point.
    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
    __ Mov(temp_.W(), ref_reg);

    GenerateReadBarrierMarkRuntimeCall(codegen);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*(obj_ + field_offset)`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LDXR/CMP/BNE sequence of instructions in the compare-and-set
    // (CAS) operation below would abort the CAS, leaving the field
    // as-is.
    __ Cmp(temp_.W(), ref_reg);
    __ B(eq, GetExitLabel());

    // Update the holder's field atomically.  This may fail if the
    // mutator updates it before us, but that is OK.  This is achieved
    // using a strong compare-and-set (CAS) operation with relaxed
    // memory synchronization ordering, where the expected value is
    // the old reference and the desired value is the new reference.
1033 
1034     MacroAssembler* masm = arm64_codegen->GetVIXLAssembler();
1035     UseScratchRegisterScope temps(masm);
1036 
1037     // Convenience aliases.
1038     Register base = obj_.W();
1039     Register offset = XRegisterFrom(field_offset);
1040     Register expected = temp_.W();
1041     Register value = ref_reg;
1042     Register tmp_ptr = temps.AcquireX();    // Pointer to actual memory.
1043     Register tmp_value = temps.AcquireW();  // Value in memory.
1044 
1045     __ Add(tmp_ptr, base.X(), Operand(offset));
1046 
1047     if (kPoisonHeapReferences) {
1048       arm64_codegen->GetAssembler()->PoisonHeapReference(expected);
1049       if (value.Is(expected)) {
1050         // Do not poison `value`, as it is the same register as
1051         // `expected`, which has just been poisoned.
1052       } else {
1053         arm64_codegen->GetAssembler()->PoisonHeapReference(value);
1054       }
1055     }
1056 
1057     // do {
1058     //   tmp_value = [tmp_ptr] - expected;
1059     // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
1060 
1061     vixl::aarch64::Label loop_head, comparison_failed, exit_loop;
1062     __ Bind(&loop_head);
1063     __ Ldxr(tmp_value, MemOperand(tmp_ptr));
1064     __ Cmp(tmp_value, expected);
1065     __ B(&comparison_failed, ne);
1066     __ Stxr(tmp_value, value, MemOperand(tmp_ptr));
1067     __ Cbnz(tmp_value, &loop_head);
1068     __ B(&exit_loop);
1069     __ Bind(&comparison_failed);
1070     __ Clrex();
1071     __ Bind(&exit_loop);
1072 
1073     if (kPoisonHeapReferences) {
1074       arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected);
1075       if (value.Is(expected)) {
1076         // Do not unpoison `value`, as it is the same register as
1077         // `expected`, which has just been unpoisoned.
1078       } else {
1079         arm64_codegen->GetAssembler()->UnpoisonHeapReference(value);
1080       }
1081     }
1082 
1083     __ B(GetExitLabel());
1084   }
1085 
1086  private:
1087   // The register containing the object holding the marked object reference field.
1088   const Register obj_;
1089   // The offset, index and scale factor to access the reference in `obj_`.
1090   uint32_t offset_;
1091   Location index_;
1092   size_t scale_factor_;
1093   // Is a null check required?
1094   bool needs_null_check_;
1095   // Should this reference load use Load-Acquire semantics?
1096   bool use_load_acquire_;
1097   // A temporary register used to hold the lock word of `obj_`; and
1098   // also to hold the original reference value, when the reference is
1099   // marked.
1100   const Register temp_;
1101 
1102   DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64);
1103 };
1104 
1105 // Slow path generating a read barrier for a heap reference.
1106 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
1107  public:
ReadBarrierForHeapReferenceSlowPathARM64(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)1108   ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
1109                                            Location out,
1110                                            Location ref,
1111                                            Location obj,
1112                                            uint32_t offset,
1113                                            Location index)
1114       : SlowPathCodeARM64(instruction),
1115         out_(out),
1116         ref_(ref),
1117         obj_(obj),
1118         offset_(offset),
1119         index_(index) {
1120     DCHECK(kEmitCompilerReadBarrier);
1121     // If `obj` is equal to `out` or `ref`, it means the initial object
1122     // has been overwritten by (or after) the heap object reference load
1123     // to be instrumented, e.g.:
1124     //
1125     //   __ Ldr(out, HeapOperand(out, class_offset);
1126     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
1127     //
1128     // In that case, we have lost the information about the original
1129     // object, and the emitted read barrier cannot work properly.
1130     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
1131     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
1132   }
1133 
EmitNativeCode(CodeGenerator * codegen)1134   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
1135     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1136     LocationSummary* locations = instruction_->GetLocations();
1137     DataType::Type type = DataType::Type::kReference;
1138     DCHECK(locations->CanCall());
1139     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
1140     DCHECK(instruction_->IsInstanceFieldGet() ||
1141            instruction_->IsStaticFieldGet() ||
1142            instruction_->IsArrayGet() ||
1143            instruction_->IsInstanceOf() ||
1144            instruction_->IsCheckCast() ||
1145            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
1146         << "Unexpected instruction in read barrier for heap reference slow path: "
1147         << instruction_->DebugName();
1148     // The read barrier instrumentation of object ArrayGet
1149     // instructions does not support the HIntermediateAddress
1150     // instruction.
1151     DCHECK(!(instruction_->IsArrayGet() &&
1152              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
1153 
1154     __ Bind(GetEntryLabel());
1155 
1156     SaveLiveRegisters(codegen, locations);
1157 
1158     // We may have to change the index's value, but as `index_` is a
1159     // constant member (like other "inputs" of this slow path),
1160     // introduce a copy of it, `index`.
1161     Location index = index_;
1162     if (index_.IsValid()) {
1163       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
1164       if (instruction_->IsArrayGet()) {
1165         // Compute the actual memory offset and store it in `index`.
1166         Register index_reg = RegisterFrom(index_, DataType::Type::kInt32);
1167         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
1168         if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
1169           // We are about to change the value of `index_reg` (see the
1170           // calls to vixl::MacroAssembler::Lsl and
1171           // vixl::MacroAssembler::Mov below), but it has
1172           // not been saved by the previous call to
1173           // art::SlowPathCode::SaveLiveRegisters, as it is a
1174           // callee-save register --
1175           // art::SlowPathCode::SaveLiveRegisters does not consider
1176           // callee-save registers, as it has been designed with the
1177           // assumption that callee-save registers are supposed to be
1178           // handled by the called function.  So, as a callee-save
1179           // register, `index_reg` _would_ eventually be saved onto
1180           // the stack, but it would be too late: we would have
1181           // changed its value earlier.  Therefore, we manually save
1182           // it here into another freely available register,
1183           // `free_reg`, chosen of course among the caller-save
1184           // registers (as a callee-save `free_reg` register would
1185           // exhibit the same problem).
1186           //
1187           // Note we could have requested a temporary register from
1188           // the register allocator instead; but we prefer not to, as
1189           // this is a slow path, and we know we can find a
1190           // caller-save register that is available.
1191           Register free_reg = FindAvailableCallerSaveRegister(codegen);
1192           __ Mov(free_reg.W(), index_reg);
1193           index_reg = free_reg;
1194           index = LocationFrom(index_reg);
1195         } else {
1196           // The initial register stored in `index_` has already been
1197           // saved in the call to art::SlowPathCode::SaveLiveRegisters
1198           // (as it is not a callee-save register), so we can freely
1199           // use it.
1200         }
1201         // Shifting the index value contained in `index_reg` by the scale
1202         // factor (2) cannot overflow in practice, as the runtime is
1203         // unable to allocate object arrays with a size larger than
1204         // 2^26 - 1 (that is, 2^28 - 4 bytes).
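             // For illustration: with at most 2^26 - 1 elements, `index_reg` is at
             // most 0x03FFFFFF, so after the LSL #2 below it is at most 0x0FFFFFFC,
             // and adding the (small) data offset `offset_` cannot overflow the
             // 32-bit register.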
1205         __ Lsl(index_reg, index_reg, DataType::SizeShift(type));
1206         static_assert(
1207             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
1208             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
1209         __ Add(index_reg, index_reg, Operand(offset_));
1210       } else {
1211         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
1212         // intrinsics, `index_` is not shifted by a scale factor of 2
1213         // (as in the case of ArrayGet), as it is actually an offset
1214         // to an object field within an object.
1215         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
1216         DCHECK(instruction_->GetLocations()->Intrinsified());
1217         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
1218                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
1219             << instruction_->AsInvoke()->GetIntrinsic();
1220         DCHECK_EQ(offset_, 0u);
1221         DCHECK(index_.IsRegister());
1222       }
1223     }
1224 
1225     // We're moving two or three locations to locations that could
1226     // overlap, so we need a parallel move resolver.
1227     InvokeRuntimeCallingConvention calling_convention;
1228     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
1229     parallel_move.AddMove(ref_,
1230                           LocationFrom(calling_convention.GetRegisterAt(0)),
1231                           type,
1232                           nullptr);
1233     parallel_move.AddMove(obj_,
1234                           LocationFrom(calling_convention.GetRegisterAt(1)),
1235                           type,
1236                           nullptr);
1237     if (index.IsValid()) {
1238       parallel_move.AddMove(index,
1239                             LocationFrom(calling_convention.GetRegisterAt(2)),
1240                             DataType::Type::kInt32,
1241                             nullptr);
1242       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
1243     } else {
1244       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
1245       arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
1246     }
1247     arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
1248                                  instruction_,
1249                                  instruction_->GetDexPc(),
1250                                  this);
1251     CheckEntrypointTypes<
1252         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
1253     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
1254 
1255     RestoreLiveRegisters(codegen, locations);
1256 
1257     __ B(GetExitLabel());
1258   }
1259 
1260   const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
1261 
1262  private:
1263   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
1264     size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
1265     size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
1266     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
1267       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
1268         return Register(VIXLRegCodeFromART(i), kXRegSize);
1269       }
1270     }
1271     // We shall never fail to find a free caller-save register, as
1272     // there are more than two core caller-save registers on ARM64
1273     // (meaning it is possible to find one which is different from
1274     // `ref` and `obj`).
1275     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
1276     LOG(FATAL) << "Could not find a free register";
1277     UNREACHABLE();
1278   }
1279 
1280   const Location out_;
1281   const Location ref_;
1282   const Location obj_;
1283   const uint32_t offset_;
1284   // An additional location containing an index to an array.
1285   // Only used for HArrayGet and the UnsafeGetObject &
1286   // UnsafeGetObjectVolatile intrinsics.
1287   const Location index_;
1288 
1289   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
1290 };
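
     // As a rough sketch of the contract set up above (inferred from the
     // CheckEntrypointTypes<> assertion, not a definitive signature), the
     // kQuickReadBarrierSlow entrypoint behaves like
     //
     //   mirror::Object* ReadBarrierSlow(mirror::Object* ref,
     //                                   mirror::Object* obj,
     //                                   uint32_t offset);
     //
     // so the parallel move above places `ref_` in the first argument register,
     // `obj_` in the second, the (possibly index-adjusted) offset in the third,
     // and the returned reference is moved back into `out_`.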
1291 
1292 // Slow path generating a read barrier for a GC root.
1293 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
1294  public:
1295   ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
1296       : SlowPathCodeARM64(instruction), out_(out), root_(root) {
1297     DCHECK(kEmitCompilerReadBarrier);
1298   }
1299 
1300   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
1301     LocationSummary* locations = instruction_->GetLocations();
1302     DataType::Type type = DataType::Type::kReference;
1303     DCHECK(locations->CanCall());
1304     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
1305     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
1306         << "Unexpected instruction in read barrier for GC root slow path: "
1307         << instruction_->DebugName();
1308 
1309     __ Bind(GetEntryLabel());
1310     SaveLiveRegisters(codegen, locations);
1311 
1312     InvokeRuntimeCallingConvention calling_convention;
1313     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1314     // The argument of the ReadBarrierForRootSlow is not a managed
1315     // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
1316     // thus we need a 64-bit move here, and we cannot use
1317     //
1318     //   arm64_codegen->MoveLocation(
1319     //       LocationFrom(calling_convention.GetRegisterAt(0)),
1320     //       root_,
1321     //       type);
1322     //
1323     // which would emit a 32-bit move, as `type` is a (32-bit wide)
1324     // reference type (`DataType::Type::kReference`).
1325     __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
1326     arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
1327                                  instruction_,
1328                                  instruction_->GetDexPc(),
1329                                  this);
1330     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
1331     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
1332 
1333     RestoreLiveRegisters(codegen, locations);
1334     __ B(GetExitLabel());
1335   }
1336 
1337   const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }
1338 
1339  private:
1340   const Location out_;
1341   const Location root_;
1342 
1343   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
1344 };
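
     // Likewise, a sketch of the root entrypoint's shape, as implied by the
     // CheckEntrypointTypes<> assertion above:
     //
     //   mirror::Object* ReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
     //
     // The argument is a pointer to the root itself (hence the 64-bit Mov above),
     // and the returned managed reference is moved into `out_`.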
1345 
1346 #undef __
1347 
1348 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
1349   Location next_location;
1350   if (type == DataType::Type::kVoid) {
1351     LOG(FATAL) << "Unreachable type " << type;
1352   }
1353 
1354   if (DataType::IsFloatingPointType(type) &&
1355       (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
1356     next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
1357   } else if (!DataType::IsFloatingPointType(type) &&
1358              (gp_index_ < calling_convention.GetNumberOfRegisters())) {
1359     next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
1360   } else {
1361     size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
1362     next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
1363                                                 : Location::StackSlot(stack_offset);
1364   }
1365 
1366   // Space on the stack is reserved for all arguments.
1367   stack_index_ += DataType::Is64BitType(type) ? 2 : 1;
1368   return next_location;
1369 }
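
     // For example (assuming the usual ARM64 managed convention, where the first
     // few core and FP argument registers are handed out in order), a signature of
     // (int, float, long, double) would receive, in turn, a core W register, the
     // first FP S register, the next core X register and the next FP D register,
     // while `stack_index_` advances by 1, 1, 2 and 2 so that any further arguments
     // that overflow onto the stack land at the correct offsets.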
1370 
1371 Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
1372   return LocationFrom(kArtMethodRegister);
1373 }
1374 
1375 CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
1376                                        const Arm64InstructionSetFeatures& isa_features,
1377                                        const CompilerOptions& compiler_options,
1378                                        OptimizingCompilerStats* stats)
1379     : CodeGenerator(graph,
1380                     kNumberOfAllocatableRegisters,
1381                     kNumberOfAllocatableFPRegisters,
1382                     kNumberOfAllocatableRegisterPairs,
1383                     callee_saved_core_registers.GetList(),
1384                     callee_saved_fp_registers.GetList(),
1385                     compiler_options,
1386                     stats),
1387       block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1388       jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1389       location_builder_(graph, this),
1390       instruction_visitor_(graph, this),
1391       move_resolver_(graph->GetAllocator(), this),
1392       assembler_(graph->GetAllocator()),
1393       isa_features_(isa_features),
1394       uint32_literals_(std::less<uint32_t>(),
1395                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1396       uint64_literals_(std::less<uint64_t>(),
1397                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1398       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1399       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1400       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1401       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1402       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1403       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1404       baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1405       jit_string_patches_(StringReferenceValueComparator(),
1406                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1407       jit_class_patches_(TypeReferenceValueComparator(),
1408                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1409   // Save the link register (containing the return address) to mimic Quick.
1410   AddAllocatedRegister(LocationFrom(lr));
1411 }
1412 
1413 #define __ GetVIXLAssembler()->
1414 
1415 void CodeGeneratorARM64::EmitJumpTables() {
1416   for (auto&& jump_table : jump_tables_) {
1417     jump_table->EmitTable(this);
1418   }
1419 }
1420 
1421 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
1422   EmitJumpTables();
1423   // Ensure we emit the literal pool.
1424   __ FinalizeCode();
1425 
1426   CodeGenerator::Finalize(allocator);
1427 }
1428 
1429 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
1430   // Note: There are 6 kinds of moves:
1431   // 1. constant -> GPR/FPR (non-cycle)
1432   // 2. constant -> stack (non-cycle)
1433   // 3. GPR/FPR -> GPR/FPR
1434   // 4. GPR/FPR -> stack
1435   // 5. stack -> GPR/FPR
1436   // 6. stack -> stack (non-cycle)
1437   // Cases 1, 2 and 6 should never be included in a dependency cycle on ARM64. For cases 3, 4
1438   // and 5, VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no
1439   // intersecting cycles on ARM64, so we always have at least 1 GPR and 1 FPR available as VIXL
1440   // temps to resolve the dependency.
1441   vixl_temps_.Open(GetVIXLAssembler());
1442 }
1443 
1444 void ParallelMoveResolverARM64::FinishEmitNativeCode() {
1445   vixl_temps_.Close();
1446 }
1447 
1448 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
1449   DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
1450          || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
1451          || kind == Location::kSIMDStackSlot);
1452   kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
1453       ? Location::kFpuRegister
1454       : Location::kRegister;
1455   Location scratch = GetScratchLocation(kind);
1456   if (!scratch.Equals(Location::NoLocation())) {
1457     return scratch;
1458   }
1459   // Allocate from VIXL temp registers.
1460   if (kind == Location::kRegister) {
1461     scratch = LocationFrom(vixl_temps_.AcquireX());
1462   } else {
1463     DCHECK_EQ(kind, Location::kFpuRegister);
1464     scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
1465         ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
1466         : vixl_temps_.AcquireD());
1467   }
1468   AddScratchLocation(scratch);
1469   return scratch;
1470 }
1471 
1472 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
1473   if (loc.IsRegister()) {
1474     vixl_temps_.Release(XRegisterFrom(loc));
1475   } else {
1476     DCHECK(loc.IsFpuRegister());
1477     vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc));
1478   }
1479   RemoveScratchLocation(loc);
1480 }
1481 
1482 void ParallelMoveResolverARM64::EmitMove(size_t index) {
1483   MoveOperands* move = moves_[index];
1484   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid);
1485 }
1486 
1487 void CodeGeneratorARM64::GenerateFrameEntry() {
1488   MacroAssembler* masm = GetVIXLAssembler();
1489   __ Bind(&frame_entry_label_);
1490 
1491   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1492     UseScratchRegisterScope temps(masm);
1493     Register temp = temps.AcquireX();
1494     __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
1495     __ Add(temp, temp, 1);
1496     __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
1497   }
1498 
1499   bool do_overflow_check =
1500       FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
1501   if (do_overflow_check) {
1502     UseScratchRegisterScope temps(masm);
1503     Register temp = temps.AcquireX();
1504     DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1505     __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kArm64)));
1506     {
1507       // Ensure that between load and RecordPcInfo there are no pools emitted.
1508       ExactAssemblyScope eas(GetVIXLAssembler(),
1509                              kInstructionSize,
1510                              CodeBufferCheckScope::kExactSize);
1511       __ ldr(wzr, MemOperand(temp, 0));
1512       RecordPcInfo(nullptr, 0);
1513     }
1514   }
1515 
1516   if (!HasEmptyFrame()) {
1517     int frame_size = GetFrameSize();
1518     // Stack layout:
1519     //      sp[frame_size - 8]        : lr.
1520     //      ...                       : other preserved core registers.
1521     //      ...                       : other preserved fp registers.
1522     //      ...                       : reserved frame space.
1523     //      sp[0]                     : current method.
1524 
1525     // Save the current method if we need it. Note that we do not
1526     // do this in HCurrentMethod, as the instruction might have been removed
1527     // in the SSA graph.
1528     if (RequiresCurrentMethod()) {
1529       __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
1530     } else {
1531       __ Claim(frame_size);
1532     }
1533     GetAssembler()->cfi().AdjustCFAOffset(frame_size);
1534     GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(),
1535         frame_size - GetCoreSpillSize());
1536     GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(),
1537         frame_size - FrameEntrySpillSize());
1538 
1539     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1540       // Initialize should_deoptimize flag to 0.
1541       Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
1542       __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
1543     }
1544   }
1545 
1546   MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
1547 }
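
     // To make the layout above concrete, a possible prologue for a small
     // non-leaf method might look like the following (register choices, the
     // reserved-stack size and all offsets are illustrative only):
     //
     //   sub  x16, sp, #<reserved>    // address probed for stack overflow
     //   ldr  wzr, [x16]              // faults if the stack limit is exceeded
     //   str  x0, [sp, #-<frame>]!    // spill ArtMethod* and claim the frame
     //   stp  x20, x21, [sp, #<off>]  // preserved core registers
     //   str  lr, [sp, #<frame> - 8]  // lr always occupies the highest slot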
1548 
1549 void CodeGeneratorARM64::GenerateFrameExit() {
1550   GetAssembler()->cfi().RememberState();
1551   if (!HasEmptyFrame()) {
1552     int frame_size = GetFrameSize();
1553     GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(),
1554         frame_size - FrameEntrySpillSize());
1555     GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(),
1556         frame_size - GetCoreSpillSize());
1557     __ Drop(frame_size);
1558     GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
1559   }
1560   __ Ret();
1561   GetAssembler()->cfi().RestoreState();
1562   GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
1563 }
1564 
1565 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
1566   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
1567   return CPURegList(CPURegister::kRegister, kXRegSize,
1568                     core_spill_mask_);
1569 }
1570 
1571 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
1572   DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
1573                                          GetNumberOfFloatingPointRegisters()));
1574   return CPURegList(CPURegister::kFPRegister, kDRegSize,
1575                     fpu_spill_mask_);
1576 }
1577 
1578 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
1579   __ Bind(GetLabelOf(block));
1580 }
1581 
1582 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
1583   DCHECK(location.IsRegister());
1584   __ Mov(RegisterFrom(location, DataType::Type::kInt32), value);
1585 }
1586 
1587 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1588   if (location.IsRegister()) {
1589     locations->AddTemp(location);
1590   } else {
1591     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1592   }
1593 }
1594 
1595 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
1596   UseScratchRegisterScope temps(GetVIXLAssembler());
1597   Register card = temps.AcquireX();
1598   Register temp = temps.AcquireW();   // Index within the CardTable - 32bit.
1599   vixl::aarch64::Label done;
1600   if (value_can_be_null) {
1601     __ Cbz(value, &done);
1602   }
1603   __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1604   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
1605   __ Strb(card, MemOperand(card, temp.X()));
1606   if (value_can_be_null) {
1607     __ Bind(&done);
1608   }
1609 }
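
     // A note on the sequence above: `card` holds the biased card table base
     // loaded from the Thread, `temp` holds (object >> kCardShift), and the Strb
     // stores the low byte of that base at card_base + (object >> kCardShift).
     // The runtime is expected to arrange the card table mapping so that this
     // byte value equals the "dirty card" marker, keeping the inline write
     // barrier down to one load, one shift and one store (plus the optional
     // null check on `value`).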
1610 
1611 void CodeGeneratorARM64::SetupBlockedRegisters() const {
1612   // Blocked core registers:
1613   //      lr        : Runtime reserved.
1614   //      tr        : Runtime reserved.
1615   //      mr        : Runtime reserved.
1616   //      ip1       : VIXL core temp.
1617   //      ip0       : VIXL core temp.
1618   //
1619   // Blocked fp registers:
1620   //      d31       : VIXL fp temp.
1621   CPURegList reserved_core_registers = vixl_reserved_core_registers;
1622   reserved_core_registers.Combine(runtime_reserved_core_registers);
1623   while (!reserved_core_registers.IsEmpty()) {
1624     blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
1625   }
1626 
1627   CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
1628   while (!reserved_fp_registers.IsEmpty()) {
1629     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
1630   }
1631 
1632   if (GetGraph()->IsDebuggable()) {
1633     // Stubs do not save callee-save floating point registers. If the graph
1634     // is debuggable, we need to deal with these registers differently. For
1635     // now, just block them.
1636     CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
1637     while (!reserved_fp_registers_debuggable.IsEmpty()) {
1638       blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
1639     }
1640   }
1641 }
1642 
1643 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1644   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1645   __ Str(reg, MemOperand(sp, stack_index));
1646   return kArm64WordSize;
1647 }
1648 
1649 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1650   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1651   __ Ldr(reg, MemOperand(sp, stack_index));
1652   return kArm64WordSize;
1653 }
1654 
1655 size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1656   FPRegister reg = FPRegister(reg_id, kDRegSize);
1657   __ Str(reg, MemOperand(sp, stack_index));
1658   return kArm64WordSize;
1659 }
1660 
1661 size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1662   FPRegister reg = FPRegister(reg_id, kDRegSize);
1663   __ Ldr(reg, MemOperand(sp, stack_index));
1664   return kArm64WordSize;
1665 }
1666 
1667 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
1668   stream << XRegister(reg);
1669 }
1670 
1671 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1672   stream << DRegister(reg);
1673 }
1674 
1675 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
1676   if (constant->IsIntConstant()) {
1677     __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
1678   } else if (constant->IsLongConstant()) {
1679     __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
1680   } else if (constant->IsNullConstant()) {
1681     __ Mov(Register(destination), 0);
1682   } else if (constant->IsFloatConstant()) {
1683     __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue());
1684   } else {
1685     DCHECK(constant->IsDoubleConstant());
1686     __ Fmov(FPRegister(destination), constant->AsDoubleConstant()->GetValue());
1687   }
1688 }
1689 
1690 
1691 static bool CoherentConstantAndType(Location constant, DataType::Type type) {
1692   DCHECK(constant.IsConstant());
1693   HConstant* cst = constant.GetConstant();
1694   return (cst->IsIntConstant() && type == DataType::Type::kInt32) ||
1695          // Null is mapped to a core W register, which we associate with kPrimInt.
1696          (cst->IsNullConstant() && type == DataType::Type::kInt32) ||
1697          (cst->IsLongConstant() && type == DataType::Type::kInt64) ||
1698          (cst->IsFloatConstant() && type == DataType::Type::kFloat32) ||
1699          (cst->IsDoubleConstant() && type == DataType::Type::kFloat64);
1700 }
1701 
1702 // Allocate a scratch register from the VIXL pool, querying first
1703 // the floating-point register pool, and then the core register
1704 // pool. This is essentially a reimplementation of
1705 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
1706 // using a different allocation strategy.
1707 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
1708                                                     vixl::aarch64::UseScratchRegisterScope* temps,
1709                                                     int size_in_bits) {
1710   return masm->GetScratchFPRegisterList()->IsEmpty()
1711       ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
1712       : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
1713 }
1714 
1715 void CodeGeneratorARM64::MoveLocation(Location destination,
1716                                       Location source,
1717                                       DataType::Type dst_type) {
1718   if (source.Equals(destination)) {
1719     return;
1720   }
1721 
1722   // A valid move can always be inferred from the destination and source
1723   // locations. When moving from and to a register, the argument type can be
1724   // used to generate 32bit instead of 64bit moves. In debug mode we also
1725   // check the coherency of the locations and the type.
1726   bool unspecified_type = (dst_type == DataType::Type::kVoid);
1727 
1728   if (destination.IsRegister() || destination.IsFpuRegister()) {
1729     if (unspecified_type) {
1730       HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
1731       if (source.IsStackSlot() ||
1732           (src_cst != nullptr && (src_cst->IsIntConstant()
1733                                   || src_cst->IsFloatConstant()
1734                                   || src_cst->IsNullConstant()))) {
1735         // For stack slots and 32bit constants, a 64bit type is appropriate.
1736         dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32;
1737       } else {
1738         // If the source is a double stack slot or a 64bit constant, a 64bit
1739         // type is appropriate. Else the source is a register, and since the
1740         // type has not been specified, we chose a 64bit type to force a 64bit
1741         // move.
1742         dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64;
1743       }
1744     }
1745     DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) ||
1746            (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type)));
1747     CPURegister dst = CPURegisterFrom(destination, dst_type);
1748     if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
1749       DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
1750       __ Ldr(dst, StackOperandFrom(source));
1751     } else if (source.IsSIMDStackSlot()) {
1752       __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
1753     } else if (source.IsConstant()) {
1754       DCHECK(CoherentConstantAndType(source, dst_type));
1755       MoveConstant(dst, source.GetConstant());
1756     } else if (source.IsRegister()) {
1757       if (destination.IsRegister()) {
1758         __ Mov(Register(dst), RegisterFrom(source, dst_type));
1759       } else {
1760         DCHECK(destination.IsFpuRegister());
1761         DataType::Type source_type = DataType::Is64BitType(dst_type)
1762             ? DataType::Type::kInt64
1763             : DataType::Type::kInt32;
1764         __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
1765       }
1766     } else {
1767       DCHECK(source.IsFpuRegister());
1768       if (destination.IsRegister()) {
1769         DataType::Type source_type = DataType::Is64BitType(dst_type)
1770             ? DataType::Type::kFloat64
1771             : DataType::Type::kFloat32;
1772         __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
1773       } else {
1774         DCHECK(destination.IsFpuRegister());
1775         if (GetGraph()->HasSIMD()) {
1776           __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
1777         } else {
1778           __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type));
1779         }
1780       }
1781     }
1782   } else if (destination.IsSIMDStackSlot()) {
1783     if (source.IsFpuRegister()) {
1784       __ Str(QRegisterFrom(source), StackOperandFrom(destination));
1785     } else {
1786       DCHECK(source.IsSIMDStackSlot());
1787       UseScratchRegisterScope temps(GetVIXLAssembler());
1788       if (GetVIXLAssembler()->GetScratchFPRegisterList()->IsEmpty()) {
1789         Register temp = temps.AcquireX();
1790         __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
1791         __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
1792         __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
1793         __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
1794       } else {
1795         FPRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
1796         __ Ldr(temp, StackOperandFrom(source));
1797         __ Str(temp, StackOperandFrom(destination));
1798       }
1799     }
1800   } else {  // The destination is not a register. It must be a stack slot.
1801     DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
1802     if (source.IsRegister() || source.IsFpuRegister()) {
1803       if (unspecified_type) {
1804         if (source.IsRegister()) {
1805           dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64;
1806         } else {
1807           dst_type =
1808               destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64;
1809         }
1810       }
1811       DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) &&
1812              (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type)));
1813       __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
1814     } else if (source.IsConstant()) {
1815       DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
1816           << source << " " << dst_type;
1817       UseScratchRegisterScope temps(GetVIXLAssembler());
1818       HConstant* src_cst = source.GetConstant();
1819       CPURegister temp;
1820       if (src_cst->IsZeroBitPattern()) {
1821         temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
1822             ? Register(xzr)
1823             : Register(wzr);
1824       } else {
1825         if (src_cst->IsIntConstant()) {
1826           temp = temps.AcquireW();
1827         } else if (src_cst->IsLongConstant()) {
1828           temp = temps.AcquireX();
1829         } else if (src_cst->IsFloatConstant()) {
1830           temp = temps.AcquireS();
1831         } else {
1832           DCHECK(src_cst->IsDoubleConstant());
1833           temp = temps.AcquireD();
1834         }
1835         MoveConstant(temp, src_cst);
1836       }
1837       __ Str(temp, StackOperandFrom(destination));
1838     } else {
1839       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
1840       DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
1841       UseScratchRegisterScope temps(GetVIXLAssembler());
1842       // Use any scratch register (a core or a floating-point one)
1843       // from VIXL scratch register pools as a temporary.
1844       //
1845       // We used to only use the FP scratch register pool, but in some
1846       // rare cases the only register from this pool (D31) would
1847       // already be used (e.g. within a ParallelMove instruction, when
1848       // a move is blocked by another move requiring a scratch FP
1849       // register, which would reserve D31). To prevent this issue, we
1850       // ask for a scratch register of any type (core or FP).
1851       //
1852       // Also, we start by asking for a FP scratch register first, as the
1853       // demand of scratch core registers is higher. This is why we
1854       // use AcquireFPOrCoreCPURegisterOfSize instead of
1855       // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
1856       // allocates core scratch registers first.
1857       CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
1858           GetVIXLAssembler(),
1859           &temps,
1860           (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
1861       __ Ldr(temp, StackOperandFrom(source));
1862       __ Str(temp, StackOperandFrom(destination));
1863     }
1864   }
1865 }
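
     // A few illustrative shapes of what MoveLocation() emits (the exact
     // instruction depends on the types involved):
     //
     //   core reg   -> core reg    : Mov  (W or X, per `dst_type`)
     //   FP reg     -> core reg    : Fmov
     //   zero const -> stack slot  : Str wzr/xzr, [sp, #offset]  (no temp needed)
     //   stack slot -> stack slot  : Ldr/Str through a scratch register,
     //                               preferring the FP pool as explained above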
1866 
1867 void CodeGeneratorARM64::Load(DataType::Type type,
1868                               CPURegister dst,
1869                               const MemOperand& src) {
1870   switch (type) {
1871     case DataType::Type::kBool:
1872     case DataType::Type::kUint8:
1873       __ Ldrb(Register(dst), src);
1874       break;
1875     case DataType::Type::kInt8:
1876       __ Ldrsb(Register(dst), src);
1877       break;
1878     case DataType::Type::kUint16:
1879       __ Ldrh(Register(dst), src);
1880       break;
1881     case DataType::Type::kInt16:
1882       __ Ldrsh(Register(dst), src);
1883       break;
1884     case DataType::Type::kInt32:
1885     case DataType::Type::kReference:
1886     case DataType::Type::kInt64:
1887     case DataType::Type::kFloat32:
1888     case DataType::Type::kFloat64:
1889       DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1890       __ Ldr(dst, src);
1891       break;
1892     case DataType::Type::kUint32:
1893     case DataType::Type::kUint64:
1894     case DataType::Type::kVoid:
1895       LOG(FATAL) << "Unreachable type " << type;
1896   }
1897 }
1898 
1899 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
1900                                      CPURegister dst,
1901                                      const MemOperand& src,
1902                                      bool needs_null_check) {
1903   MacroAssembler* masm = GetVIXLAssembler();
1904   UseScratchRegisterScope temps(masm);
1905   Register temp_base = temps.AcquireX();
1906   DataType::Type type = instruction->GetType();
1907 
1908   DCHECK(!src.IsPreIndex());
1909   DCHECK(!src.IsPostIndex());
1910 
1911   // TODO(vixl): Let the MacroAssembler handle MemOperand.
1912   __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
1913   {
1914     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
1915     MemOperand base = MemOperand(temp_base);
1916     switch (type) {
1917       case DataType::Type::kBool:
1918       case DataType::Type::kUint8:
1919       case DataType::Type::kInt8:
1920         {
1921           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1922           __ ldarb(Register(dst), base);
1923           if (needs_null_check) {
1924             MaybeRecordImplicitNullCheck(instruction);
1925           }
1926         }
1927         if (type == DataType::Type::kInt8) {
1928           __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1929         }
1930         break;
1931       case DataType::Type::kUint16:
1932       case DataType::Type::kInt16:
1933         {
1934           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1935           __ ldarh(Register(dst), base);
1936           if (needs_null_check) {
1937             MaybeRecordImplicitNullCheck(instruction);
1938           }
1939         }
1940         if (type == DataType::Type::kInt16) {
1941           __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1942         }
1943         break;
1944       case DataType::Type::kInt32:
1945       case DataType::Type::kReference:
1946       case DataType::Type::kInt64:
1947         DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1948         {
1949           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1950           __ ldar(Register(dst), base);
1951           if (needs_null_check) {
1952             MaybeRecordImplicitNullCheck(instruction);
1953           }
1954         }
1955         break;
1956       case DataType::Type::kFloat32:
1957       case DataType::Type::kFloat64: {
1958         DCHECK(dst.IsFPRegister());
1959         DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1960 
1961         Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
1962         {
1963           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1964           __ ldar(temp, base);
1965           if (needs_null_check) {
1966             MaybeRecordImplicitNullCheck(instruction);
1967           }
1968         }
1969         __ Fmov(FPRegister(dst), temp);
1970         break;
1971       }
1972       case DataType::Type::kUint32:
1973       case DataType::Type::kUint64:
1974       case DataType::Type::kVoid:
1975         LOG(FATAL) << "Unreachable type " << type;
1976     }
1977   }
1978 }
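
     // As a sketch, an acquiring load of a 32-bit field at `base + #offset` into
     // w0 expands to something like (the temp comes from the scratch pool):
     //
     //   add  x16, <base>, #offset
     //   ldar w0, [x16]             // pc recorded here if a null check is needed
     //
     // with an extra Sbfx for signed sub-word types and an extra Fmov through a
     // core temp for floating-point destinations, as implemented above.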
1979 
1980 void CodeGeneratorARM64::Store(DataType::Type type,
1981                                CPURegister src,
1982                                const MemOperand& dst) {
1983   switch (type) {
1984     case DataType::Type::kBool:
1985     case DataType::Type::kUint8:
1986     case DataType::Type::kInt8:
1987       __ Strb(Register(src), dst);
1988       break;
1989     case DataType::Type::kUint16:
1990     case DataType::Type::kInt16:
1991       __ Strh(Register(src), dst);
1992       break;
1993     case DataType::Type::kInt32:
1994     case DataType::Type::kReference:
1995     case DataType::Type::kInt64:
1996     case DataType::Type::kFloat32:
1997     case DataType::Type::kFloat64:
1998       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1999       __ Str(src, dst);
2000       break;
2001     case DataType::Type::kUint32:
2002     case DataType::Type::kUint64:
2003     case DataType::Type::kVoid:
2004       LOG(FATAL) << "Unreachable type " << type;
2005   }
2006 }
2007 
2008 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
2009                                       DataType::Type type,
2010                                       CPURegister src,
2011                                       const MemOperand& dst,
2012                                       bool needs_null_check) {
2013   MacroAssembler* masm = GetVIXLAssembler();
2014   UseScratchRegisterScope temps(GetVIXLAssembler());
2015   Register temp_base = temps.AcquireX();
2016 
2017   DCHECK(!dst.IsPreIndex());
2018   DCHECK(!dst.IsPostIndex());
2019 
2020   // TODO(vixl): Let the MacroAssembler handle this.
2021   Operand op = OperandFromMemOperand(dst);
2022   __ Add(temp_base, dst.GetBaseRegister(), op);
2023   MemOperand base = MemOperand(temp_base);
2024   // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2025   switch (type) {
2026     case DataType::Type::kBool:
2027     case DataType::Type::kUint8:
2028     case DataType::Type::kInt8:
2029       {
2030         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2031         __ stlrb(Register(src), base);
2032         if (needs_null_check) {
2033           MaybeRecordImplicitNullCheck(instruction);
2034         }
2035       }
2036       break;
2037     case DataType::Type::kUint16:
2038     case DataType::Type::kInt16:
2039       {
2040         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2041         __ stlrh(Register(src), base);
2042         if (needs_null_check) {
2043           MaybeRecordImplicitNullCheck(instruction);
2044         }
2045       }
2046       break;
2047     case DataType::Type::kInt32:
2048     case DataType::Type::kReference:
2049     case DataType::Type::kInt64:
2050       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
2051       {
2052         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2053         __ stlr(Register(src), base);
2054         if (needs_null_check) {
2055           MaybeRecordImplicitNullCheck(instruction);
2056         }
2057       }
2058       break;
2059     case DataType::Type::kFloat32:
2060     case DataType::Type::kFloat64: {
2061       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
2062       Register temp_src;
2063       if (src.IsZero()) {
2064         // The zero register is used to avoid synthesizing zero constants.
2065         temp_src = Register(src);
2066       } else {
2067         DCHECK(src.IsFPRegister());
2068         temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
2069         __ Fmov(temp_src, FPRegister(src));
2070       }
2071       {
2072         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2073         __ stlr(temp_src, base);
2074         if (needs_null_check) {
2075           MaybeRecordImplicitNullCheck(instruction);
2076         }
2077       }
2078       break;
2079     }
2080     case DataType::Type::kUint32:
2081     case DataType::Type::kUint64:
2082     case DataType::Type::kVoid:
2083       LOG(FATAL) << "Unreachable type " << type;
2084   }
2085 }
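
     // Likewise, a releasing store of a non-zero float `s0` to `obj + #offset`
     // looks roughly like:
     //
     //   add  x16, <obj>, #offset
     //   fmov w17, s0               // FP values go through a core temp
     //   stlr w17, [x16]            // pc recorded here if a null check is needed
     //
     // while storing a zero constant uses wzr/xzr directly and skips the Fmov.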
2086 
2087 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
2088                                        HInstruction* instruction,
2089                                        uint32_t dex_pc,
2090                                        SlowPathCode* slow_path) {
2091   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2092 
2093   __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value()));
2094   {
2095     // Ensure the pc position is recorded immediately after the `blr` instruction.
2096     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
2097     __ blr(lr);
2098     if (EntrypointRequiresStackMap(entrypoint)) {
2099       RecordPcInfo(instruction, dex_pc, slow_path);
2100     }
2101   }
2102 }
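
     // In other words, a runtime call boils down to:
     //
     //   ldr lr, [tr, #<entrypoint offset>]  // load the trampoline from the Thread
     //   blr lr                              // pc after the blr keyed to the stack map
     //
     // with RecordPcInfo() emitted only for entrypoints that require a stack map.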
2103 
2104 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2105                                                              HInstruction* instruction,
2106                                                              SlowPathCode* slow_path) {
2107   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2108   __ Ldr(lr, MemOperand(tr, entry_point_offset));
2109   __ Blr(lr);
2110 }
2111 
2112 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
2113                                                                      Register class_reg) {
2114   UseScratchRegisterScope temps(GetVIXLAssembler());
2115   Register temp = temps.AcquireW();
2116   constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
2117   const size_t status_byte_offset =
2118       mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
2119   constexpr uint32_t shifted_initialized_value =
2120       enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
2121 
2122   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
2123   // TODO(vixl): Let the MacroAssembler handle MemOperand.
2124   __ Add(temp, class_reg, status_byte_offset);
2125   __ Ldarb(temp, HeapOperand(temp));
2126   __ Cmp(temp, shifted_initialized_value);
2127   __ B(lo, slow_path->GetEntryLabel());
2128   __ Bind(slow_path->GetExitLabel());
2129 }
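
     // The check above therefore expands to roughly:
     //
     //   add   w<t>, <class_reg>, #<status byte offset>
     //   ldarb w<t>, [x<t>]                 // acquire, for consistent memory ordering
     //   cmp   w<t>, #<shifted initialized value>
     //   b.lo  <slow path>
     //
     // i.e. any class status strictly below "initialized" takes the slow path.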
2130 
2131 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
2132   BarrierType type = BarrierAll;
2133 
2134   switch (kind) {
2135     case MemBarrierKind::kAnyAny:
2136     case MemBarrierKind::kAnyStore: {
2137       type = BarrierAll;
2138       break;
2139     }
2140     case MemBarrierKind::kLoadAny: {
2141       type = BarrierReads;
2142       break;
2143     }
2144     case MemBarrierKind::kStoreStore: {
2145       type = BarrierWrites;
2146       break;
2147     }
2148     default:
2149       LOG(FATAL) << "Unexpected memory barrier " << kind;
2150   }
2151   __ Dmb(InnerShareable, type);
2152 }
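
     // As a rough mapping of the kinds above to the emitted instruction (all in
     // the inner shareable domain): kAnyAny and kAnyStore give `dmb ish`,
     // kLoadAny gives `dmb ishld`, and kStoreStore gives `dmb ishst`.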
2153 
2154 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
2155                                                          HBasicBlock* successor) {
2156   SuspendCheckSlowPathARM64* slow_path =
2157       down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
2158   if (slow_path == nullptr) {
2159     slow_path =
2160         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARM64(instruction, successor);
2161     instruction->SetSlowPath(slow_path);
2162     codegen_->AddSlowPath(slow_path);
2163     if (successor != nullptr) {
2164       DCHECK(successor->IsLoopHeader());
2165     }
2166   } else {
2167     DCHECK_EQ(slow_path->GetSuccessor(), successor);
2168   }
2169 
2170   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
2171   Register temp = temps.AcquireW();
2172 
2173   __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
2174   if (successor == nullptr) {
2175     __ Cbnz(temp, slow_path->GetEntryLabel());
2176     __ Bind(slow_path->GetReturnLabel());
2177   } else {
2178     __ Cbz(temp, codegen_->GetLabelOf(successor));
2179     __ B(slow_path->GetEntryLabel());
2180     // slow_path will return to GetLabelOf(successor).
2181   }
2182 }
2183 
2184 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
2185                                                              CodeGeneratorARM64* codegen)
2186       : InstructionCodeGenerator(graph, codegen),
2187         assembler_(codegen->GetAssembler()),
2188         codegen_(codegen) {}
2189 
2190 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
2191   DCHECK_EQ(instr->InputCount(), 2U);
2192   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2193   DataType::Type type = instr->GetResultType();
2194   switch (type) {
2195     case DataType::Type::kInt32:
2196     case DataType::Type::kInt64:
2197       locations->SetInAt(0, Location::RequiresRegister());
2198       locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
2199       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2200       break;
2201 
2202     case DataType::Type::kFloat32:
2203     case DataType::Type::kFloat64:
2204       locations->SetInAt(0, Location::RequiresFpuRegister());
2205       locations->SetInAt(1, Location::RequiresFpuRegister());
2206       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2207       break;
2208 
2209     default:
2210       LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
2211   }
2212 }
2213 
2214 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
2215                                            const FieldInfo& field_info) {
2216   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2217 
2218   bool object_field_get_with_read_barrier =
2219       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
2220   LocationSummary* locations =
2221       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2222                                                        object_field_get_with_read_barrier
2223                                                            ? LocationSummary::kCallOnSlowPath
2224                                                            : LocationSummary::kNoCall);
2225   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
2226     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2227     // We need a temporary register for the read barrier marking slow
2228     // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier.
2229     if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
2230         !Runtime::Current()->UseJitCompilation() &&
2231         !field_info.IsVolatile()) {
2232       // If link-time thunks for the Baker read barrier are enabled, for AOT
2233       // non-volatile loads we need a temporary only if the offset is too big.
2234       if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
2235         locations->AddTemp(FixedTempLocation());
2236       }
2237     } else {
2238       locations->AddTemp(Location::RequiresRegister());
2239     }
2240   }
2241   locations->SetInAt(0, Location::RequiresRegister());
2242   if (DataType::IsFloatingPointType(instruction->GetType())) {
2243     locations->SetOut(Location::RequiresFpuRegister());
2244   } else {
2245     // The output overlaps for an object field get when read barriers
2246     // are enabled: we do not want the load to overwrite the object's
2247     // location, as we need it to emit the read barrier.
2248     locations->SetOut(
2249         Location::RequiresRegister(),
2250         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2251   }
2252 }
2253 
2254 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
2255                                                    const FieldInfo& field_info) {
2256   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2257   LocationSummary* locations = instruction->GetLocations();
2258   Location base_loc = locations->InAt(0);
2259   Location out = locations->Out();
2260   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
2261   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
2262   DataType::Type load_type = instruction->GetType();
2263   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
2264 
2265   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier &&
2266       load_type == DataType::Type::kReference) {
2267     // Object FieldGet with Baker's read barrier case.
2268     // /* HeapReference<Object> */ out = *(base + offset)
2269     Register base = RegisterFrom(base_loc, DataType::Type::kReference);
2270     Location maybe_temp =
2271         (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2272     // Note that potential implicit null checks are handled in this
2273     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
2274     codegen_->GenerateFieldLoadWithBakerReadBarrier(
2275         instruction,
2276         out,
2277         base,
2278         offset,
2279         maybe_temp,
2280         /* needs_null_check */ true,
2281         field_info.IsVolatile());
2282   } else {
2283     // General case.
2284     if (field_info.IsVolatile()) {
2285       // Note that a potential implicit null check is handled in this
2286       // CodeGeneratorARM64::LoadAcquire call.
2287       // NB: LoadAcquire will record the pc info if needed.
2288       codegen_->LoadAcquire(
2289           instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
2290     } else {
2291       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2292       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2293       codegen_->Load(load_type, OutputCPURegister(instruction), field);
2294       codegen_->MaybeRecordImplicitNullCheck(instruction);
2295     }
2296     if (load_type == DataType::Type::kReference) {
2297       // If read barriers are enabled, emit read barriers other than
2298       // Baker's using a slow path (and also unpoison the loaded
2299       // reference, if heap poisoning is enabled).
2300       codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
2301     }
2302   }
2303 }
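
     // So, outside of the Baker read barrier case, a plain non-volatile int field
     // read is simply
     //
     //   ldr w<out>, [x<base>, #offset]
     //
     // with the implicit null check recorded on that load, while a volatile field
     // goes through LoadAcquire() and the ldar-based sequence sketched earlier.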
2304 
2305 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
2306   LocationSummary* locations =
2307       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2308   locations->SetInAt(0, Location::RequiresRegister());
2309   if (IsConstantZeroBitPattern(instruction->InputAt(1))) {
2310     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
2311   } else if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
2312     locations->SetInAt(1, Location::RequiresFpuRegister());
2313   } else {
2314     locations->SetInAt(1, Location::RequiresRegister());
2315   }
2316 }
2317 
2318 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
2319                                                    const FieldInfo& field_info,
2320                                                    bool value_can_be_null) {
2321   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
2322 
2323   Register obj = InputRegisterAt(instruction, 0);
2324   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
2325   CPURegister source = value;
2326   Offset offset = field_info.GetFieldOffset();
2327   DataType::Type field_type = field_info.GetFieldType();
2328 
2329   {
2330     // We use a block to end the scratch scope before the write barrier, thus
2331     // freeing the temporary registers so they can be used in `MarkGCCard`.
2332     UseScratchRegisterScope temps(GetVIXLAssembler());
2333 
2334     if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
2335       DCHECK(value.IsW());
2336       Register temp = temps.AcquireW();
2337       __ Mov(temp, value.W());
2338       GetAssembler()->PoisonHeapReference(temp.W());
2339       source = temp;
2340     }
2341 
2342     if (field_info.IsVolatile()) {
2343       codegen_->StoreRelease(
2344           instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check */ true);
2345     } else {
2346       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2347       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2348       codegen_->Store(field_type, source, HeapOperand(obj, offset));
2349       codegen_->MaybeRecordImplicitNullCheck(instruction);
2350     }
2351   }
2352 
2353   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
2354     codegen_->MarkGCCard(obj, Register(value), value_can_be_null);
2355   }
2356 }
2357 
2358 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
2359   DataType::Type type = instr->GetType();
2360 
2361   switch (type) {
2362     case DataType::Type::kInt32:
2363     case DataType::Type::kInt64: {
2364       Register dst = OutputRegister(instr);
2365       Register lhs = InputRegisterAt(instr, 0);
2366       Operand rhs = InputOperandAt(instr, 1);
2367       if (instr->IsAdd()) {
2368         __ Add(dst, lhs, rhs);
2369       } else if (instr->IsAnd()) {
2370         __ And(dst, lhs, rhs);
2371       } else if (instr->IsOr()) {
2372         __ Orr(dst, lhs, rhs);
2373       } else if (instr->IsSub()) {
2374         __ Sub(dst, lhs, rhs);
2375       } else if (instr->IsRor()) {
2376         if (rhs.IsImmediate()) {
2377           uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
2378           __ Ror(dst, lhs, shift);
2379         } else {
2380           // Ensure the shift distance is in a register of the same size as the result. If
2381           // we are rotating a long and the shift distance originally comes in a W register,
2382           // we do not need to sxtw it for use as an X register, since the rotate only uses
2383           // the shift distance modulo the register size and ignores the upper bits.
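          // (For illustration: the register form of Ror on a 64-bit value reads only the
          // low six bits of the shift register, so the W value is usable via its X alias.)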
2384           __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
2385         }
2386       } else {
2387         DCHECK(instr->IsXor());
2388         __ Eor(dst, lhs, rhs);
2389       }
2390       break;
2391     }
2392     case DataType::Type::kFloat32:
2393     case DataType::Type::kFloat64: {
2394       FPRegister dst = OutputFPRegister(instr);
2395       FPRegister lhs = InputFPRegisterAt(instr, 0);
2396       FPRegister rhs = InputFPRegisterAt(instr, 1);
2397       if (instr->IsAdd()) {
2398         __ Fadd(dst, lhs, rhs);
2399       } else if (instr->IsSub()) {
2400         __ Fsub(dst, lhs, rhs);
2401       } else {
2402         LOG(FATAL) << "Unexpected floating-point binary operation";
2403       }
2404       break;
2405     }
2406     default:
2407       LOG(FATAL) << "Unexpected binary operation type " << type;
2408   }
2409 }
2410 
2411 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
2412   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2413 
2414   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2415   DataType::Type type = instr->GetResultType();
2416   switch (type) {
2417     case DataType::Type::kInt32:
2418     case DataType::Type::kInt64: {
2419       locations->SetInAt(0, Location::RequiresRegister());
2420       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
2421       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2422       break;
2423     }
2424     default:
2425       LOG(FATAL) << "Unexpected shift type " << type;
2426   }
2427 }
2428 
2429 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
2430   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2431 
2432   DataType::Type type = instr->GetType();
2433   switch (type) {
2434     case DataType::Type::kInt32:
2435     case DataType::Type::kInt64: {
2436       Register dst = OutputRegister(instr);
2437       Register lhs = InputRegisterAt(instr, 0);
2438       Operand rhs = InputOperandAt(instr, 1);
2439       if (rhs.IsImmediate()) {
2440         uint32_t shift_value = rhs.GetImmediate() &
2441             (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance);
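        // The mask mirrors Java shift semantics: only the low five (int) or six (long) bits
        // of the distance are used, so, for example, an int shifted by 33 behaves exactly
        // like a shift by 1.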
2442         if (instr->IsShl()) {
2443           __ Lsl(dst, lhs, shift_value);
2444         } else if (instr->IsShr()) {
2445           __ Asr(dst, lhs, shift_value);
2446         } else {
2447           __ Lsr(dst, lhs, shift_value);
2448         }
2449       } else {
2450         Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
2451 
2452         if (instr->IsShl()) {
2453           __ Lsl(dst, lhs, rhs_reg);
2454         } else if (instr->IsShr()) {
2455           __ Asr(dst, lhs, rhs_reg);
2456         } else {
2457           __ Lsr(dst, lhs, rhs_reg);
2458         }
2459       }
2460       break;
2461     }
2462     default:
2463       LOG(FATAL) << "Unexpected shift operation type " << type;
2464   }
2465 }
2466 
2467 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
2468   HandleBinaryOp(instruction);
2469 }
2470 
2471 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
2472   HandleBinaryOp(instruction);
2473 }
2474 
2475 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
2476   HandleBinaryOp(instruction);
2477 }
2478 
2479 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
2480   HandleBinaryOp(instruction);
2481 }
2482 
2483 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2484   DCHECK(DataType::IsIntegralType(instr->GetType())) << instr->GetType();
2485   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2486   locations->SetInAt(0, Location::RequiresRegister());
2487   // There are no immediate variants of the negated bitwise instructions in AArch64.
2488   locations->SetInAt(1, Location::RequiresRegister());
2489   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2490 }
2491 
2492 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2493   Register dst = OutputRegister(instr);
2494   Register lhs = InputRegisterAt(instr, 0);
2495   Register rhs = InputRegisterAt(instr, 1);
2496 
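  // Sketch of the mapping implemented below: Bic computes lhs & ~rhs, Orn computes
  // lhs | ~rhs, and Eon computes lhs ^ ~rhs, i.e. each op kind maps to the A64
  // instruction that folds the negation of the right-hand side into the operation.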
2497   switch (instr->GetOpKind()) {
2498     case HInstruction::kAnd:
2499       __ Bic(dst, lhs, rhs);
2500       break;
2501     case HInstruction::kOr:
2502       __ Orn(dst, lhs, rhs);
2503       break;
2504     case HInstruction::kXor:
2505       __ Eon(dst, lhs, rhs);
2506       break;
2507     default:
2508       LOG(FATAL) << "Unreachable";
2509   }
2510 }
2511 
2512 void LocationsBuilderARM64::VisitDataProcWithShifterOp(
2513     HDataProcWithShifterOp* instruction) {
2514   DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
2515          instruction->GetType() == DataType::Type::kInt64);
2516   LocationSummary* locations =
2517       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2518   if (instruction->GetInstrKind() == HInstruction::kNeg) {
2519     locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
2520   } else {
2521     locations->SetInAt(0, Location::RequiresRegister());
2522   }
2523   locations->SetInAt(1, Location::RequiresRegister());
2524   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2525 }
2526 
2527 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
2528     HDataProcWithShifterOp* instruction) {
2529   DataType::Type type = instruction->GetType();
2530   HInstruction::InstructionKind kind = instruction->GetInstrKind();
2531   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
2532   Register out = OutputRegister(instruction);
2533   Register left;
2534   if (kind != HInstruction::kNeg) {
2535     left = InputRegisterAt(instruction, 0);
2536   }
2537   // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
2538   // shifter operand operation, the IR generating `right_reg` (input to the type
2539   // conversion) can have a different type from the current instruction's type,
2540   // so we manually indicate the type.
2541   Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
2542   Operand right_operand(0);
2543 
2544   HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
2545   if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
2546     right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
2547   } else {
2548     right_operand = Operand(right_reg,
2549                             helpers::ShiftFromOpKind(op_kind),
2550                             instruction->GetShiftAmount());
2551   }
2552 
2553   // Logical binary operations do not support extension operations in the
2554   // operand. Note that VIXL would still manage if an extended operand were
2555   // passed, by generating the extension as a separate instruction.
2556   // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
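  // For illustration, `add x0, x1, w2, sxtw` is a valid extended-register form, whereas
  // the logical instructions only accept shifted registers such as `and x0, x1, x2, lsl #3`;
  // an extended operand would have to be materialized with a separate sxtw/uxtw first.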
2557   DCHECK(!right_operand.IsExtendedRegister() ||
2558          (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
2559           kind != HInstruction::kNeg));
2560   switch (kind) {
2561     case HInstruction::kAdd:
2562       __ Add(out, left, right_operand);
2563       break;
2564     case HInstruction::kAnd:
2565       __ And(out, left, right_operand);
2566       break;
2567     case HInstruction::kNeg:
2568       DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
2569       __ Neg(out, right_operand);
2570       break;
2571     case HInstruction::kOr:
2572       __ Orr(out, left, right_operand);
2573       break;
2574     case HInstruction::kSub:
2575       __ Sub(out, left, right_operand);
2576       break;
2577     case HInstruction::kXor:
2578       __ Eor(out, left, right_operand);
2579       break;
2580     default:
2581       LOG(FATAL) << "Unexpected operation kind: " << kind;
2582       UNREACHABLE();
2583   }
2584 }
2585 
2586 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2587   LocationSummary* locations =
2588       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2589   locations->SetInAt(0, Location::RequiresRegister());
2590   locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
2591   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2592 }
2593 
2594 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2595   __ Add(OutputRegister(instruction),
2596          InputRegisterAt(instruction, 0),
2597          Operand(InputOperandAt(instruction, 1)));
2598 }
2599 
2600 void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
2601   LocationSummary* locations =
2602       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2603 
2604   HIntConstant* shift = instruction->GetShift()->AsIntConstant();
2605 
2606   locations->SetInAt(0, Location::RequiresRegister());
2607   // For the byte case we do not need to shift the index, so we can encode the data offset
2608   // directly into the ADD instruction. For other cases we prefer the data_offset to be in a
2609   // register; that hoists the data offset constant generation out of the loop and reduces
2610   // the critical path length in the loop.
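  // Roughly, the code generator emits either
  //   add out, index, #data_offset                  (shift == 0, byte-sized elements)
  // or
  //   add out, data_offset_reg, index, lsl #shift   (wider elements),
  // matching the two branches in the corresponding instruction visitor.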
2611   locations->SetInAt(1, shift->GetValue() == 0
2612                         ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant())
2613                         : Location::RequiresRegister());
2614   locations->SetInAt(2, Location::ConstantLocation(shift));
2615   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2616 }
2617 
2618 void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
2619     HIntermediateAddressIndex* instruction) {
2620   Register index_reg = InputRegisterAt(instruction, 0);
2621   uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2));
2622   uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
2623 
2624   if (shift == 0) {
2625     __ Add(OutputRegister(instruction), index_reg, offset);
2626   } else {
2627     Register offset_reg = InputRegisterAt(instruction, 1);
2628     __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
2629   }
2630 }
2631 
2632 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2633   LocationSummary* locations =
2634       new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
2635   HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2636   if (instr->GetOpKind() == HInstruction::kSub &&
2637       accumulator->IsConstant() &&
2638       accumulator->AsConstant()->IsArithmeticZero()) {
2639     // Don't allocate a register for the Mneg instruction.
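    // When the op kind is kSub and the accumulator is constant zero, the result is
    // 0 - (mul_left * mul_right), which Mneg computes without an accumulator input.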
2640   } else {
2641     locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
2642                        Location::RequiresRegister());
2643   }
2644   locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
2645   locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
2646   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2647 }
2648 
2649 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2650   Register res = OutputRegister(instr);
2651   Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
2652   Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
2653 
2654   // Avoid emitting code that could trigger Cortex A53's erratum 835769.
2655   // This fixup should be carried out for all multiply-accumulate instructions:
2656   // madd, msub, smaddl, smsubl, umaddl and umsubl.
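  // For illustration, a sequence such as
  //   ldr x4, [x10]
  //   madd x0, x1, x2, x3
  // can produce a wrong result on affected cores; emitting a nop between the two
  // instructions, as done below when the previous instruction is a load or store,
  // breaks the problematic pattern.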
2657   if (instr->GetType() == DataType::Type::kInt64 &&
2658       codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
2659     MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
2660     vixl::aarch64::Instruction* prev =
2661         masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
2662     if (prev->IsLoadOrStore()) {
2663       // Make sure we emit exactly one nop.
2664       ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2665       __ nop();
2666     }
2667   }
2668 
2669   if (instr->GetOpKind() == HInstruction::kAdd) {
2670     Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2671     __ Madd(res, mul_left, mul_right, accumulator);
2672   } else {
2673     DCHECK(instr->GetOpKind() == HInstruction::kSub);
2674     HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2675     if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
2676       __ Mneg(res, mul_left, mul_right);
2677     } else {
2678       Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2679       __ Msub(res, mul_left, mul_right, accumulator);
2680     }
2681   }
2682 }
2683 
2684 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
2685   bool object_array_get_with_read_barrier =
2686       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
2687   LocationSummary* locations =
2688       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2689                                                        object_array_get_with_read_barrier
2690                                                            ? LocationSummary::kCallOnSlowPath
2691                                                            : LocationSummary::kNoCall);
2692   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
2693     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2694     // We need a temporary register for the read barrier marking slow
2695     // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
2696     if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
2697         !Runtime::Current()->UseJitCompilation() &&
2698         instruction->GetIndex()->IsConstant()) {
2699       // Array loads with constant index are treated as field loads.
2700       // If link-time thunks for the Baker read barrier are enabled, for AOT
2701       // constant index loads we need a temporary only if the offset is too big.
2702       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2703       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
2704       offset += index << DataType::SizeShift(DataType::Type::kReference);
2705       if (offset >= kReferenceLoadMinFarOffset) {
2706         locations->AddTemp(FixedTempLocation());
2707       }
2708     } else {
2709       locations->AddTemp(Location::RequiresRegister());
2710     }
2711   }
2712   locations->SetInAt(0, Location::RequiresRegister());
2713   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2714   if (DataType::IsFloatingPointType(instruction->GetType())) {
2715     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2716   } else {
2717     // The output overlaps in the case of an object array get with
2718     // read barriers enabled: we do not want the move to overwrite the
2719     // array's location, as we need it to emit the read barrier.
2720     locations->SetOut(
2721         Location::RequiresRegister(),
2722         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2723   }
2724 }
2725 
2726 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
2727   DataType::Type type = instruction->GetType();
2728   Register obj = InputRegisterAt(instruction, 0);
2729   LocationSummary* locations = instruction->GetLocations();
2730   Location index = locations->InAt(1);
2731   Location out = locations->Out();
2732   uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2733   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
2734                                         instruction->IsStringCharAt();
2735   MacroAssembler* masm = GetVIXLAssembler();
2736   UseScratchRegisterScope temps(masm);
2737 
2738   // The read barrier instrumentation of object ArrayGet instructions
2739   // does not support the HIntermediateAddress instruction.
2740   DCHECK(!((type == DataType::Type::kReference) &&
2741            instruction->GetArray()->IsIntermediateAddress() &&
2742            kEmitCompilerReadBarrier));
2743 
2744   if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2745     // Object ArrayGet with Baker's read barrier case.
2746     // Note that a potential implicit null check is handled in the
2747     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
2748     DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
2749     if (index.IsConstant()) {
2750       // Array load with a constant index can be treated as a field load.
2751       offset += Int64ConstantFrom(index) << DataType::SizeShift(type);
2752       Location maybe_temp =
2753           (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2754       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
2755                                                       out,
2756                                                       obj.W(),
2757                                                       offset,
2758                                                       maybe_temp,
2759                                                       /* needs_null_check */ false,
2760                                                       /* use_load_acquire */ false);
2761     } else {
2762       Register temp = WRegisterFrom(locations->GetTemp(0));
2763       codegen_->GenerateArrayLoadWithBakerReadBarrier(
2764           instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false);
2765     }
2766   } else {
2767     // General case.
2768     MemOperand source = HeapOperand(obj);
2769     Register length;
2770     if (maybe_compressed_char_at) {
2771       uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2772       length = temps.AcquireW();
2773       {
2774         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2775         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2776 
2777         if (instruction->GetArray()->IsIntermediateAddress()) {
2778           DCHECK_LT(count_offset, offset);
2779           int64_t adjusted_offset =
2780               static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
2781           // Note that `adjusted_offset` is negative, so this will be a LDUR.
2782           __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
2783         } else {
2784           __ Ldr(length, HeapOperand(obj, count_offset));
2785         }
2786         codegen_->MaybeRecordImplicitNullCheck(instruction);
2787       }
2788     }
2789     if (index.IsConstant()) {
2790       if (maybe_compressed_char_at) {
2791         vixl::aarch64::Label uncompressed_load, done;
2792         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2793                       "Expecting 0=compressed, 1=uncompressed");
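        // Bit 0 of `length` is the compression flag tested below: if it is clear, the
        // string is compressed and each character is one byte (Ldrb at `index`);
        // otherwise characters are two bytes (Ldrh at `index << 1`).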
2794         __ Tbnz(length.W(), 0, &uncompressed_load);
2795         __ Ldrb(Register(OutputCPURegister(instruction)),
2796                 HeapOperand(obj, offset + Int64ConstantFrom(index)));
2797         __ B(&done);
2798         __ Bind(&uncompressed_load);
2799         __ Ldrh(Register(OutputCPURegister(instruction)),
2800                 HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1)));
2801         __ Bind(&done);
2802       } else {
2803         offset += Int64ConstantFrom(index) << DataType::SizeShift(type);
2804         source = HeapOperand(obj, offset);
2805       }
2806     } else {
2807       Register temp = temps.AcquireSameSizeAs(obj);
2808       if (instruction->GetArray()->IsIntermediateAddress()) {
2809         // We do not need to compute the intermediate address from the array: the
2810         // input instruction has done it already. See the comment in
2811         // `TryExtractArrayAccessAddress()`.
2812         if (kIsDebugBuild) {
2813           HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
2814           DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
2815         }
2816         temp = obj;
2817       } else {
2818         __ Add(temp, obj, offset);
2819       }
2820       if (maybe_compressed_char_at) {
2821         vixl::aarch64::Label uncompressed_load, done;
2822         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2823                       "Expecting 0=compressed, 1=uncompressed");
2824         __ Tbnz(length.W(), 0, &uncompressed_load);
2825         __ Ldrb(Register(OutputCPURegister(instruction)),
2826                 HeapOperand(temp, XRegisterFrom(index), LSL, 0));
2827         __ B(&done);
2828         __ Bind(&uncompressed_load);
2829         __ Ldrh(Register(OutputCPURegister(instruction)),
2830                 HeapOperand(temp, XRegisterFrom(index), LSL, 1));
2831         __ Bind(&done);
2832       } else {
2833         source = HeapOperand(temp, XRegisterFrom(index), LSL, DataType::SizeShift(type));
2834       }
2835     }
2836     if (!maybe_compressed_char_at) {
2837       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2838       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2839       codegen_->Load(type, OutputCPURegister(instruction), source);
2840       codegen_->MaybeRecordImplicitNullCheck(instruction);
2841     }
2842 
2843     if (type == DataType::Type::kReference) {
2844       static_assert(
2845           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
2846           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
2847       Location obj_loc = locations->InAt(0);
2848       if (index.IsConstant()) {
2849         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
2850       } else {
2851         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
2852       }
2853     }
2854   }
2855 }
2856 
2857 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
2858   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
2859   locations->SetInAt(0, Location::RequiresRegister());
2860   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2861 }
2862 
2863 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
2864   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
2865   vixl::aarch64::Register out = OutputRegister(instruction);
2866   {
2867     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2868     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2869     __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
2870     codegen_->MaybeRecordImplicitNullCheck(instruction);
2871   }
2872   // Mask out compression flag from String's array length.
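  // With compression enabled, the count field packs (length << 1) | flag, where bit 0 is
  // zero for compressed strings, so a logical shift right by one recovers the character
  // count in either case.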
2873   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
2874     __ Lsr(out.W(), out.W(), 1u);
2875   }
2876 }
2877 
2878 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
2879   DataType::Type value_type = instruction->GetComponentType();
2880 
2881   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
2882   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
2883       instruction,
2884       may_need_runtime_call_for_type_check ?
2885           LocationSummary::kCallOnSlowPath :
2886           LocationSummary::kNoCall);
2887   locations->SetInAt(0, Location::RequiresRegister());
2888   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2889   if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
2890     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
2891   } else if (DataType::IsFloatingPointType(value_type)) {
2892     locations->SetInAt(2, Location::RequiresFpuRegister());
2893   } else {
2894     locations->SetInAt(2, Location::RequiresRegister());
2895   }
2896 }
2897 
2898 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
2899   DataType::Type value_type = instruction->GetComponentType();
2900   LocationSummary* locations = instruction->GetLocations();
2901   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
2902   bool needs_write_barrier =
2903       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
2904 
2905   Register array = InputRegisterAt(instruction, 0);
2906   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
2907   CPURegister source = value;
2908   Location index = locations->InAt(1);
2909   size_t offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
2910   MemOperand destination = HeapOperand(array);
2911   MacroAssembler* masm = GetVIXLAssembler();
2912 
2913   if (!needs_write_barrier) {
2914     DCHECK(!may_need_runtime_call_for_type_check);
2915     if (index.IsConstant()) {
2916       offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type);
2917       destination = HeapOperand(array, offset);
2918     } else {
2919       UseScratchRegisterScope temps(masm);
2920       Register temp = temps.AcquireSameSizeAs(array);
2921       if (instruction->GetArray()->IsIntermediateAddress()) {
2922         // We do not need to compute the intermediate address from the array: the
2923         // input instruction has done it already. See the comment in
2924         // `TryExtractArrayAccessAddress()`.
2925         if (kIsDebugBuild) {
2926           HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
2927           DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
2928         }
2929         temp = array;
2930       } else {
2931         __ Add(temp, array, offset);
2932       }
2933       destination = HeapOperand(temp,
2934                                 XRegisterFrom(index),
2935                                 LSL,
2936                                 DataType::SizeShift(value_type));
2937     }
2938     {
2939       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2940       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2941       codegen_->Store(value_type, value, destination);
2942       codegen_->MaybeRecordImplicitNullCheck(instruction);
2943     }
2944   } else {
2945     DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2946     vixl::aarch64::Label done;
2947     SlowPathCodeARM64* slow_path = nullptr;
2948     {
2949       // We use a block to end the scratch scope before the write barrier, thus
2950       // freeing the temporary registers so they can be used in `MarkGCCard`.
2951       UseScratchRegisterScope temps(masm);
2952       Register temp = temps.AcquireSameSizeAs(array);
2953       if (index.IsConstant()) {
2954         offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type);
2955         destination = HeapOperand(array, offset);
2956       } else {
2957         destination = HeapOperand(temp,
2958                                   XRegisterFrom(index),
2959                                   LSL,
2960                                   DataType::SizeShift(value_type));
2961       }
2962 
2963       uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2964       uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2965       uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2966 
2967       if (may_need_runtime_call_for_type_check) {
2968         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction);
2969         codegen_->AddSlowPath(slow_path);
2970         if (instruction->GetValueCanBeNull()) {
2971           vixl::aarch64::Label non_zero;
2972           __ Cbnz(Register(value), &non_zero);
2973           if (!index.IsConstant()) {
2974             __ Add(temp, array, offset);
2975           }
2976           {
2977             // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools
2978             // emitted.
2979             EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2980             __ Str(wzr, destination);
2981             codegen_->MaybeRecordImplicitNullCheck(instruction);
2982           }
2983           __ B(&done);
2984           __ Bind(&non_zero);
2985         }
2986 
2987         // Note that when Baker read barriers are enabled, the type
2988         // checks are performed without read barriers.  This is fine,
2989         // even in the case where a class object is in the from-space
2990         // after the flip, as a comparison involving such a type would
2991         // not produce a false positive; it may of course produce a
2992         // false negative, in which case we would take the ArraySet
2993         // slow path.
2994 
2995         Register temp2 = temps.AcquireSameSizeAs(array);
2996         // /* HeapReference<Class> */ temp = array->klass_
2997         {
2998           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2999           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3000           __ Ldr(temp, HeapOperand(array, class_offset));
3001           codegen_->MaybeRecordImplicitNullCheck(instruction);
3002         }
3003         GetAssembler()->MaybeUnpoisonHeapReference(temp);
3004 
3005         // /* HeapReference<Class> */ temp = temp->component_type_
3006         __ Ldr(temp, HeapOperand(temp, component_offset));
3007         // /* HeapReference<Class> */ temp2 = value->klass_
3008         __ Ldr(temp2, HeapOperand(Register(value), class_offset));
3009         // If heap poisoning is enabled, no need to unpoison `temp`
3010         // nor `temp2`, as we are comparing two poisoned references.
3011         __ Cmp(temp, temp2);
3012         temps.Release(temp2);
3013 
3014         if (instruction->StaticTypeOfArrayIsObjectArray()) {
3015           vixl::aarch64::Label do_put;
3016           __ B(eq, &do_put);
3017           // If heap poisoning is enabled, the `temp` reference has
3018           // not been unpoisoned yet; unpoison it now.
3019           GetAssembler()->MaybeUnpoisonHeapReference(temp);
3020 
3021           // /* HeapReference<Class> */ temp = temp->super_class_
3022           __ Ldr(temp, HeapOperand(temp, super_offset));
3023           // If heap poisoning is enabled, no need to unpoison
3024           // `temp`, as we are comparing against null below.
3025           __ Cbnz(temp, slow_path->GetEntryLabel());
3026           __ Bind(&do_put);
3027         } else {
3028           __ B(ne, slow_path->GetEntryLabel());
3029         }
3030       }
3031 
3032       if (kPoisonHeapReferences) {
3033         Register temp2 = temps.AcquireSameSizeAs(array);
3034         DCHECK(value.IsW());
3035         __ Mov(temp2, value.W());
3036         GetAssembler()->PoisonHeapReference(temp2);
3037         source = temp2;
3038       }
3039 
3040       if (!index.IsConstant()) {
3041         __ Add(temp, array, offset);
3042       } else {
3043         // We no longer need `temp` here, so release it, as the store below may
3044         // need a scratch register (if the constant index makes the offset too large)
3045         // and the poisoned `source` could be using the other scratch register.
3046         temps.Release(temp);
3047       }
3048       {
3049         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
3050         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3051         __ Str(source, destination);
3052 
3053         if (!may_need_runtime_call_for_type_check) {
3054           codegen_->MaybeRecordImplicitNullCheck(instruction);
3055         }
3056       }
3057     }
3058 
3059     codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull());
3060 
3061     if (done.IsLinked()) {
3062       __ Bind(&done);
3063     }
3064 
3065     if (slow_path != nullptr) {
3066       __ Bind(slow_path->GetExitLabel());
3067     }
3068   }
3069 }
3070 
3071 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3072   RegisterSet caller_saves = RegisterSet::Empty();
3073   InvokeRuntimeCallingConvention calling_convention;
3074   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3075   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
3076   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
3077   locations->SetInAt(0, Location::RequiresRegister());
3078   locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
3079 }
3080 
3081 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3082   BoundsCheckSlowPathARM64* slow_path =
3083       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
3084   codegen_->AddSlowPath(slow_path);
3085   __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
3086   __ B(slow_path->GetEntryLabel(), hs);
3087 }
3088 
3089 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
3090   LocationSummary* locations =
3091       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
3092   locations->SetInAt(0, Location::RequiresRegister());
3093   if (check->HasUses()) {
3094     locations->SetOut(Location::SameAsFirstInput());
3095   }
3096 }
3097 
3098 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
3099   // We assume the class is not null.
3100   SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(
3101       check->GetLoadClass(), check, check->GetDexPc(), true);
3102   codegen_->AddSlowPath(slow_path);
3103   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
3104 }
3105 
3106 static bool IsFloatingPointZeroConstant(HInstruction* inst) {
3107   return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
3108       || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
3109 }
3110 
3111 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
3112   FPRegister lhs_reg = InputFPRegisterAt(instruction, 0);
3113   Location rhs_loc = instruction->GetLocations()->InAt(1);
3114   if (rhs_loc.IsConstant()) {
3115     // 0.0 is the only immediate that can be encoded directly in
3116     // an FCMP instruction.
3117     //
3118     // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
3119     // specify that in a floating-point comparison, positive zero
3120     // and negative zero are considered equal, so we can use the
3121     // literal 0.0 for both cases here.
3122     //
3123     // Note however that some methods (Float.equal, Float.compare,
3124     // Float.compareTo, Double.equal, Double.compare,
3125     // Double.compareTo, Math.max, Math.min, StrictMath.max,
3126     // StrictMath.min) consider 0.0 to be (strictly) greater than
3127     // -0.0. So if we ever translate calls to these methods into a
3128     // HCompare instruction, we must handle the -0.0 case with
3129     // care here.
3130     DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
3131     __ Fcmp(lhs_reg, 0.0);
3132   } else {
3133     __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
3134   }
3135 }
3136 
3137 void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
3138   LocationSummary* locations =
3139       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
3140   DataType::Type in_type = compare->InputAt(0)->GetType();
3141   switch (in_type) {
3142     case DataType::Type::kBool:
3143     case DataType::Type::kUint8:
3144     case DataType::Type::kInt8:
3145     case DataType::Type::kUint16:
3146     case DataType::Type::kInt16:
3147     case DataType::Type::kInt32:
3148     case DataType::Type::kInt64: {
3149       locations->SetInAt(0, Location::RequiresRegister());
3150       locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
3151       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3152       break;
3153     }
3154     case DataType::Type::kFloat32:
3155     case DataType::Type::kFloat64: {
3156       locations->SetInAt(0, Location::RequiresFpuRegister());
3157       locations->SetInAt(1,
3158                          IsFloatingPointZeroConstant(compare->InputAt(1))
3159                              ? Location::ConstantLocation(compare->InputAt(1)->AsConstant())
3160                              : Location::RequiresFpuRegister());
3161       locations->SetOut(Location::RequiresRegister());
3162       break;
3163     }
3164     default:
3165       LOG(FATAL) << "Unexpected type for compare operation " << in_type;
3166   }
3167 }
3168 
3169 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
3170   DataType::Type in_type = compare->InputAt(0)->GetType();
3171 
3172   //  0 if: left == right
3173   //  1 if: left  > right
3174   // -1 if: left  < right
3175   switch (in_type) {
3176     case DataType::Type::kBool:
3177     case DataType::Type::kUint8:
3178     case DataType::Type::kInt8:
3179     case DataType::Type::kUint16:
3180     case DataType::Type::kInt16:
3181     case DataType::Type::kInt32:
3182     case DataType::Type::kInt64: {
3183       Register result = OutputRegister(compare);
3184       Register left = InputRegisterAt(compare, 0);
3185       Operand right = InputOperandAt(compare, 1);
3186       __ Cmp(left, right);
3187       __ Cset(result, ne);          // result == +1 if NE or 0 otherwise
3188       __ Cneg(result, result, lt);  // result == -1 if LT or unchanged otherwise
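      // For example, with left = 3 and right = 5 the Cmp sets flags so that "lt" holds:
      // Cset ne yields 1 and Cneg ... lt negates it to -1. For equal inputs, Cset ne
      // yields 0 and the lt condition does not hold, so the result stays 0.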
3189       break;
3190     }
3191     case DataType::Type::kFloat32:
3192     case DataType::Type::kFloat64: {
3193       Register result = OutputRegister(compare);
3194       GenerateFcmp(compare);
3195       __ Cset(result, ne);
3196       __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
3197       break;
3198     }
3199     default:
3200       LOG(FATAL) << "Unimplemented compare type " << in_type;
3201   }
3202 }
3203 
3204 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
3205   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
3206 
3207   if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3208     locations->SetInAt(0, Location::RequiresFpuRegister());
3209     locations->SetInAt(1,
3210                        IsFloatingPointZeroConstant(instruction->InputAt(1))
3211                            ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant())
3212                            : Location::RequiresFpuRegister());
3213   } else {
3214     // Integer cases.
3215     locations->SetInAt(0, Location::RequiresRegister());
3216     locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
3217   }
3218 
3219   if (!instruction->IsEmittedAtUseSite()) {
3220     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3221   }
3222 }
3223 
3224 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
3225   if (instruction->IsEmittedAtUseSite()) {
3226     return;
3227   }
3228 
3229   LocationSummary* locations = instruction->GetLocations();
3230   Register res = RegisterFrom(locations->Out(), instruction->GetType());
3231   IfCondition if_cond = instruction->GetCondition();
3232 
3233   if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3234     GenerateFcmp(instruction);
3235     __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
3236   } else {
3237     // Integer cases.
3238     Register lhs = InputRegisterAt(instruction, 0);
3239     Operand rhs = InputOperandAt(instruction, 1);
3240     __ Cmp(lhs, rhs);
3241     __ Cset(res, ARM64Condition(if_cond));
3242   }
3243 }
3244 
3245 #define FOR_EACH_CONDITION_INSTRUCTION(M)                                                \
3246   M(Equal)                                                                               \
3247   M(NotEqual)                                                                            \
3248   M(LessThan)                                                                            \
3249   M(LessThanOrEqual)                                                                     \
3250   M(GreaterThan)                                                                         \
3251   M(GreaterThanOrEqual)                                                                  \
3252   M(Below)                                                                               \
3253   M(BelowOrEqual)                                                                        \
3254   M(Above)                                                                               \
3255   M(AboveOrEqual)
3256 #define DEFINE_CONDITION_VISITORS(Name)                                                  \
3257 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
3258 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
3259 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
3260 #undef DEFINE_CONDITION_VISITORS
3261 #undef FOR_EACH_CONDITION_INSTRUCTION
3262 
3263 void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3264   DCHECK(instruction->IsDiv() || instruction->IsRem());
3265 
3266   LocationSummary* locations = instruction->GetLocations();
3267   Location second = locations->InAt(1);
3268   DCHECK(second.IsConstant());
3269 
3270   Register out = OutputRegister(instruction);
3271   Register dividend = InputRegisterAt(instruction, 0);
3272   int64_t imm = Int64FromConstant(second.GetConstant());
3273   DCHECK(imm == 1 || imm == -1);
3274 
3275   if (instruction->IsRem()) {
3276     __ Mov(out, 0);
3277   } else {
3278     if (imm == 1) {
3279       __ Mov(out, dividend);
3280     } else {
3281       __ Neg(out, dividend);
3282     }
3283   }
3284 }
3285 
3286 void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
3287   DCHECK(instruction->IsDiv() || instruction->IsRem());
3288 
3289   LocationSummary* locations = instruction->GetLocations();
3290   Location second = locations->InAt(1);
3291   DCHECK(second.IsConstant());
3292 
3293   Register out = OutputRegister(instruction);
3294   Register dividend = InputRegisterAt(instruction, 0);
3295   int64_t imm = Int64FromConstant(second.GetConstant());
3296   uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
3297   int ctz_imm = CTZ(abs_imm);
3298 
3299   UseScratchRegisterScope temps(GetVIXLAssembler());
3300   Register temp = temps.AcquireSameSizeAs(out);
3301 
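  // For illustration, assume a positive imm = 4 (ctz_imm = 2) and dividend = -7:
  //   Div: temp = -7 + 3 = -4; dividend < 0 selects temp; -4 asr 2 = -1   (== -7 / 4).
  //   Rem: temp = (-7 asr 31) lsr 30 = 3; out = -7 + 3 = -4; -4 and 3 = 0;
  //        0 - 3 = -3                                                     (== -7 % 4).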
3302   if (instruction->IsDiv()) {
3303     __ Add(temp, dividend, abs_imm - 1);
3304     __ Cmp(dividend, 0);
3305     __ Csel(out, temp, dividend, lt);
3306     if (imm > 0) {
3307       __ Asr(out, out, ctz_imm);
3308     } else {
3309       __ Neg(out, Operand(out, ASR, ctz_imm));
3310     }
3311   } else {
3312     int bits = instruction->GetResultType() == DataType::Type::kInt32 ? 32 : 64;
3313     __ Asr(temp, dividend, bits - 1);
3314     __ Lsr(temp, temp, bits - ctz_imm);
3315     __ Add(out, dividend, temp);
3316     __ And(out, out, abs_imm - 1);
3317     __ Sub(out, out, temp);
3318   }
3319 }
3320 
3321 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3322   DCHECK(instruction->IsDiv() || instruction->IsRem());
3323 
3324   LocationSummary* locations = instruction->GetLocations();
3325   Location second = locations->InAt(1);
3326   DCHECK(second.IsConstant());
3327 
3328   Register out = OutputRegister(instruction);
3329   Register dividend = InputRegisterAt(instruction, 0);
3330   int64_t imm = Int64FromConstant(second.GetConstant());
3331 
3332   DataType::Type type = instruction->GetResultType();
3333   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
3334 
3335   int64_t magic;
3336   int shift;
3337   CalculateMagicAndShiftForDivRem(
3338       imm, type == DataType::Type::kInt64 /* is_long */, &magic, &shift);
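  // For illustration, with a Hacker's Delight style magic number scheme, a 32-bit signed
  // division by 7 uses magic = 0x92492493 (negative as an int32) and shift = 2. With
  // dividend = 100:
  //   temp = high32(100 * magic) = -43; magic < 0 and imm > 0, so temp += 100 -> 57;
  //   57 asr 2 = 14; the final Sub subtracts the sign bit (14 asr 31 = 0), leaving 14,
  //   which is 100 / 7. The Rem path then computes 100 - 14 * 7 = 2 with the final Msub.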
3339 
3340   UseScratchRegisterScope temps(GetVIXLAssembler());
3341   Register temp = temps.AcquireSameSizeAs(out);
3342 
3343   // temp = get_high(dividend * magic)
3344   __ Mov(temp, magic);
3345   if (type == DataType::Type::kInt64) {
3346     __ Smulh(temp, dividend, temp);
3347   } else {
3348     __ Smull(temp.X(), dividend, temp);
3349     __ Lsr(temp.X(), temp.X(), 32);
3350   }
3351 
3352   if (imm > 0 && magic < 0) {
3353     __ Add(temp, temp, dividend);
3354   } else if (imm < 0 && magic > 0) {
3355     __ Sub(temp, temp, dividend);
3356   }
3357 
3358   if (shift != 0) {
3359     __ Asr(temp, temp, shift);
3360   }
3361 
3362   if (instruction->IsDiv()) {
3363     __ Sub(out, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31));
3364   } else {
3365     __ Sub(temp, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31));
3366     // TODO: Strength reduction for msub.
3367     Register temp_imm = temps.AcquireSameSizeAs(out);
3368     __ Mov(temp_imm, imm);
3369     __ Msub(out, temp, temp_imm, dividend);
3370   }
3371 }
3372 
3373 void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3374   DCHECK(instruction->IsDiv() || instruction->IsRem());
3375   DataType::Type type = instruction->GetResultType();
3376   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
3377 
3378   LocationSummary* locations = instruction->GetLocations();
3379   Register out = OutputRegister(instruction);
3380   Location second = locations->InAt(1);
3381 
3382   if (second.IsConstant()) {
3383     int64_t imm = Int64FromConstant(second.GetConstant());
3384 
3385     if (imm == 0) {
3386       // Do not generate anything. DivZeroCheck would prevent any code from being executed.
3387     } else if (imm == 1 || imm == -1) {
3388       DivRemOneOrMinusOne(instruction);
3389     } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3390       DivRemByPowerOfTwo(instruction);
3391     } else {
3392       DCHECK(imm <= -2 || imm >= 2);
3393       GenerateDivRemWithAnyConstant(instruction);
3394     }
3395   } else {
3396     Register dividend = InputRegisterAt(instruction, 0);
3397     Register divisor = InputRegisterAt(instruction, 1);
3398     if (instruction->IsDiv()) {
3399       __ Sdiv(out, dividend, divisor);
3400     } else {
3401       UseScratchRegisterScope temps(GetVIXLAssembler());
3402       Register temp = temps.AcquireSameSizeAs(out);
3403       __ Sdiv(temp, dividend, divisor);
3404       __ Msub(out, temp, divisor, dividend);
3405     }
3406   }
3407 }
3408 
3409 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
3410   LocationSummary* locations =
3411       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3412   switch (div->GetResultType()) {
3413     case DataType::Type::kInt32:
3414     case DataType::Type::kInt64:
3415       locations->SetInAt(0, Location::RequiresRegister());
3416       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3417       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3418       break;
3419 
3420     case DataType::Type::kFloat32:
3421     case DataType::Type::kFloat64:
3422       locations->SetInAt(0, Location::RequiresFpuRegister());
3423       locations->SetInAt(1, Location::RequiresFpuRegister());
3424       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3425       break;
3426 
3427     default:
3428       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3429   }
3430 }
3431 
3432 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
3433   DataType::Type type = div->GetResultType();
3434   switch (type) {
3435     case DataType::Type::kInt32:
3436     case DataType::Type::kInt64:
3437       GenerateDivRemIntegral(div);
3438       break;
3439 
3440     case DataType::Type::kFloat32:
3441     case DataType::Type::kFloat64:
3442       __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
3443       break;
3444 
3445     default:
3446       LOG(FATAL) << "Unexpected div type " << type;
3447   }
3448 }
3449 
3450 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3451   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3452   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3453 }
3454 
3455 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3456   SlowPathCodeARM64* slow_path =
3457       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARM64(instruction);
3458   codegen_->AddSlowPath(slow_path);
3459   Location value = instruction->GetLocations()->InAt(0);
3460 
3461   DataType::Type type = instruction->GetType();
3462 
3463   if (!DataType::IsIntegralType(type)) {
3464     LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
3465     return;
3466   }
3467 
3468   if (value.IsConstant()) {
3469     int64_t divisor = Int64ConstantFrom(value);
3470     if (divisor == 0) {
3471       __ B(slow_path->GetEntryLabel());
3472     } else {
3473       // A division by a non-zero constant is valid. We don't need to perform
3474       // any check, so simply fall through.
3475     }
3476   } else {
3477     __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
3478   }
3479 }
3480 
3481 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
3482   LocationSummary* locations =
3483       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3484   locations->SetOut(Location::ConstantLocation(constant));
3485 }
3486 
3487 void InstructionCodeGeneratorARM64::VisitDoubleConstant(
3488     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
3489   // Will be generated at use site.
3490 }
3491 
3492 void LocationsBuilderARM64::VisitExit(HExit* exit) {
3493   exit->SetLocations(nullptr);
3494 }
3495 
3496 void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
3497 }
3498 
3499 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
3500   LocationSummary* locations =
3501       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3502   locations->SetOut(Location::ConstantLocation(constant));
3503 }
3504 
3505 void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
3506   // Will be generated at use site.
3507 }
3508 
3509 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
3510   if (successor->IsExitBlock()) {
3511     DCHECK(got->GetPrevious()->AlwaysThrows());
3512     return;  // no code needed
3513   }
3514 
3515   HBasicBlock* block = got->GetBlock();
3516   HInstruction* previous = got->GetPrevious();
3517   HLoopInformation* info = block->GetLoopInformation();
3518 
3519   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
3520     if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
3521       UseScratchRegisterScope temps(GetVIXLAssembler());
3522       Register temp1 = temps.AcquireX();
3523       Register temp2 = temps.AcquireX();
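           // Reload the ArtMethod* stored at SP + 0 by the frame entry, then bump its
           // 16-bit hotness counter in place with the load/add/store below.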
3524       __ Ldr(temp1, MemOperand(sp, 0));
3525       __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
3526       __ Add(temp2, temp2, 1);
3527       __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
3528     }
3529     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
3530     return;
3531   }
3532   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
3533     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
3534     codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
3535   }
3536   if (!codegen_->GoesToNextBlock(block, successor)) {
3537     __ B(codegen_->GetLabelOf(successor));
3538   }
3539 }
3540 
3541 void LocationsBuilderARM64::VisitGoto(HGoto* got) {
3542   got->SetLocations(nullptr);
3543 }
3544 
3545 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
3546   HandleGoto(got, got->GetSuccessor());
3547 }
3548 
3549 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3550   try_boundary->SetLocations(nullptr);
3551 }
3552 
3553 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3554   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
3555   if (!successor->IsExitBlock()) {
3556     HandleGoto(try_boundary, successor);
3557   }
3558 }
3559 
3560 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
3561                                                           size_t condition_input_index,
3562                                                           vixl::aarch64::Label* true_target,
3563                                                           vixl::aarch64::Label* false_target) {
3564   HInstruction* cond = instruction->InputAt(condition_input_index);
3565 
3566   if (true_target == nullptr && false_target == nullptr) {
3567     // Nothing to do. The code always falls through.
3568     return;
3569   } else if (cond->IsIntConstant()) {
3570     // Constant condition, statically compared against "true" (integer value 1).
3571     if (cond->AsIntConstant()->IsTrue()) {
3572       if (true_target != nullptr) {
3573         __ B(true_target);
3574       }
3575     } else {
3576       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
3577       if (false_target != nullptr) {
3578         __ B(false_target);
3579       }
3580     }
3581     return;
3582   }
3583 
3584   // The following code generates these patterns:
3585   //  (1) true_target == nullptr && false_target != nullptr
3586   //        - opposite condition true => branch to false_target
3587   //  (2) true_target != nullptr && false_target == nullptr
3588   //        - condition true => branch to true_target
3589   //  (3) true_target != nullptr && false_target != nullptr
3590   //        - condition true => branch to true_target
3591   //        - branch to false_target
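       // For example, with an unmaterialized `a < b` (HLessThan) condition and only a
       // non-null `false_target` (case 1), the integer path below emits roughly:
       //   cmp w0, w1
       //   b.ge false_target   // opposite condition of "<"
       // (the registers shown are illustrative, not the actual allocation).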
3592   if (IsBooleanValueOrMaterializedCondition(cond)) {
3593     // The condition instruction has been materialized, compare the output to 0.
3594     Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
3595     DCHECK(cond_val.IsRegister());
3596     if (true_target == nullptr) {
3597       __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
3598     } else {
3599       __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
3600     }
3601   } else {
3602     // The condition instruction has not been materialized, use its inputs as
3603     // the comparison and its condition as the branch condition.
3604     HCondition* condition = cond->AsCondition();
3605 
3606     DataType::Type type = condition->InputAt(0)->GetType();
3607     if (DataType::IsFloatingPointType(type)) {
3608       GenerateFcmp(condition);
3609       if (true_target == nullptr) {
3610         IfCondition opposite_condition = condition->GetOppositeCondition();
3611         __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
3612       } else {
3613         __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
3614       }
3615     } else {
3616       // Integer cases.
3617       Register lhs = InputRegisterAt(condition, 0);
3618       Operand rhs = InputOperandAt(condition, 1);
3619 
3620       Condition arm64_cond;
3621       vixl::aarch64::Label* non_fallthrough_target;
3622       if (true_target == nullptr) {
3623         arm64_cond = ARM64Condition(condition->GetOppositeCondition());
3624         non_fallthrough_target = false_target;
3625       } else {
3626         arm64_cond = ARM64Condition(condition->GetCondition());
3627         non_fallthrough_target = true_target;
3628       }
3629 
3630       if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
3631           rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
3632         switch (arm64_cond) {
3633           case eq:
3634             __ Cbz(lhs, non_fallthrough_target);
3635             break;
3636           case ne:
3637             __ Cbnz(lhs, non_fallthrough_target);
3638             break;
3639           case lt:
3640             // Test the sign bit and branch accordingly.
3641             __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3642             break;
3643           case ge:
3644             // Test the sign bit and branch accordingly.
3645             __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3646             break;
3647           default:
3648             // Without the `static_cast` the compiler throws an error for
3649             // `-Werror=sign-promo`.
3650             LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
3651         }
3652       } else {
3653         __ Cmp(lhs, rhs);
3654         __ B(arm64_cond, non_fallthrough_target);
3655       }
3656     }
3657   }
3658 
3659   // If neither branch falls through (case 3), the conditional branch to `true_target`
3660   // was already emitted (case 2) and we need to emit a jump to `false_target`.
3661   if (true_target != nullptr && false_target != nullptr) {
3662     __ B(false_target);
3663   }
3664 }
3665 
3666 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
3667   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
3668   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3669     locations->SetInAt(0, Location::RequiresRegister());
3670   }
3671 }
3672 
3673 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
3674   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3675   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3676   vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
3677   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
3678     true_target = nullptr;
3679   }
3680   vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
3681   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
3682     false_target = nullptr;
3683   }
3684   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
3685 }
3686 
3687 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3688   LocationSummary* locations = new (GetGraph()->GetAllocator())
3689       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3690   InvokeRuntimeCallingConvention calling_convention;
3691   RegisterSet caller_saves = RegisterSet::Empty();
3692   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3693   locations->SetCustomSlowPathCallerSaves(caller_saves);
3694   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3695     locations->SetInAt(0, Location::RequiresRegister());
3696   }
3697 }
3698 
3699 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3700   SlowPathCodeARM64* slow_path =
3701       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
3702   GenerateTestAndBranch(deoptimize,
3703                         /* condition_input_index */ 0,
3704                         slow_path->GetEntryLabel(),
3705                         /* false_target */ nullptr);
3706 }
3707 
3708 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3709   LocationSummary* locations = new (GetGraph()->GetAllocator())
3710       LocationSummary(flag, LocationSummary::kNoCall);
3711   locations->SetOut(Location::RequiresRegister());
3712 }
3713 
3714 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3715   __ Ldr(OutputRegister(flag),
3716          MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
3717 }
3718 
3719 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
3720   return condition->IsCondition() &&
3721          DataType::IsFloatingPointType(condition->InputAt(0)->GetType());
3722 }
3723 
3724 static inline Condition GetConditionForSelect(HCondition* condition) {
3725   IfCondition cond = condition->AsCondition()->GetCondition();
3726   return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
3727                                                      : ARM64Condition(cond);
3728 }
3729 
3730 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
3731   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
3732   if (DataType::IsFloatingPointType(select->GetType())) {
3733     locations->SetInAt(0, Location::RequiresFpuRegister());
3734     locations->SetInAt(1, Location::RequiresFpuRegister());
3735     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3736   } else {
3737     HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
3738     HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
3739     bool is_true_value_constant = cst_true_value != nullptr;
3740     bool is_false_value_constant = cst_false_value != nullptr;
3741     // Ask VIXL whether we should synthesize constants in registers.
3742     // We give an arbitrary register to VIXL when dealing with non-constant inputs.
3743     Operand true_op = is_true_value_constant ?
3744         Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
3745     Operand false_op = is_false_value_constant ?
3746         Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
3747     bool true_value_in_register = false;
3748     bool false_value_in_register = false;
3749     MacroAssembler::GetCselSynthesisInformation(
3750         x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
3751     true_value_in_register |= !is_true_value_constant;
3752     false_value_in_register |= !is_false_value_constant;
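         // A constant input keeps its constant location only when VIXL reports that no
         // extra register would be needed to materialize it for the conditional select;
         // everything else is requested in a register below.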
3753 
3754     locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
3755                                                  : Location::ConstantLocation(cst_true_value));
3756     locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
3757                                                   : Location::ConstantLocation(cst_false_value));
3758     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3759   }
3760 
3761   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3762     locations->SetInAt(2, Location::RequiresRegister());
3763   }
3764 }
3765 
3766 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
3767   HInstruction* cond = select->GetCondition();
3768   Condition csel_cond;
3769 
3770   if (IsBooleanValueOrMaterializedCondition(cond)) {
3771     if (cond->IsCondition() && cond->GetNext() == select) {
3772       // Use the condition flags set by the previous instruction.
3773       csel_cond = GetConditionForSelect(cond->AsCondition());
3774     } else {
3775       __ Cmp(InputRegisterAt(select, 2), 0);
3776       csel_cond = ne;
3777     }
3778   } else if (IsConditionOnFloatingPointValues(cond)) {
3779     GenerateFcmp(cond);
3780     csel_cond = GetConditionForSelect(cond->AsCondition());
3781   } else {
3782     __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
3783     csel_cond = GetConditionForSelect(cond->AsCondition());
3784   }
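       // At this point `csel_cond` selects the true value. The locations builder above
       // registered the true value as input 1 and the false value as input 0, matching
       // the (true, false) operand order of the Fcsel/Csel below.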
3785 
3786   if (DataType::IsFloatingPointType(select->GetType())) {
3787     __ Fcsel(OutputFPRegister(select),
3788              InputFPRegisterAt(select, 1),
3789              InputFPRegisterAt(select, 0),
3790              csel_cond);
3791   } else {
3792     __ Csel(OutputRegister(select),
3793             InputOperandAt(select, 1),
3794             InputOperandAt(select, 0),
3795             csel_cond);
3796   }
3797 }
3798 
3799 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
3800   new (GetGraph()->GetAllocator()) LocationSummary(info);
3801 }
3802 
3803 void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) {
3804   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
3805 }
3806 
3807 void CodeGeneratorARM64::GenerateNop() {
3808   __ Nop();
3809 }
3810 
3811 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3812   HandleFieldGet(instruction, instruction->GetFieldInfo());
3813 }
3814 
3815 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3816   HandleFieldGet(instruction, instruction->GetFieldInfo());
3817 }
3818 
3819 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3820   HandleFieldSet(instruction);
3821 }
3822 
3823 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3824   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
3825 }
3826 
3827 // Temp is used for read barrier.
3828 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
3829   if (kEmitCompilerReadBarrier &&
3830       (kUseBakerReadBarrier ||
3831           type_check_kind == TypeCheckKind::kAbstractClassCheck ||
3832           type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
3833           type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
3834     return 1;
3835   }
3836   return 0;
3837 }
3838 
3839 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
3840 // interface pointer, one for loading the current interface.
3841 // The other checks have one temp for loading the object's class.
3842 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
3843   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
3844     return 3;
3845   }
3846   return 1 + NumberOfInstanceOfTemps(type_check_kind);
3847 }
3848 
3849 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
3850   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
3851   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3852   bool baker_read_barrier_slow_path = false;
3853   switch (type_check_kind) {
3854     case TypeCheckKind::kExactCheck:
3855     case TypeCheckKind::kAbstractClassCheck:
3856     case TypeCheckKind::kClassHierarchyCheck:
3857     case TypeCheckKind::kArrayObjectCheck: {
3858       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
3859       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
3860       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
3861       break;
3862     }
3863     case TypeCheckKind::kArrayCheck:
3864     case TypeCheckKind::kUnresolvedCheck:
3865     case TypeCheckKind::kInterfaceCheck:
3866       call_kind = LocationSummary::kCallOnSlowPath;
3867       break;
3868   }
3869 
3870   LocationSummary* locations =
3871       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
3872   if (baker_read_barrier_slow_path) {
3873     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
3874   }
3875   locations->SetInAt(0, Location::RequiresRegister());
3876   locations->SetInAt(1, Location::RequiresRegister());
3877   // The "out" register is used as a temporary, so it overlaps with the inputs.
3878   // Note that TypeCheckSlowPathARM64 uses this register too.
3879   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3880   // Add temps if necessary for read barriers.
3881   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
3882 }
3883 
3884 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
3885   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3886   LocationSummary* locations = instruction->GetLocations();
3887   Location obj_loc = locations->InAt(0);
3888   Register obj = InputRegisterAt(instruction, 0);
3889   Register cls = InputRegisterAt(instruction, 1);
3890   Location out_loc = locations->Out();
3891   Register out = OutputRegister(instruction);
3892   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
3893   DCHECK_LE(num_temps, 1u);
3894   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
3895   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3896   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3897   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3898   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3899 
3900   vixl::aarch64::Label done, zero;
3901   SlowPathCodeARM64* slow_path = nullptr;
3902 
3903   // Return 0 if `obj` is null.
3904   // Avoid null check if we know `obj` is not null.
3905   if (instruction->MustDoNullCheck()) {
3906     __ Cbz(obj, &zero);
3907   }
3908 
3909   switch (type_check_kind) {
3910     case TypeCheckKind::kExactCheck: {
3911       ReadBarrierOption read_barrier_option =
3912           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3913       // /* HeapReference<Class> */ out = obj->klass_
3914       GenerateReferenceLoadTwoRegisters(instruction,
3915                                         out_loc,
3916                                         obj_loc,
3917                                         class_offset,
3918                                         maybe_temp_loc,
3919                                         read_barrier_option);
3920       __ Cmp(out, cls);
3921       __ Cset(out, eq);
3922       if (zero.IsLinked()) {
3923         __ B(&done);
3924       }
3925       break;
3926     }
3927 
3928     case TypeCheckKind::kAbstractClassCheck: {
3929       ReadBarrierOption read_barrier_option =
3930           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3931       // /* HeapReference<Class> */ out = obj->klass_
3932       GenerateReferenceLoadTwoRegisters(instruction,
3933                                         out_loc,
3934                                         obj_loc,
3935                                         class_offset,
3936                                         maybe_temp_loc,
3937                                         read_barrier_option);
3938       // If the class is abstract, we eagerly fetch the super class of the
3939       // object to avoid doing a comparison we know will fail.
3940       vixl::aarch64::Label loop, success;
3941       __ Bind(&loop);
3942       // /* HeapReference<Class> */ out = out->super_class_
3943       GenerateReferenceLoadOneRegister(instruction,
3944                                        out_loc,
3945                                        super_offset,
3946                                        maybe_temp_loc,
3947                                        read_barrier_option);
3948       // If `out` is null, we use it for the result, and jump to `done`.
3949       __ Cbz(out, &done);
3950       __ Cmp(out, cls);
3951       __ B(ne, &loop);
3952       __ Mov(out, 1);
3953       if (zero.IsLinked()) {
3954         __ B(&done);
3955       }
3956       break;
3957     }
3958 
3959     case TypeCheckKind::kClassHierarchyCheck: {
3960       ReadBarrierOption read_barrier_option =
3961           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3962       // /* HeapReference<Class> */ out = obj->klass_
3963       GenerateReferenceLoadTwoRegisters(instruction,
3964                                         out_loc,
3965                                         obj_loc,
3966                                         class_offset,
3967                                         maybe_temp_loc,
3968                                         read_barrier_option);
3969       // Walk over the class hierarchy to find a match.
3970       vixl::aarch64::Label loop, success;
3971       __ Bind(&loop);
3972       __ Cmp(out, cls);
3973       __ B(eq, &success);
3974       // /* HeapReference<Class> */ out = out->super_class_
3975       GenerateReferenceLoadOneRegister(instruction,
3976                                        out_loc,
3977                                        super_offset,
3978                                        maybe_temp_loc,
3979                                        read_barrier_option);
3980       __ Cbnz(out, &loop);
3981       // If `out` is null, we use it for the result, and jump to `done`.
3982       __ B(&done);
3983       __ Bind(&success);
3984       __ Mov(out, 1);
3985       if (zero.IsLinked()) {
3986         __ B(&done);
3987       }
3988       break;
3989     }
3990 
3991     case TypeCheckKind::kArrayObjectCheck: {
3992       ReadBarrierOption read_barrier_option =
3993           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3994       // /* HeapReference<Class> */ out = obj->klass_
3995       GenerateReferenceLoadTwoRegisters(instruction,
3996                                         out_loc,
3997                                         obj_loc,
3998                                         class_offset,
3999                                         maybe_temp_loc,
4000                                         read_barrier_option);
4001       // Do an exact check.
4002       vixl::aarch64::Label exact_check;
4003       __ Cmp(out, cls);
4004       __ B(eq, &exact_check);
4005       // Otherwise, we need to check that the object's class is a non-primitive array.
4006       // /* HeapReference<Class> */ out = out->component_type_
4007       GenerateReferenceLoadOneRegister(instruction,
4008                                        out_loc,
4009                                        component_offset,
4010                                        maybe_temp_loc,
4011                                        read_barrier_option);
4012       // If `out` is null, we use it for the result, and jump to `done`.
4013       __ Cbz(out, &done);
4014       __ Ldrh(out, HeapOperand(out, primitive_offset));
4015       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4016       __ Cbnz(out, &zero);
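           // A non-zero primitive type means a primitive array: branch to `zero` and
           // return 0. Otherwise fall through to `exact_check` and return 1.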
4017       __ Bind(&exact_check);
4018       __ Mov(out, 1);
4019       __ B(&done);
4020       break;
4021     }
4022 
4023     case TypeCheckKind::kArrayCheck: {
4024       // No read barrier since the slow path will retry upon failure.
4025       // /* HeapReference<Class> */ out = obj->klass_
4026       GenerateReferenceLoadTwoRegisters(instruction,
4027                                         out_loc,
4028                                         obj_loc,
4029                                         class_offset,
4030                                         maybe_temp_loc,
4031                                         kWithoutReadBarrier);
4032       __ Cmp(out, cls);
4033       DCHECK(locations->OnlyCallsOnSlowPath());
4034       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4035           instruction, /* is_fatal */ false);
4036       codegen_->AddSlowPath(slow_path);
4037       __ B(ne, slow_path->GetEntryLabel());
4038       __ Mov(out, 1);
4039       if (zero.IsLinked()) {
4040         __ B(&done);
4041       }
4042       break;
4043     }
4044 
4045     case TypeCheckKind::kUnresolvedCheck:
4046     case TypeCheckKind::kInterfaceCheck: {
4047       // Note that we indeed only call on slow path, but we always go
4048       // into the slow path for the unresolved and interface check
4049       // cases.
4050       //
4051       // We cannot directly call the InstanceofNonTrivial runtime
4052       // entry point without resorting to a type checking slow path
4053       // here (i.e. by calling InvokeRuntime directly), as it would
4054       // require to assign fixed registers for the inputs of this
4055       // HInstanceOf instruction (following the runtime calling
4056       // convention), which might be cluttered by the potential first
4057       // read barrier emission at the beginning of this method.
4058       //
4059       // TODO: Introduce a new runtime entry point taking the object
4060       // to test (instead of its class) as argument, and let it deal
4061       // with the read barrier issues. This will let us refactor this
4062       // case of the `switch` code as it was previously (with a direct
4063       // call to the runtime not using a type checking slow path).
4064       // This should also be beneficial for the other cases above.
4065       DCHECK(locations->OnlyCallsOnSlowPath());
4066       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4067           instruction, /* is_fatal */ false);
4068       codegen_->AddSlowPath(slow_path);
4069       __ B(slow_path->GetEntryLabel());
4070       if (zero.IsLinked()) {
4071         __ B(&done);
4072       }
4073       break;
4074     }
4075   }
4076 
4077   if (zero.IsLinked()) {
4078     __ Bind(&zero);
4079     __ Mov(out, 0);
4080   }
4081 
4082   if (done.IsLinked()) {
4083     __ Bind(&done);
4084   }
4085 
4086   if (slow_path != nullptr) {
4087     __ Bind(slow_path->GetExitLabel());
4088   }
4089 }
4090 
4091 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
4092   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4093   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
4094   LocationSummary* locations =
4095       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
4096   locations->SetInAt(0, Location::RequiresRegister());
4097   locations->SetInAt(1, Location::RequiresRegister());
4098   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
4099   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
4100 }
4101 
4102 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
4103   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4104   LocationSummary* locations = instruction->GetLocations();
4105   Location obj_loc = locations->InAt(0);
4106   Register obj = InputRegisterAt(instruction, 0);
4107   Register cls = InputRegisterAt(instruction, 1);
4108   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
4109   DCHECK_GE(num_temps, 1u);
4110   DCHECK_LE(num_temps, 3u);
4111   Location temp_loc = locations->GetTemp(0);
4112   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
4113   Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
4114   Register temp = WRegisterFrom(temp_loc);
4115   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4116   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4117   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4118   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
4119   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
4120   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
4121   const uint32_t object_array_data_offset =
4122       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
4123 
4124   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
4125   SlowPathCodeARM64* type_check_slow_path =
4126       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4127           instruction, is_type_check_slow_path_fatal);
4128   codegen_->AddSlowPath(type_check_slow_path);
4129 
4130   vixl::aarch64::Label done;
4131   // Avoid null check if we know obj is not null.
4132   if (instruction->MustDoNullCheck()) {
4133     __ Cbz(obj, &done);
4134   }
4135 
4136   switch (type_check_kind) {
4137     case TypeCheckKind::kExactCheck:
4138     case TypeCheckKind::kArrayCheck: {
4139       // /* HeapReference<Class> */ temp = obj->klass_
4140       GenerateReferenceLoadTwoRegisters(instruction,
4141                                         temp_loc,
4142                                         obj_loc,
4143                                         class_offset,
4144                                         maybe_temp2_loc,
4145                                         kWithoutReadBarrier);
4146 
4147       __ Cmp(temp, cls);
4148       // Jump to slow path for throwing the exception or doing a
4149       // more involved array check.
4150       __ B(ne, type_check_slow_path->GetEntryLabel());
4151       break;
4152     }
4153 
4154     case TypeCheckKind::kAbstractClassCheck: {
4155       // /* HeapReference<Class> */ temp = obj->klass_
4156       GenerateReferenceLoadTwoRegisters(instruction,
4157                                         temp_loc,
4158                                         obj_loc,
4159                                         class_offset,
4160                                         maybe_temp2_loc,
4161                                         kWithoutReadBarrier);
4162 
4163       // If the class is abstract, we eagerly fetch the super class of the
4164       // object to avoid doing a comparison we know will fail.
4165       vixl::aarch64::Label loop;
4166       __ Bind(&loop);
4167       // /* HeapReference<Class> */ temp = temp->super_class_
4168       GenerateReferenceLoadOneRegister(instruction,
4169                                        temp_loc,
4170                                        super_offset,
4171                                        maybe_temp2_loc,
4172                                        kWithoutReadBarrier);
4173 
4174       // If the class reference currently in `temp` is null, jump to the slow path to throw the
4175       // exception.
4176       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4177       // Otherwise, compare classes.
4178       __ Cmp(temp, cls);
4179       __ B(ne, &loop);
4180       break;
4181     }
4182 
4183     case TypeCheckKind::kClassHierarchyCheck: {
4184       // /* HeapReference<Class> */ temp = obj->klass_
4185       GenerateReferenceLoadTwoRegisters(instruction,
4186                                         temp_loc,
4187                                         obj_loc,
4188                                         class_offset,
4189                                         maybe_temp2_loc,
4190                                         kWithoutReadBarrier);
4191 
4192       // Walk over the class hierarchy to find a match.
4193       vixl::aarch64::Label loop;
4194       __ Bind(&loop);
4195       __ Cmp(temp, cls);
4196       __ B(eq, &done);
4197 
4198       // /* HeapReference<Class> */ temp = temp->super_class_
4199       GenerateReferenceLoadOneRegister(instruction,
4200                                        temp_loc,
4201                                        super_offset,
4202                                        maybe_temp2_loc,
4203                                        kWithoutReadBarrier);
4204 
4205       // If the class reference currently in `temp` is not null, jump
4206       // back at the beginning of the loop.
4207       __ Cbnz(temp, &loop);
4208       // Otherwise, jump to the slow path to throw the exception.
4209       __ B(type_check_slow_path->GetEntryLabel());
4210       break;
4211     }
4212 
4213     case TypeCheckKind::kArrayObjectCheck: {
4214       // /* HeapReference<Class> */ temp = obj->klass_
4215       GenerateReferenceLoadTwoRegisters(instruction,
4216                                         temp_loc,
4217                                         obj_loc,
4218                                         class_offset,
4219                                         maybe_temp2_loc,
4220                                         kWithoutReadBarrier);
4221 
4222       // Do an exact check.
4223       __ Cmp(temp, cls);
4224       __ B(eq, &done);
4225 
4226       // Otherwise, we need to check that the object's class is a non-primitive array.
4227       // /* HeapReference<Class> */ temp = temp->component_type_
4228       GenerateReferenceLoadOneRegister(instruction,
4229                                        temp_loc,
4230                                        component_offset,
4231                                        maybe_temp2_loc,
4232                                        kWithoutReadBarrier);
4233 
4234       // If the component type is null, jump to the slow path to throw the exception.
4235       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4236       // Otherwise, the object is indeed an array. Further check that this component type is not a
4237       // primitive type.
4238       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
4239       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4240       __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
4241       break;
4242     }
4243 
4244     case TypeCheckKind::kUnresolvedCheck:
4245       // We always go into the type check slow path for the unresolved check cases.
4246       //
4247       // We cannot directly call the CheckCast runtime entry point
4248       // without resorting to a type checking slow path here (i.e. by
4249       // calling InvokeRuntime directly), as it would require
4250       // assigning fixed registers for the inputs of this HCheckCast
4251       // instruction (following the runtime calling convention), which
4252       // might be cluttered by the potential first read barrier
4253       // emission at the beginning of this method.
4254       __ B(type_check_slow_path->GetEntryLabel());
4255       break;
4256     case TypeCheckKind::kInterfaceCheck: {
4257       // /* HeapReference<Class> */ temp = obj->klass_
4258       GenerateReferenceLoadTwoRegisters(instruction,
4259                                         temp_loc,
4260                                         obj_loc,
4261                                         class_offset,
4262                                         maybe_temp2_loc,
4263                                         kWithoutReadBarrier);
4264 
4265       // /* HeapReference<Class> */ temp = temp->iftable_
4266       GenerateReferenceLoadTwoRegisters(instruction,
4267                                         temp_loc,
4268                                         temp_loc,
4269                                         iftable_offset,
4270                                         maybe_temp2_loc,
4271                                         kWithoutReadBarrier);
4272       // Iftable is never null.
4273       __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
4274       // Loop through the iftable and check if any class matches.
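           // An IfTable entry is a pair (interface class, method array), which is why
           // the cursor below advances by 2 * kHeapReferenceSize and the remaining
           // count is decremented by 2 per iteration.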
4275       vixl::aarch64::Label start_loop;
4276       __ Bind(&start_loop);
4277       __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
4278       __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
4279       GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
4280       // Go to next interface.
4281       __ Add(temp, temp, 2 * kHeapReferenceSize);
4282       __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
4283       // Compare the classes and continue the loop if they do not match.
4284       __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
4285       __ B(ne, &start_loop);
4286       break;
4287     }
4288   }
4289   __ Bind(&done);
4290 
4291   __ Bind(type_check_slow_path->GetExitLabel());
4292 }
4293 
4294 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
4295   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4296   locations->SetOut(Location::ConstantLocation(constant));
4297 }
4298 
4299 void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
4300   // Will be generated at use site.
4301 }
4302 
4303 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
4304   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4305   locations->SetOut(Location::ConstantLocation(constant));
4306 }
4307 
4308 void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
4309   // Will be generated at use site.
4310 }
4311 
4312 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4313   // The trampoline uses the same calling convention as dex calling conventions,
4314   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
4315   // the method_idx.
4316   HandleInvoke(invoke);
4317 }
4318 
4319 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4320   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
4321   codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4322 }
4323 
4324 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
4325   InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
4326   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4327 }
4328 
4329 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4330   HandleInvoke(invoke);
4331 }
4332 
4333 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4334   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
4335   LocationSummary* locations = invoke->GetLocations();
4336   Register temp = XRegisterFrom(locations->GetTemp(0));
4337   Location receiver = locations->InAt(0);
4338   Offset class_offset = mirror::Object::ClassOffset();
4339   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4340 
4341   // The register ip1 is required to be used for the hidden argument in
4342   // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
4343   MacroAssembler* masm = GetVIXLAssembler();
4344   UseScratchRegisterScope scratch_scope(masm);
4345   scratch_scope.Exclude(ip1);
4346   __ Mov(ip1, invoke->GetDexMethodIndex());
4347 
4348   // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4349   if (receiver.IsStackSlot()) {
4350     __ Ldr(temp.W(), StackOperandFrom(receiver));
4351     {
4352       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4353       // /* HeapReference<Class> */ temp = temp->klass_
4354       __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
4355       codegen_->MaybeRecordImplicitNullCheck(invoke);
4356     }
4357   } else {
4358     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4359     // /* HeapReference<Class> */ temp = receiver->klass_
4360     __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
4361     codegen_->MaybeRecordImplicitNullCheck(invoke);
4362   }
4363 
4364   // Instead of simply (possibly) unpoisoning `temp` here, we should
4365   // emit a read barrier for the previous class reference load.
4366   // However this is not required in practice, as this is an
4367   // intermediate/temporary reference and because the current
4368   // concurrent copying collector keeps the from-space memory
4369   // intact/accessible until the end of the marking phase (the
4370   // concurrent copying collector may not in the future).
4371   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4372   __ Ldr(temp,
4373       MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
4374   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4375       invoke->GetImtIndex(), kArm64PointerSize));
4376   // temp = temp->GetImtEntryAt(method_offset);
4377   __ Ldr(temp, MemOperand(temp, method_offset));
4378   // lr = temp->GetEntryPoint();
4379   __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
4380 
4381   {
4382     // Ensure the pc position is recorded immediately after the `blr` instruction.
4383     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4384 
4385     // lr();
4386     __ blr(lr);
4387     DCHECK(!codegen_->IsLeafMethod());
4388     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4389   }
4390 
4391   codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4392 }
4393 
4394 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4395   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4396   if (intrinsic.TryDispatch(invoke)) {
4397     return;
4398   }
4399 
4400   HandleInvoke(invoke);
4401 }
4402 
4403 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4404   // Explicit clinit checks triggered by static invokes must have been pruned by
4405   // art::PrepareForRegisterAllocation.
4406   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4407 
4408   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4409   if (intrinsic.TryDispatch(invoke)) {
4410     return;
4411   }
4412 
4413   HandleInvoke(invoke);
4414 }
4415 
4416 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4417   if (invoke->GetLocations()->Intrinsified()) {
4418     IntrinsicCodeGeneratorARM64 intrinsic(codegen);
4419     intrinsic.Dispatch(invoke);
4420     return true;
4421   }
4422   return false;
4423 }
4424 
4425 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
4426       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
4427       HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
4428   // On ARM64 we support all dispatch types.
4429   return desired_dispatch_info;
4430 }
4431 
4432 void CodeGeneratorARM64::GenerateStaticOrDirectCall(
4433     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
4434   // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
4435   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
4436   switch (invoke->GetMethodLoadKind()) {
4437     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
4438       uint32_t offset =
4439           GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
4440       // temp = thread->string_init_entrypoint
4441       __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
4442       break;
4443     }
4444     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
4445       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4446       break;
4447     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
4448       DCHECK(GetCompilerOptions().IsBootImage());
4449       // Add ADRP with its PC-relative method patch.
4450       vixl::aarch64::Label* adrp_label = NewBootImageMethodPatch(invoke->GetTargetMethod());
4451       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4452       // Add ADD with its PC-relative method patch.
4453       vixl::aarch64::Label* add_label =
4454           NewBootImageMethodPatch(invoke->GetTargetMethod(), adrp_label);
4455       EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
4456       break;
4457     }
4458     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
4459       // Load method address from literal pool.
4460       __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
4461       break;
4462     case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
4463       // Add ADRP with its PC-relative .bss entry patch.
4464       MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
4465       vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method);
4466       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4467       // Add LDR with its PC-relative .bss entry patch.
4468       vixl::aarch64::Label* ldr_label =
4469           NewMethodBssEntryPatch(target_method, adrp_label);
4470       EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
4471       break;
4472     }
4473     case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
4474       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
4475       return;  // No code pointer retrieval; the runtime performs the call directly.
4476     }
4477   }
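       // `callee_method` is `temp` for every load kind except kRecursive, which reuses
       // the invoke's own ArtMethod* input; kRuntimeCall has already returned above.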
4478 
4479   switch (invoke->GetCodePtrLocation()) {
4480     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
4481       {
4482         // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4483         ExactAssemblyScope eas(GetVIXLAssembler(),
4484                                kInstructionSize,
4485                                CodeBufferCheckScope::kExactSize);
4486         __ bl(&frame_entry_label_);
4487         RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4488       }
4489       break;
4490     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
4491       // LR = callee_method->entry_point_from_quick_compiled_code_;
4492       __ Ldr(lr, MemOperand(
4493           XRegisterFrom(callee_method),
4494           ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
4495       {
4496         // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4497         ExactAssemblyScope eas(GetVIXLAssembler(),
4498                                kInstructionSize,
4499                                CodeBufferCheckScope::kExactSize);
4500         // lr()
4501         __ blr(lr);
4502         RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4503       }
4504       break;
4505   }
4506 
4507   DCHECK(!IsLeafMethod());
4508 }
4509 
4510 void CodeGeneratorARM64::GenerateVirtualCall(
4511     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
4512   // Use the calling convention instead of the location of the receiver, as
4513   // intrinsics may have put the receiver in a different register. In the intrinsics
4514   // slow path, the arguments have been moved to the right place, so here we are
4515   // guaranteed that the receiver is the first register of the calling convention.
4516   InvokeDexCallingConvention calling_convention;
4517   Register receiver = calling_convention.GetRegisterAt(0);
4518   Register temp = XRegisterFrom(temp_in);
4519   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4520       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
4521   Offset class_offset = mirror::Object::ClassOffset();
4522   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4523 
4524   DCHECK(receiver.IsRegister());
4525 
4526   {
4527     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4528     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4529     // /* HeapReference<Class> */ temp = receiver->klass_
4530     __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
4531     MaybeRecordImplicitNullCheck(invoke);
4532   }
4533   // Instead of simply (possibly) unpoisoning `temp` here, we should
4534   // emit a read barrier for the previous class reference load.
4535   // However this is not required in practice, as this is an
       // intermediate/temporary reference and because the current
4536   // concurrent copying collector keeps the from-space memory
4537   // intact/accessible until the end of the marking phase (the
4538   // concurrent copying collector may not in the future).
4539   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4540   // temp = temp->GetMethodAt(method_offset);
4541   __ Ldr(temp, MemOperand(temp, method_offset));
4542   // lr = temp->GetEntryPoint();
4543   __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
4544   {
4545     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4546     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4547     // lr();
4548     __ blr(lr);
4549     RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4550   }
4551 }
4552 
4553 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4554   HandleInvoke(invoke);
4555 }
4556 
4557 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4558   codegen_->GenerateInvokePolymorphicCall(invoke);
4559   codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4560 }
4561 
4562 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch(
4563     MethodReference target_method,
4564     vixl::aarch64::Label* adrp_label) {
4565   return NewPcRelativePatch(
4566       target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_);
4567 }
4568 
4569 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch(
4570     MethodReference target_method,
4571     vixl::aarch64::Label* adrp_label) {
4572   return NewPcRelativePatch(
4573       target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_);
4574 }
4575 
4576 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch(
4577     const DexFile& dex_file,
4578     dex::TypeIndex type_index,
4579     vixl::aarch64::Label* adrp_label) {
4580   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_);
4581 }
4582 
4583 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
4584     const DexFile& dex_file,
4585     dex::TypeIndex type_index,
4586     vixl::aarch64::Label* adrp_label) {
4587   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
4588 }
4589 
4590 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch(
4591     const DexFile& dex_file,
4592     dex::StringIndex string_index,
4593     vixl::aarch64::Label* adrp_label) {
4594   return NewPcRelativePatch(
4595       &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_);
4596 }
4597 
4598 vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
4599     const DexFile& dex_file,
4600     dex::StringIndex string_index,
4601     vixl::aarch64::Label* adrp_label) {
4602   return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
4603 }
4604 
4605 vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
4606   baker_read_barrier_patches_.emplace_back(custom_data);
4607   return &baker_read_barrier_patches_.back().label;
4608 }
4609 
4610 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
4611     const DexFile* dex_file,
4612     uint32_t offset_or_index,
4613     vixl::aarch64::Label* adrp_label,
4614     ArenaDeque<PcRelativePatchInfo>* patches) {
4615   // Add a patch entry and return the label.
4616   patches->emplace_back(dex_file, offset_or_index);
4617   PcRelativePatchInfo* info = &patches->back();
4618   vixl::aarch64::Label* label = &info->label;
4619   // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
4620   info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
4621   return label;
4622 }
4623 
4624 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
4625     uint64_t address) {
4626   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
4627 }
4628 
4629 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
4630     const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) {
4631   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
4632   return jit_string_patches_.GetOrCreate(
4633       StringReference(&dex_file, string_index),
4634       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4635 }
4636 
4637 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
4638     const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) {
4639   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
4640   return jit_class_patches_.GetOrCreate(
4641       TypeReference(&dex_file, type_index),
4642       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4643 }
4644 
4645 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
4646                                              vixl::aarch64::Register reg) {
4647   DCHECK(reg.IsX());
4648   SingleEmissionCheckScope guard(GetVIXLAssembler());
4649   __ Bind(fixup_label);
4650   __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
4651 }
4652 
4653 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
4654                                             vixl::aarch64::Register out,
4655                                             vixl::aarch64::Register base) {
4656   DCHECK(out.IsX());
4657   DCHECK(base.IsX());
4658   SingleEmissionCheckScope guard(GetVIXLAssembler());
4659   __ Bind(fixup_label);
4660   __ add(out, base, Operand(/* offset placeholder */ 0));
4661 }
4662 
4663 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
4664                                                   vixl::aarch64::Register out,
4665                                                   vixl::aarch64::Register base) {
4666   DCHECK(base.IsX());
4667   SingleEmissionCheckScope guard(GetVIXLAssembler());
4668   __ Bind(fixup_label);
4669   __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
4670 }
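// Sketch of what the three placeholder emitters above expand to once the linker has filled in
// the real displacements (registers and offsets are illustrative only):
//
//   adrp x0, TargetPage              // EmitAdrpPlaceholder
//   add  x0, x0, #page_offset        // EmitAddPlaceholder: address of a boot image object.
// or
//   adrp x0, TargetPage              // EmitAdrpPlaceholder
//   ldr  w0, [x0, #page_offset]      // EmitLdrOffsetPlaceholder: load e.g. a .bss entry.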
4671 
4672 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
4673 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
4674     const ArenaDeque<PcRelativePatchInfo>& infos,
4675     ArenaVector<linker::LinkerPatch>* linker_patches) {
4676   for (const PcRelativePatchInfo& info : infos) {
4677     linker_patches->push_back(Factory(info.label.GetLocation(),
4678                                       info.target_dex_file,
4679                                       info.pc_insn_label->GetLocation(),
4680                                       info.offset_or_index));
4681   }
4682 }
4683 
4684 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
4685   DCHECK(linker_patches->empty());
4686   size_t size =
4687       boot_image_method_patches_.size() +
4688       method_bss_entry_patches_.size() +
4689       boot_image_type_patches_.size() +
4690       type_bss_entry_patches_.size() +
4691       boot_image_string_patches_.size() +
4692       string_bss_entry_patches_.size() +
4693       baker_read_barrier_patches_.size();
4694   linker_patches->reserve(size);
4695   if (GetCompilerOptions().IsBootImage()) {
4696     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
4697         boot_image_method_patches_, linker_patches);
4698     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
4699         boot_image_type_patches_, linker_patches);
4700     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
4701         boot_image_string_patches_, linker_patches);
4702   } else {
4703     DCHECK(boot_image_method_patches_.empty());
4704     EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
4705         boot_image_type_patches_, linker_patches);
4706     EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
4707         boot_image_string_patches_, linker_patches);
4708   }
4709   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
4710       method_bss_entry_patches_, linker_patches);
4711   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
4712       type_bss_entry_patches_, linker_patches);
4713   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
4714       string_bss_entry_patches_, linker_patches);
4715   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
4716     linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
4717         info.label.GetLocation(), info.custom_data));
4718   }
4719   DCHECK_EQ(size, linker_patches->size());
4720 }
4721 
4722 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) {
4723   return uint32_literals_.GetOrCreate(
4724       value,
4725       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
4726 }
4727 
4728 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
4729   return uint64_literals_.GetOrCreate(
4730       value,
4731       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
4732 }
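// Both helpers above deduplicate through `GetOrCreate`, so requesting the same constant twice
// returns the same literal pool entry. Hypothetical example:
//
//   auto* lit_a = DeduplicateUint64Literal(UINT64_C(0x123456789abcdef0));
//   auto* lit_b = DeduplicateUint64Literal(UINT64_C(0x123456789abcdef0));
//   // lit_a == lit_b: both uses share a single pool slot.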
4733 
4734 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4735   // Explicit clinit checks triggered by static invokes must have been pruned by
4736   // art::PrepareForRegisterAllocation.
4737   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4738 
4739   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4740     codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4741     return;
4742   }
4743 
4744   {
4745     // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
4746     // are no pools emitted.
4747     EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
4748     LocationSummary* locations = invoke->GetLocations();
4749     codegen_->GenerateStaticOrDirectCall(
4750         invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
4751   }
4752 
4753   codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4754 }
4755 
4756 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4757   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4758     codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4759     return;
4760   }
4761 
4762   {
4763     // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
4764     // are no pools emitted.
4765     EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
4766     codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
4767     DCHECK(!codegen_->IsLeafMethod());
4768   }
4769 
4770   codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4771 }
4772 
4773 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
4774     HLoadClass::LoadKind desired_class_load_kind) {
4775   switch (desired_class_load_kind) {
4776     case HLoadClass::LoadKind::kInvalid:
4777       LOG(FATAL) << "UNREACHABLE";
4778       UNREACHABLE();
4779     case HLoadClass::LoadKind::kReferrersClass:
4780       break;
4781     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
4782     case HLoadClass::LoadKind::kBootImageClassTable:
4783     case HLoadClass::LoadKind::kBssEntry:
4784       DCHECK(!Runtime::Current()->UseJitCompilation());
4785       break;
4786     case HLoadClass::LoadKind::kJitTableAddress:
4787       DCHECK(Runtime::Current()->UseJitCompilation());
4788       break;
4789     case HLoadClass::LoadKind::kBootImageAddress:
4790     case HLoadClass::LoadKind::kRuntimeCall:
4791       break;
4792   }
4793   return desired_class_load_kind;
4794 }
4795 
4796 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
4797   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
4798   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
4799     InvokeRuntimeCallingConvention calling_convention;
4800     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
4801         cls,
4802         LocationFrom(calling_convention.GetRegisterAt(0)),
4803         LocationFrom(vixl::aarch64::x0));
4804     DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
4805     return;
4806   }
4807   DCHECK(!cls->NeedsAccessCheck());
4808 
4809   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
4810   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
4811       ? LocationSummary::kCallOnSlowPath
4812       : LocationSummary::kNoCall;
4813   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
4814   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
4815     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4816   }
4817 
4818   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
4819     locations->SetInAt(0, Location::RequiresRegister());
4820   }
4821   locations->SetOut(Location::RequiresRegister());
4822   if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
4823     if (!kUseReadBarrier || kUseBakerReadBarrier) {
4824       // Rely on the type resolution or initialization and marking to save everything we need.
4825       RegisterSet caller_saves = RegisterSet::Empty();
4826       InvokeRuntimeCallingConvention calling_convention;
4827       caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
4828       DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
4829                 RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
4830                              DataType::Type::kReference).GetCode());
4831       locations->SetCustomSlowPathCallerSaves(caller_saves);
4832     } else {
4833       // For non-Baker read barrier we have a temp-clobbering call.
4834     }
4835   }
4836 }
4837 
4838 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
4839 // move.
4840 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
4841   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
4842   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
4843     codegen_->GenerateLoadClassRuntimeCall(cls);
4844     codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4845     return;
4846   }
4847   DCHECK(!cls->NeedsAccessCheck());
4848 
4849   Location out_loc = cls->GetLocations()->Out();
4850   Register out = OutputRegister(cls);
4851 
4852   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
4853       ? kWithoutReadBarrier
4854       : kCompilerReadBarrierOption;
4855   bool generate_null_check = false;
4856   switch (load_kind) {
4857     case HLoadClass::LoadKind::kReferrersClass: {
4858       DCHECK(!cls->CanCallRuntime());
4859       DCHECK(!cls->MustGenerateClinitCheck());
4860       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
4861       Register current_method = InputRegisterAt(cls, 0);
4862       GenerateGcRootFieldLoad(cls,
4863                               out_loc,
4864                               current_method,
4865                               ArtMethod::DeclaringClassOffset().Int32Value(),
4866                               /* fixup_label */ nullptr,
4867                               read_barrier_option);
4868       break;
4869     }
4870     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
4871       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
4872       // Add ADRP with its PC-relative type patch.
4873       const DexFile& dex_file = cls->GetDexFile();
4874       dex::TypeIndex type_index = cls->GetTypeIndex();
4875       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index);
4876       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
4877       // Add ADD with its PC-relative type patch.
4878       vixl::aarch64::Label* add_label =
4879           codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label);
4880       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
4881       break;
4882     }
4883     case HLoadClass::LoadKind::kBootImageAddress: {
4884       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
4885       uint32_t address = dchecked_integral_cast<uint32_t>(
4886           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
4887       DCHECK_NE(address, 0u);
4888       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
4889       break;
4890     }
4891     case HLoadClass::LoadKind::kBootImageClassTable: {
4892       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
4893       // Add ADRP with its PC-relative type patch.
4894       const DexFile& dex_file = cls->GetDexFile();
4895       dex::TypeIndex type_index = cls->GetTypeIndex();
4896       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index);
4897       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
4898       // Add LDR with its PC-relative type patch.
4899       vixl::aarch64::Label* ldr_label =
4900           codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label);
4901       codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
4902       // Extract the reference from the slot data, i.e. clear the hash bits.
4903       int32_t masked_hash = ClassTable::TableSlot::MaskHash(
4904           ComputeModifiedUtf8Hash(dex_file.StringByTypeIdx(type_index)));
4905       if (masked_hash != 0) {
4906         __ Sub(out.W(), out.W(), Operand(masked_hash));
4907       }
4908       break;
4909     }
4910     case HLoadClass::LoadKind::kBssEntry: {
4911       // Add ADRP with its PC-relative Class .bss entry patch.
4912       const DexFile& dex_file = cls->GetDexFile();
4913       dex::TypeIndex type_index = cls->GetTypeIndex();
4914       vixl::aarch64::Register temp = XRegisterFrom(out_loc);
4915       vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
4916       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
4917       // Add LDR with its PC-relative Class patch.
4918       vixl::aarch64::Label* ldr_label =
4919           codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
4920       // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
4921       GenerateGcRootFieldLoad(cls,
4922                               out_loc,
4923                               temp,
4924                               /* offset placeholder */ 0u,
4925                               ldr_label,
4926                               read_barrier_option);
4927       generate_null_check = true;
4928       break;
4929     }
4930     case HLoadClass::LoadKind::kJitTableAddress: {
4931       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
4932                                                        cls->GetTypeIndex(),
4933                                                        cls->GetClass()));
4934       GenerateGcRootFieldLoad(cls,
4935                               out_loc,
4936                               out.X(),
4937                               /* offset */ 0,
4938                               /* fixup_label */ nullptr,
4939                               read_barrier_option);
4940       break;
4941     }
4942     case HLoadClass::LoadKind::kRuntimeCall:
4943     case HLoadClass::LoadKind::kInvalid:
4944       LOG(FATAL) << "UNREACHABLE";
4945       UNREACHABLE();
4946   }
4947 
4948   bool do_clinit = cls->MustGenerateClinitCheck();
4949   if (generate_null_check || do_clinit) {
4950     DCHECK(cls->CanCallRuntime());
4951     SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(
4952         cls, cls, cls->GetDexPc(), do_clinit);
4953     codegen_->AddSlowPath(slow_path);
4954     if (generate_null_check) {
4955       __ Cbz(out, slow_path->GetEntryLabel());
4956     }
4957     if (cls->MustGenerateClinitCheck()) {
4958       GenerateClassInitializationCheck(slow_path, out);
4959     } else {
4960       __ Bind(slow_path->GetExitLabel());
4961     }
4962     codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4963   }
4964 }
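// Rough shape of the kBssEntry path above after linker patching (registers and offsets are
// illustrative):
//
//   adrp x0, ClassBssPage                 // NewBssEntryTypePatch + EmitAdrpPlaceholder.
//   ldr  w0, [x0, #bss_offset]            // GenerateGcRootFieldLoad via the LDR placeholder.
//   cbz  w0, LoadClassSlowPathARM64       // Entry still null: resolve (and maybe clinit).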
4965 
4966 static MemOperand GetExceptionTlsAddress() {
4967   return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
4968 }
4969 
4970 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
4971   LocationSummary* locations =
4972       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
4973   locations->SetOut(Location::RequiresRegister());
4974 }
4975 
4976 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
4977   __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress());
4978 }
4979 
4980 void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
4981   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
4982 }
4983 
4984 void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
4985   __ Str(wzr, GetExceptionTlsAddress());
4986 }
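// Since the pending exception is a field of the current Thread, both visitors above reduce to a
// single access off the thread register (output register illustrative):
//
//   ldr w0, [tr, #exception_offset]    // HLoadException
//   str wzr, [tr, #exception_offset]   // HClearException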
4987 
4988 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
4989     HLoadString::LoadKind desired_string_load_kind) {
4990   switch (desired_string_load_kind) {
4991     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
4992     case HLoadString::LoadKind::kBootImageInternTable:
4993     case HLoadString::LoadKind::kBssEntry:
4994       DCHECK(!Runtime::Current()->UseJitCompilation());
4995       break;
4996     case HLoadString::LoadKind::kJitTableAddress:
4997       DCHECK(Runtime::Current()->UseJitCompilation());
4998       break;
4999     case HLoadString::LoadKind::kBootImageAddress:
5000     case HLoadString::LoadKind::kRuntimeCall:
5001       break;
5002   }
5003   return desired_string_load_kind;
5004 }
5005 
5006 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
5007   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
5008   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
5009   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
5010     InvokeRuntimeCallingConvention calling_convention;
5011     locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
5012   } else {
5013     locations->SetOut(Location::RequiresRegister());
5014     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
5015       if (!kUseReadBarrier || kUseBakerReadBarrier) {
5016         // Rely on the pResolveString and marking to save everything we need.
5017         RegisterSet caller_saves = RegisterSet::Empty();
5018         InvokeRuntimeCallingConvention calling_convention;
5019         caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
5020         DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
5021                   RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
5022                                DataType::Type::kReference).GetCode());
5023         locations->SetCustomSlowPathCallerSaves(caller_saves);
5024       } else {
5025         // For non-Baker read barrier we have a temp-clobbering call.
5026       }
5027     }
5028   }
5029 }
5030 
5031 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5032 // move.
5033 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
5034   Register out = OutputRegister(load);
5035   Location out_loc = load->GetLocations()->Out();
5036 
5037   switch (load->GetLoadKind()) {
5038     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5039       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
5040       // Add ADRP with its PC-relative String patch.
5041       const DexFile& dex_file = load->GetDexFile();
5042       const dex::StringIndex string_index = load->GetStringIndex();
5043       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index);
5044       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5045       // Add ADD with its PC-relative String patch.
5046       vixl::aarch64::Label* add_label =
5047           codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label);
5048       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5049       return;
5050     }
5051     case HLoadString::LoadKind::kBootImageAddress: {
5052       uint32_t address = dchecked_integral_cast<uint32_t>(
5053           reinterpret_cast<uintptr_t>(load->GetString().Get()));
5054       DCHECK_NE(address, 0u);
5055       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5056       return;
5057     }
5058     case HLoadString::LoadKind::kBootImageInternTable: {
5059       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5060       // Add ADRP with its PC-relative String patch.
5061       const DexFile& dex_file = load->GetDexFile();
5062       const dex::StringIndex string_index = load->GetStringIndex();
5063       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index);
5064       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5065       // Add LDR with its PC-relative String patch.
5066       vixl::aarch64::Label* ldr_label =
5067           codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label);
5068       codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
5069       return;
5070     }
5071     case HLoadString::LoadKind::kBssEntry: {
5072       // Add ADRP with its PC-relative String .bss entry patch.
5073       const DexFile& dex_file = load->GetDexFile();
5074       const dex::StringIndex string_index = load->GetStringIndex();
5075       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5076       Register temp = XRegisterFrom(out_loc);
5077       vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index);
5078       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5079       // Add LDR with its .bss entry String patch.
5080       vixl::aarch64::Label* ldr_label =
5081           codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
5082       // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
5083       GenerateGcRootFieldLoad(load,
5084                               out_loc,
5085                               temp,
5086                               /* offset placeholder */ 0u,
5087                               ldr_label,
5088                               kCompilerReadBarrierOption);
5089       SlowPathCodeARM64* slow_path =
5090           new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
5091       codegen_->AddSlowPath(slow_path);
5092       __ Cbz(out.X(), slow_path->GetEntryLabel());
5093       __ Bind(slow_path->GetExitLabel());
5094       codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
5095       return;
5096     }
5097     case HLoadString::LoadKind::kJitTableAddress: {
5098       __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
5099                                                         load->GetStringIndex(),
5100                                                         load->GetString()));
5101       GenerateGcRootFieldLoad(load,
5102                               out_loc,
5103                               out.X(),
5104                               /* offset */ 0,
5105                               /* fixup_label */ nullptr,
5106                               kCompilerReadBarrierOption);
5107       return;
5108     }
5109     default:
5110       break;
5111   }
5112 
5113   // TODO: Re-add the compiler code to do string dex cache lookup again.
5114   InvokeRuntimeCallingConvention calling_convention;
5115   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
5116   __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
5117   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
5118   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
5119   codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
5120 }
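// The kRuntimeCall fallback above boils down to roughly the following (illustrative;
// InvokeRuntime loads the entrypoint from the current Thread):
//
//   mov w0, #string_index
//   ldr lr, [tr, #pResolveString_offset]
//   blr lr                               // Resolved String returned in w0.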
5121 
5122 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
5123   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
5124   locations->SetOut(Location::ConstantLocation(constant));
5125 }
5126 
5127 void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
5128   // Will be generated at use site.
5129 }
5130 
5131 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5132   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5133       instruction, LocationSummary::kCallOnMainOnly);
5134   InvokeRuntimeCallingConvention calling_convention;
5135   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5136 }
5137 
5138 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5139   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
5140                           instruction,
5141                           instruction->GetDexPc());
5142   if (instruction->IsEnter()) {
5143     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
5144   } else {
5145     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
5146   }
5147   codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
5148 }
5149 
5150 void LocationsBuilderARM64::VisitMul(HMul* mul) {
5151   LocationSummary* locations =
5152       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
5153   switch (mul->GetResultType()) {
5154     case DataType::Type::kInt32:
5155     case DataType::Type::kInt64:
5156       locations->SetInAt(0, Location::RequiresRegister());
5157       locations->SetInAt(1, Location::RequiresRegister());
5158       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5159       break;
5160 
5161     case DataType::Type::kFloat32:
5162     case DataType::Type::kFloat64:
5163       locations->SetInAt(0, Location::RequiresFpuRegister());
5164       locations->SetInAt(1, Location::RequiresFpuRegister());
5165       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5166       break;
5167 
5168     default:
5169       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5170   }
5171 }
5172 
5173 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) {
5174   switch (mul->GetResultType()) {
5175     case DataType::Type::kInt32:
5176     case DataType::Type::kInt64:
5177       __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
5178       break;
5179 
5180     case DataType::Type::kFloat32:
5181     case DataType::Type::kFloat64:
5182       __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1));
5183       break;
5184 
5185     default:
5186       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5187   }
5188 }
5189 
5190 void LocationsBuilderARM64::VisitNeg(HNeg* neg) {
5191   LocationSummary* locations =
5192       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
5193   switch (neg->GetResultType()) {
5194     case DataType::Type::kInt32:
5195     case DataType::Type::kInt64:
5196       locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
5197       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5198       break;
5199 
5200     case DataType::Type::kFloat32:
5201     case DataType::Type::kFloat64:
5202       locations->SetInAt(0, Location::RequiresFpuRegister());
5203       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5204       break;
5205 
5206     default:
5207       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5208   }
5209 }
5210 
5211 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
5212   switch (neg->GetResultType()) {
5213     case DataType::Type::kInt32:
5214     case DataType::Type::kInt64:
5215       __ Neg(OutputRegister(neg), InputOperandAt(neg, 0));
5216       break;
5217 
5218     case DataType::Type::kFloat32:
5219     case DataType::Type::kFloat64:
5220       __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
5221       break;
5222 
5223     default:
5224       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5225   }
5226 }
5227 
5228 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
5229   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5230       instruction, LocationSummary::kCallOnMainOnly);
5231   InvokeRuntimeCallingConvention calling_convention;
5232   locations->SetOut(LocationFrom(x0));
5233   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5234   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5235 }
5236 
5237 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
5238   // Note: if heap poisoning is enabled, the entry point takes care
5239   // of poisoning the reference.
5240   QuickEntrypointEnum entrypoint =
5241       CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
5242   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5243   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5244   codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
5245 }
5246 
5247 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
5248   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5249       instruction, LocationSummary::kCallOnMainOnly);
5250   InvokeRuntimeCallingConvention calling_convention;
5251   if (instruction->IsStringAlloc()) {
5252     locations->AddTemp(LocationFrom(kArtMethodRegister));
5253   } else {
5254     locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5255   }
5256   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
5257 }
5258 
5259 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
5260   // Note: if heap poisoning is enabled, the entry point takes care
5261   // of poisoning the reference.
5262   if (instruction->IsStringAlloc()) {
5263     // String is allocated through StringFactory. Call NewEmptyString entry point.
5264     Location temp = instruction->GetLocations()->GetTemp(0);
5265     MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
5266     __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
5267     __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
5268 
5269     {
5270       // Ensure the pc position is recorded immediately after the `blr` instruction.
5271       ExactAssemblyScope eas(GetVIXLAssembler(),
5272                              kInstructionSize,
5273                              CodeBufferCheckScope::kExactSize);
5274       __ blr(lr);
5275       codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
5276     }
5277   } else {
5278     codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5279     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5280   }
5281   codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
5282 }
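// Rough shape of the IsStringAlloc() path above: the empty-String factory is reached through a
// Thread-local slot holding what is used here as an ArtMethod*, rather than through a regular
// entrypoint call (temp is kArtMethodRegister, i.e. x0; offsets illustrative):
//
//   ldr x0, [tr, #pNewEmptyString_offset]
//   ldr lr, [x0, #entry_point_from_quick_compiled_code_offset]
//   blr lr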
5283 
5284 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
5285   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5286   locations->SetInAt(0, Location::RequiresRegister());
5287   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5288 }
5289 
5290 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
5291   switch (instruction->GetResultType()) {
5292     case DataType::Type::kInt32:
5293     case DataType::Type::kInt64:
5294       __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
5295       break;
5296 
5297     default:
5298       LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
5299   }
5300 }
5301 
5302 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
5303   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5304   locations->SetInAt(0, Location::RequiresRegister());
5305   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5306 }
5307 
5308 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
5309   __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
5310 }
5311 
5312 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
5313   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5314   locations->SetInAt(0, Location::RequiresRegister());
5315 }
5316 
5317 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5318   if (CanMoveNullCheckToUser(instruction)) {
5319     return;
5320   }
5321   {
5322     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
5323     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5324     Location obj = instruction->GetLocations()->InAt(0);
5325     __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
5326     RecordPcInfo(instruction, instruction->GetDexPc());
5327   }
5328 }
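// An implicit null check is just the `ldr wzr, [obj, #0]` load above: if the reference is null
// the access faults, and the runtime's fault handler turns the SIGSEGV into a
// NullPointerException, using the stack map recorded by RecordPcInfo to map the faulting PC
// back to the dex pc.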
5329 
5330 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5331   SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathARM64(instruction);
5332   AddSlowPath(slow_path);
5333 
5334   LocationSummary* locations = instruction->GetLocations();
5335   Location obj = locations->InAt(0);
5336 
5337   __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
5338 }
5339 
5340 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
5341   codegen_->GenerateNullCheck(instruction);
5342 }
5343 
5344 void LocationsBuilderARM64::VisitOr(HOr* instruction) {
5345   HandleBinaryOp(instruction);
5346 }
5347 
5348 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
5349   HandleBinaryOp(instruction);
5350 }
5351 
5352 void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5353   LOG(FATAL) << "Unreachable";
5354 }
5355 
5356 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
5357   if (instruction->GetNext()->IsSuspendCheck() &&
5358       instruction->GetBlock()->GetLoopInformation() != nullptr) {
5359     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5360     // The back edge will generate the suspend check.
5361     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5362   }
5363 
5364   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5365 }
5366 
5367 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
5368   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5369   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5370   if (location.IsStackSlot()) {
5371     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5372   } else if (location.IsDoubleStackSlot()) {
5373     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5374   }
5375   locations->SetOut(location);
5376 }
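// Illustrative arithmetic for the adjustment above: incoming stack arguments live just above
// this method's frame, so with, say, a 64-byte frame an argument the caller placed at its
// [sp, #0] is read here as [sp, #64], i.e. GetStackIndex() + GetFrameSize().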
5377 
5378 void InstructionCodeGeneratorARM64::VisitParameterValue(
5379     HParameterValue* instruction ATTRIBUTE_UNUSED) {
5380   // Nothing to do, the parameter is already at its location.
5381 }
5382 
5383 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
5384   LocationSummary* locations =
5385       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5386   locations->SetOut(LocationFrom(kArtMethodRegister));
5387 }
5388 
5389 void InstructionCodeGeneratorARM64::VisitCurrentMethod(
5390     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5391   // Nothing to do, the method is already at its location.
5392 }
5393 
5394 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
5395   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5396   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5397     locations->SetInAt(i, Location::Any());
5398   }
5399   locations->SetOut(Location::Any());
5400 }
5401 
5402 void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5403   LOG(FATAL) << "Unreachable";
5404 }
5405 
5406 void LocationsBuilderARM64::VisitRem(HRem* rem) {
5407   DataType::Type type = rem->GetResultType();
5408   LocationSummary::CallKind call_kind =
5409       DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
5410                                            : LocationSummary::kNoCall;
5411   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
5412 
5413   switch (type) {
5414     case DataType::Type::kInt32:
5415     case DataType::Type::kInt64:
5416       locations->SetInAt(0, Location::RequiresRegister());
5417       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
5418       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5419       break;
5420 
5421     case DataType::Type::kFloat32:
5422     case DataType::Type::kFloat64: {
5423       InvokeRuntimeCallingConvention calling_convention;
5424       locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
5425       locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
5426       locations->SetOut(calling_convention.GetReturnLocation(type));
5427 
5428       break;
5429     }
5430 
5431     default:
5432       LOG(FATAL) << "Unexpected rem type " << type;
5433   }
5434 }
5435 
5436 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
5437   DataType::Type type = rem->GetResultType();
5438 
5439   switch (type) {
5440     case DataType::Type::kInt32:
5441     case DataType::Type::kInt64: {
5442       GenerateDivRemIntegral(rem);
5443       break;
5444     }
5445 
5446     case DataType::Type::kFloat32:
5447     case DataType::Type::kFloat64: {
5448       QuickEntrypointEnum entrypoint =
5449           (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod;
5450       codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
5451       if (type == DataType::Type::kFloat32) {
5452         CheckEntrypointTypes<kQuickFmodf, float, float, float>();
5453       } else {
5454         CheckEntrypointTypes<kQuickFmod, double, double, double>();
5455       }
5456       break;
5457     }
5458 
5459     default:
5460       LOG(FATAL) << "Unexpected rem type " << type;
5461       UNREACHABLE();
5462   }
5463 }
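// There is no A64 instruction for floating-point remainder, so the kFloat32/kFloat64 cases
// above call into the runtime; roughly (inputs pre-placed in s0/s1, or d0/d1 for doubles, by
// the calling convention):
//
//   ldr lr, [tr, #pFmodf_offset]   // pFmod for doubles.
//   blr lr                         // Result returned in s0 (d0).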
5464 
5465 void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
5466   constructor_fence->SetLocations(nullptr);
5467 }
5468 
5469 void InstructionCodeGeneratorARM64::VisitConstructorFence(
5470     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
5471   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
5472 }
5473 
5474 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5475   memory_barrier->SetLocations(nullptr);
5476 }
5477 
5478 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5479   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
5480 }
5481 
5482 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
5483   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5484   DataType::Type return_type = instruction->InputAt(0)->GetType();
5485   locations->SetInAt(0, ARM64ReturnLocation(return_type));
5486 }
5487 
5488 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction ATTRIBUTE_UNUSED) {
5489   codegen_->GenerateFrameExit();
5490 }
5491 
5492 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
5493   instruction->SetLocations(nullptr);
5494 }
5495 
5496 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) {
5497   codegen_->GenerateFrameExit();
5498 }
5499 
5500 void LocationsBuilderARM64::VisitRor(HRor* ror) {
5501   HandleBinaryOp(ror);
5502 }
5503 
5504 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
5505   HandleBinaryOp(ror);
5506 }
5507 
5508 void LocationsBuilderARM64::VisitShl(HShl* shl) {
5509   HandleShift(shl);
5510 }
5511 
5512 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
5513   HandleShift(shl);
5514 }
5515 
5516 void LocationsBuilderARM64::VisitShr(HShr* shr) {
5517   HandleShift(shr);
5518 }
5519 
5520 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
5521   HandleShift(shr);
5522 }
5523 
5524 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
5525   HandleBinaryOp(instruction);
5526 }
5527 
5528 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
5529   HandleBinaryOp(instruction);
5530 }
5531 
5532 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5533   HandleFieldGet(instruction, instruction->GetFieldInfo());
5534 }
5535 
5536 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5537   HandleFieldGet(instruction, instruction->GetFieldInfo());
5538 }
5539 
5540 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5541   HandleFieldSet(instruction);
5542 }
5543 
5544 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5545   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5546 }
5547 
5548 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
5549     HUnresolvedInstanceFieldGet* instruction) {
5550   FieldAccessCallingConventionARM64 calling_convention;
5551   codegen_->CreateUnresolvedFieldLocationSummary(
5552       instruction, instruction->GetFieldType(), calling_convention);
5553 }
5554 
5555 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
5556     HUnresolvedInstanceFieldGet* instruction) {
5557   FieldAccessCallingConventionARM64 calling_convention;
5558   codegen_->GenerateUnresolvedFieldAccess(instruction,
5559                                           instruction->GetFieldType(),
5560                                           instruction->GetFieldIndex(),
5561                                           instruction->GetDexPc(),
5562                                           calling_convention);
5563 }
5564 
5565 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
5566     HUnresolvedInstanceFieldSet* instruction) {
5567   FieldAccessCallingConventionARM64 calling_convention;
5568   codegen_->CreateUnresolvedFieldLocationSummary(
5569       instruction, instruction->GetFieldType(), calling_convention);
5570 }
5571 
5572 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
5573     HUnresolvedInstanceFieldSet* instruction) {
5574   FieldAccessCallingConventionARM64 calling_convention;
5575   codegen_->GenerateUnresolvedFieldAccess(instruction,
5576                                           instruction->GetFieldType(),
5577                                           instruction->GetFieldIndex(),
5578                                           instruction->GetDexPc(),
5579                                           calling_convention);
5580 }
5581 
5582 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
5583     HUnresolvedStaticFieldGet* instruction) {
5584   FieldAccessCallingConventionARM64 calling_convention;
5585   codegen_->CreateUnresolvedFieldLocationSummary(
5586       instruction, instruction->GetFieldType(), calling_convention);
5587 }
5588 
5589 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
5590     HUnresolvedStaticFieldGet* instruction) {
5591   FieldAccessCallingConventionARM64 calling_convention;
5592   codegen_->GenerateUnresolvedFieldAccess(instruction,
5593                                           instruction->GetFieldType(),
5594                                           instruction->GetFieldIndex(),
5595                                           instruction->GetDexPc(),
5596                                           calling_convention);
5597 }
5598 
5599 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
5600     HUnresolvedStaticFieldSet* instruction) {
5601   FieldAccessCallingConventionARM64 calling_convention;
5602   codegen_->CreateUnresolvedFieldLocationSummary(
5603       instruction, instruction->GetFieldType(), calling_convention);
5604 }
5605 
5606 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
5607     HUnresolvedStaticFieldSet* instruction) {
5608   FieldAccessCallingConventionARM64 calling_convention;
5609   codegen_->GenerateUnresolvedFieldAccess(instruction,
5610                                           instruction->GetFieldType(),
5611                                           instruction->GetFieldIndex(),
5612                                           instruction->GetDexPc(),
5613                                           calling_convention);
5614 }
5615 
5616 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5617   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5618       instruction, LocationSummary::kCallOnSlowPath);
5619   // In suspend check slow path, usually there are no caller-save registers at all.
5620   // If SIMD instructions are present, however, we force spilling all live SIMD
5621   // registers in full width (since the runtime only saves/restores lower part).
5622   locations->SetCustomSlowPathCallerSaves(
5623       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5624 }
5625 
5626 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5627   HBasicBlock* block = instruction->GetBlock();
5628   if (block->GetLoopInformation() != nullptr) {
5629     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5630     // The back edge will generate the suspend check.
5631     return;
5632   }
5633   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5634     // The goto will generate the suspend check.
5635     return;
5636   }
5637   GenerateSuspendCheck(instruction, nullptr);
5638   codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
5639 }
5640 
5641 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
5642   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5643       instruction, LocationSummary::kCallOnMainOnly);
5644   InvokeRuntimeCallingConvention calling_convention;
5645   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5646 }
5647 
5648 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
5649   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
5650   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5651 }
5652 
5653 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
5654   LocationSummary* locations =
5655       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
5656   DataType::Type input_type = conversion->GetInputType();
5657   DataType::Type result_type = conversion->GetResultType();
5658   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
5659       << input_type << " -> " << result_type;
5660   if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) ||
5661       (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) {
5662     LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
5663   }
5664 
5665   if (DataType::IsFloatingPointType(input_type)) {
5666     locations->SetInAt(0, Location::RequiresFpuRegister());
5667   } else {
5668     locations->SetInAt(0, Location::RequiresRegister());
5669   }
5670 
5671   if (DataType::IsFloatingPointType(result_type)) {
5672     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5673   } else {
5674     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5675   }
5676 }
5677 
5678 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
5679   DataType::Type result_type = conversion->GetResultType();
5680   DataType::Type input_type = conversion->GetInputType();
5681 
5682   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
5683       << input_type << " -> " << result_type;
5684 
5685   if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) {
5686     int result_size = DataType::Size(result_type);
5687     int input_size = DataType::Size(input_type);
5688     int min_size = std::min(result_size, input_size);
5689     Register output = OutputRegister(conversion);
5690     Register source = InputRegisterAt(conversion, 0);
5691     if (result_type == DataType::Type::kInt32 && input_type == DataType::Type::kInt64) {
5692       // 'int' values are used directly as W registers, discarding the top
5693       // bits, so we don't need to sign-extend and can just perform a move.
5694       // We do not pass the `kDiscardForSameWReg` argument to force clearing the
5695       // top 32 bits of the target register. We theoretically could leave those
5696       // bits unchanged, but we would have to make sure that no code uses a
5697       // 32bit input value as a 64bit value assuming that the top 32 bits are
5698       // zero.
5699       __ Mov(output.W(), source.W());
5700     } else if (DataType::IsUnsignedType(result_type) ||
5701                (DataType::IsUnsignedType(input_type) && input_size < result_size)) {
5702       __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, result_size * kBitsPerByte);
5703     } else {
5704       __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
5705     }
5706   } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) {
5707     __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
5708   } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) {
5709     CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64);
5710     __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
5711   } else if (DataType::IsFloatingPointType(result_type) &&
5712              DataType::IsFloatingPointType(input_type)) {
5713     __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
5714   } else {
5715     LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
5716                 << " to " << result_type;
5717   }
5718 }
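// A few concrete shapes of the conversion cases above (registers illustrative):
//
//   long   -> int    : mov   w0, w1             // Plain W move; upper bits discarded.
//   long   -> short  : sbfx  w0, w1, #0, #16    // Signed narrowing, min_size = 2 bytes.
//   int    -> char   : ubfx  w0, w1, #0, #16    // Unsigned result type uses UBFX.
//   int    -> float  : scvtf s0, w1
//   double -> long   : fcvtzs x0, d1
//   float  -> double : fcvt  d0, s1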
5719 
5720 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
5721   HandleShift(ushr);
5722 }
5723 
5724 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
5725   HandleShift(ushr);
5726 }
5727 
5728 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
5729   HandleBinaryOp(instruction);
5730 }
5731 
5732 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
5733   HandleBinaryOp(instruction);
5734 }
5735 
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)5736 void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5737   // Nothing to do, this should be removed during prepare for register allocator.
5738   LOG(FATAL) << "Unreachable";
5739 }
5740 
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)5741 void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5742   // Nothing to do, this should be removed during prepare for register allocator.
5743   LOG(FATAL) << "Unreachable";
5744 }
5745 
5746 // Simple implementation of packed switch - generate cascaded compare/jumps.
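// For illustration, a small switch with lower_bound == 10 and three entries is expected to
// lower to roughly the following compare/jump sequence (register names and labels are only
// an example sketch):
//   subs w16, w_value, #10    // bias the switch value
//   b.eq case_10
//   subs w16, w16, #2
//   b.lo case_11
//   b.eq case_12
//   b    default              // omitted if the default block is the fallthrough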
void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  int32_t lower_bound = switch_instr->GetStartValue();
  uint32_t num_entries = switch_instr->GetNumEntries();
  Register value_reg = InputRegisterAt(switch_instr, 0);
  HBasicBlock* default_block = switch_instr->GetDefaultBlock();

  // Roughly set 16 as the maximum average number of instructions generated per HIR in a graph.
  static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
  // ADR has a limited range (+/- 1 MB), so we set a threshold for the number of HIRs in the
  // graph to make sure we don't emit it if the target may run out of range.
  // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
  // ranges and emit the tables only as required.
  static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction;
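  // With kInstructionSize == 4 bytes on ARM64, this works out to roughly
  // 1 MB / (16 * 4 B) = 16384 HIRs, i.e. jump tables are only emitted for graphs small enough
  // that an ADR to a table at the end of the method is expected to stay in range.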

  if (num_entries <= kPackedSwitchCompareJumpThreshold ||
      // Current instruction id is an upper bound of the number of HIRs in the graph.
      GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
    // Create a series of compare/jumps.
    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
    Register temp = temps.AcquireW();
    __ Subs(temp, value_reg, Operand(lower_bound));

    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
    // Jump to successors[0] if value == lower_bound.
    __ B(eq, codegen_->GetLabelOf(successors[0]));
    int32_t last_index = 0;
    for (; num_entries - last_index > 2; last_index += 2) {
      __ Subs(temp, temp, Operand(2));
      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
    }
    if (num_entries - last_index == 2) {
      // The last missing case_value.
      __ Cmp(temp, Operand(1));
      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
    }

    // And the default for any other value.
    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
      __ B(codegen_->GetLabelOf(default_block));
    }
  } else {
    JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);

    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());

    // The instructions below should use at most one blocked register. Since there are two
    // blocked registers, we are free to block one.
    Register temp_w = temps.AcquireW();
    Register index;
    // Remove the bias.
    if (lower_bound != 0) {
      index = temp_w;
      __ Sub(index, value_reg, Operand(lower_bound));
    } else {
      index = value_reg;
    }

    // Jump to the default block if the index is out of range.
    __ Cmp(index, Operand(num_entries));
    __ B(hs, codegen_->GetLabelOf(default_block));

    // The current VIXL implementation does not require any blocked registers to encode the
    // immediate value for Adr, so we are free to use both VIXL blocked registers to reduce
    // register pressure.
    Register table_base = temps.AcquireX();
    // Load the jump offset from the table.
    __ Adr(table_base, jump_table->GetTableStartLabel());
    Register jump_offset = temp_w;
    __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));

    // Jump to the target block by branching to table_base (PC-relative) + offset.
    Register target_address = table_base;
    __ Add(target_address, table_base, Operand(jump_offset, SXTW));
    __ Br(target_address);
  }
}

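// Note on the two reference-load helpers below: the "one register" variant reuses `out` as both
// the base and the destination (typically because the base value is not needed afterwards),
// while the "two registers" variant loads from `obj` into a separate `out`, leaving `obj` intact.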
void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
    HInstruction* instruction,
    Location out,
    uint32_t offset,
    Location maybe_temp,
    ReadBarrierOption read_barrier_option) {
  DataType::Type type = DataType::Type::kReference;
  Register out_reg = RegisterFrom(out, type);
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(out + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
                                                      out,
                                                      out_reg,
                                                      offset,
                                                      maybe_temp,
                                                      /* needs_null_check */ false,
                                                      /* use_load_acquire */ false);
    } else {
      // Load with slow path based read barrier.
      // Save the value of `out` into `maybe_temp` before overwriting it
      // in the following move operation, as we will need it for the
      // read barrier below.
      Register temp_reg = RegisterFrom(maybe_temp, type);
      __ Mov(temp_reg, out_reg);
      // /* HeapReference<Object> */ out = *(out + offset)
      __ Ldr(out_reg, HeapOperand(out_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(out + offset)
    __ Ldr(out_reg, HeapOperand(out_reg, offset));
    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
    HInstruction* instruction,
    Location out,
    Location obj,
    uint32_t offset,
    Location maybe_temp,
    ReadBarrierOption read_barrier_option) {
  DataType::Type type = DataType::Type::kReference;
  Register out_reg = RegisterFrom(out, type);
  Register obj_reg = RegisterFrom(obj, type);
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
                                                      out,
                                                      obj_reg,
                                                      offset,
                                                      maybe_temp,
                                                      /* needs_null_check */ false,
                                                      /* use_load_acquire */ false);
    } else {
      // Load with slow path based read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      __ Ldr(out_reg, HeapOperand(obj_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(obj + offset)
    __ Ldr(out_reg, HeapOperand(obj_reg, offset));
    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
    HInstruction* instruction,
    Location root,
    Register obj,
    uint32_t offset,
    vixl::aarch64::Label* fixup_label,
    ReadBarrierOption read_barrier_option) {
  DCHECK(fixup_label == nullptr || offset == 0u);
  Register root_reg = RegisterFrom(root, DataType::Type::kReference);
  if (read_barrier_option == kWithReadBarrier) {
    DCHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
      // Baker's read barriers are used.
      if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
          !Runtime::Current()->UseJitCompilation()) {
        // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
        // the Marking Register) to decide whether we need to enter
        // the slow path to mark the GC root.
        //
        // We use link-time generated thunks for the slow path. That thunk
        // checks the reference and jumps to the entrypoint if needed.
        //
        //     lr = &return_address;
        //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
        //     if (mr) {  // Thread::Current()->GetIsGcMarking()
        //       goto gc_root_thunk<root_reg>(lr)
        //     }
        //   return_address:

        UseScratchRegisterScope temps(GetVIXLAssembler());
        DCHECK(temps.IsAvailable(ip0));
        DCHECK(temps.IsAvailable(ip1));
        temps.Exclude(ip0, ip1);
        uint32_t custom_data =
            linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
        vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data);

        EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
        vixl::aarch64::Label return_address;
        __ adr(lr, &return_address);
        if (fixup_label != nullptr) {
          __ Bind(fixup_label);
        }
        static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
                      "GC root LDR must be 2 instructions (8B) before the return address label.");
        __ ldr(root_reg, MemOperand(obj.X(), offset));
        __ Bind(cbnz_label);
        __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
        __ Bind(&return_address);
      } else {
        // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
        // the Marking Register) to decide whether we need to enter
        // the slow path to mark the GC root.
        //
        //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
        //   if (mr) {  // Thread::Current()->GetIsGcMarking()
        //     // Slow path.
        //     entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
        //     root = entrypoint(root);  // root = ReadBarrier::Mark(root);  // Entry point call.
        //   }

        // Slow path marking the GC root `root`. The entrypoint will
        // be loaded by the slow path code.
        SlowPathCodeARM64* slow_path =
            new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root);
        codegen_->AddSlowPath(slow_path);

        // /* GcRoot<mirror::Object> */ root = *(obj + offset)
        if (fixup_label == nullptr) {
          __ Ldr(root_reg, MemOperand(obj, offset));
        } else {
          codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
        }
        static_assert(
            sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
            "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
            "have different sizes.");
        static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
                      "art::mirror::CompressedReference<mirror::Object> and int32_t "
                      "have different sizes.");

        __ Cbnz(mr, slow_path->GetEntryLabel());
        __ Bind(slow_path->GetExitLabel());
      }
    } else {
      // GC root loaded through a slow path for read barriers other
      // than Baker's.
      // /* GcRoot<mirror::Object>* */ root = obj + offset
      if (fixup_label == nullptr) {
        __ Add(root_reg.X(), obj.X(), offset);
      } else {
        codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
      }
      // /* mirror::Object* */ root = root->Read()
      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
    }
  } else {
    // Plain GC root load with no read barrier.
    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
    if (fixup_label == nullptr) {
      __ Ldr(root_reg, MemOperand(obj, offset));
    } else {
      codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
    }
    // Note that GC roots are not affected by heap poisoning, thus we
    // do not have to unpoison `root_reg` here.
  }
  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}

void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                               Location ref,
                                                               Register obj,
                                                               uint32_t offset,
                                                               Location maybe_temp,
                                                               bool needs_null_check,
                                                               bool use_load_acquire) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
      !use_load_acquire &&
      !Runtime::Current()->UseJitCompilation()) {
    // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
    // Marking Register) to decide whether we need to enter the slow
    // path to mark the reference. Then, in the slow path, check the
    // gray bit in the lock word of the reference's holder (`obj`) to
    // decide whether to mark `ref` or not.
    //
    // We use link-time generated thunks for the slow path. That thunk checks
    // the holder and jumps to the entrypoint if needed. If the holder is not
    // gray, it creates a fake dependency and returns to the LDR instruction.
    //
    //     lr = &gray_return_address;
    //     if (mr) {  // Thread::Current()->GetIsGcMarking()
    //       goto field_thunk<holder_reg, base_reg>(lr)
    //     }
    //   not_gray_return_address:
    //     // Original reference load. If the offset is too large to fit
    //     // into LDR, we use an adjusted base register here.
    //     HeapReference<mirror::Object> reference = *(obj+offset);
    //   gray_return_address:

    DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
    Register base = obj;
    if (offset >= kReferenceLoadMinFarOffset) {
      DCHECK(maybe_temp.IsRegister());
      base = WRegisterFrom(maybe_temp);
      static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
      __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
      offset &= (kReferenceLoadMinFarOffset - 1u);
    }
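    // For example (values illustrative): with kReferenceLoadMinFarOffset == 16 KiB and
    // offset == 0x5008, the code above materializes base = obj + 0x4000 and then loads from
    // [base, #0x1008], keeping the remaining offset small enough for a single LDR.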
    UseScratchRegisterScope temps(GetVIXLAssembler());
    DCHECK(temps.IsAvailable(ip0));
    DCHECK(temps.IsAvailable(ip1));
    temps.Exclude(ip0, ip1);
    uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(
        base.GetCode(),
        obj.GetCode());
    vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);

    {
      EmissionCheckScope guard(GetVIXLAssembler(),
                               (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
      vixl::aarch64::Label return_address;
      __ adr(lr, &return_address);
      __ Bind(cbnz_label);
      __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
      static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
                    "Field LDR must be 1 instruction (4B) before the return address label; "
                    "2 instructions (8B) for heap poisoning.");
      Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
      __ ldr(ref_reg, MemOperand(base.X(), offset));
      if (needs_null_check) {
        MaybeRecordImplicitNullCheck(instruction);
      }
      GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
      __ Bind(&return_address);
    }
    MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
    return;
  }

  // /* HeapReference<Object> */ ref = *(obj + offset)
  Register temp = WRegisterFrom(maybe_temp);
  Location no_index = Location::NoLocation();
  size_t no_scale_factor = 0u;
  GenerateReferenceLoadWithBakerReadBarrier(instruction,
                                            ref,
                                            obj,
                                            offset,
                                            no_index,
                                            no_scale_factor,
                                            temp,
                                            needs_null_check,
                                            use_load_acquire);
}

void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                                               Location ref,
                                                               Register obj,
                                                               uint32_t data_offset,
                                                               Location index,
                                                               Register temp,
                                                               bool needs_null_check) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  static_assert(
      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
  size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);

  if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
      !Runtime::Current()->UseJitCompilation()) {
    // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
    // Marking Register) to decide whether we need to enter the slow
    // path to mark the reference. Then, in the slow path, check the
    // gray bit in the lock word of the reference's holder (`obj`) to
    // decide whether to mark `ref` or not.
    //
    // We use link-time generated thunks for the slow path. That thunk checks
    // the holder and jumps to the entrypoint if needed. If the holder is not
    // gray, it creates a fake dependency and returns to the LDR instruction.
    //
    //     lr = &gray_return_address;
    //     if (mr) {  // Thread::Current()->GetIsGcMarking()
    //       goto array_thunk<base_reg>(lr)
    //     }
    //   not_gray_return_address:
    //     // Original reference load. If the offset is too large to fit
    //     // into LDR, we use an adjusted base register here.
    //     HeapReference<mirror::Object> reference = data[index];
    //   gray_return_address:

    DCHECK(index.IsValid());
    Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
    Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);

    UseScratchRegisterScope temps(GetVIXLAssembler());
    DCHECK(temps.IsAvailable(ip0));
    DCHECK(temps.IsAvailable(ip1));
    temps.Exclude(ip0, ip1);
    uint32_t custom_data =
        linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode());
    vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);

    __ Add(temp.X(), obj.X(), Operand(data_offset));
    {
      EmissionCheckScope guard(GetVIXLAssembler(),
                               (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
      vixl::aarch64::Label return_address;
      __ adr(lr, &return_address);
      __ Bind(cbnz_label);
      __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
      static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
                    "Array LDR must be 1 instruction (4B) before the return address label; "
                    "2 instructions (8B) for heap poisoning.");
      __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
      DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
      GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
      __ Bind(&return_address);
    }
    MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
    return;
  }

  // Array cells are never volatile variables, therefore array loads
  // never use Load-Acquire instructions on ARM64.
  const bool use_load_acquire = false;

  // /* HeapReference<Object> */ ref =
  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
  GenerateReferenceLoadWithBakerReadBarrier(instruction,
                                            ref,
                                            obj,
                                            data_offset,
                                            index,
                                            scale_factor,
                                            temp,
                                            needs_null_check,
                                            use_load_acquire);
}

void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                   Location ref,
                                                                   Register obj,
                                                                   uint32_t offset,
                                                                   Location index,
                                                                   size_t scale_factor,
                                                                   Register temp,
                                                                   bool needs_null_check,
                                                                   bool use_load_acquire) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);
  // If we are emitting an array load, we should not be using a
  // Load Acquire instruction. In other words:
  // `instruction->IsArrayGet()` => `!use_load_acquire`.
  DCHECK(!instruction->IsArrayGet() || !use_load_acquire);

  // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
  // Marking Register) to decide whether we need to enter the slow
  // path to mark the reference. Then, in the slow path, check the
  // gray bit in the lock word of the reference's holder (`obj`) to
  // decide whether to mark `ref` or not.
  //
  //   if (mr) {  // Thread::Current()->GetIsGcMarking()
  //     // Slow path.
  //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
  //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
  //     bool is_gray = (rb_state == ReadBarrier::GrayState());
  //     if (is_gray) {
  //       entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
  //       ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
  //     }
  //   } else {
  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
  //   }

  // Slow path marking the object `ref` when the GC is marking. The
  // entrypoint will be loaded by the slow path code.
  SlowPathCodeARM64* slow_path =
      new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
          instruction,
          ref,
          obj,
          offset,
          index,
          scale_factor,
          needs_null_check,
          use_load_acquire,
          temp);
  AddSlowPath(slow_path);

  __ Cbnz(mr, slow_path->GetEntryLabel());
  // Fast path: the GC is not marking: just load the reference.
  GenerateRawReferenceLoad(
      instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
  __ Bind(slow_path->GetExitLabel());
  MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}

void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
                                                                  Location ref,
                                                                  Register obj,
                                                                  Location field_offset,
                                                                  Register temp,
                                                                  bool needs_null_check,
                                                                  bool use_load_acquire) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);
  // If we are emitting an array load, we should not be using a
  // Load Acquire instruction. In other words:
  // `instruction->IsArrayGet()` => `!use_load_acquire`.
  DCHECK(!instruction->IsArrayGet() || !use_load_acquire);

  // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
  // Marking Register) to decide whether we need to enter the slow
  // path to update the reference field within `obj`. Then, in the
  // slow path, check the gray bit in the lock word of the reference's
  // holder (`obj`) to decide whether to mark `ref` and update the
  // field or not.
  //
  //   if (mr) {  // Thread::Current()->GetIsGcMarking()
  //     // Slow path.
  //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
  //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
  //     HeapReference<mirror::Object> ref = *(obj + field_offset);  // Reference load.
  //     bool is_gray = (rb_state == ReadBarrier::GrayState());
  //     if (is_gray) {
  //       old_ref = ref;
  //       entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
  //       ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
  //       compareAndSwapObject(obj, field_offset, old_ref, ref);
  //     }
  //   }

  // Slow path updating the object reference at address `obj + field_offset`
  // when the GC is marking. The entrypoint will be loaded by the slow path code.
  SlowPathCodeARM64* slow_path =
      new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
          instruction,
          ref,
          obj,
          /* offset */ 0u,
          /* index */ field_offset,
          /* scale_factor */ 0u /* "times 1" */,
          needs_null_check,
          use_load_acquire,
          temp);
  AddSlowPath(slow_path);

  __ Cbnz(mr, slow_path->GetEntryLabel());
  // Fast path: the GC is not marking: nothing to do (the field is
  // up-to-date, and we don't need to load the reference).
  __ Bind(slow_path->GetExitLabel());
  MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}

void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
                                                  Location ref,
                                                  Register obj,
                                                  uint32_t offset,
                                                  Location index,
                                                  size_t scale_factor,
                                                  bool needs_null_check,
                                                  bool use_load_acquire) {
  DCHECK(obj.IsW());
  DataType::Type type = DataType::Type::kReference;
  Register ref_reg = RegisterFrom(ref, type);

  // If needed, vixl::EmissionCheckScope guards are used to ensure
  // that no pools are emitted between the load (macro) instruction
  // and MaybeRecordImplicitNullCheck.

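  // Three shapes are handled below: an intrinsic load-acquire where `index` is really a byte
  // offset, an indexed load with a constant index folded into the offset, and an indexed load
  // that first adds the base and offset into a scratch register.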
  if (index.IsValid()) {
    // Load types involving an "index": ArrayGet,
    // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
    // intrinsics.
    if (use_load_acquire) {
      // UnsafeGetObjectVolatile intrinsic case.
      // Register `index` is not an index in an object array, but an
      // offset to an object reference field within object `obj`.
      DCHECK(instruction->IsInvoke()) << instruction->DebugName();
      DCHECK(instruction->GetLocations()->Intrinsified());
      DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
          << instruction->AsInvoke()->GetIntrinsic();
      DCHECK_EQ(offset, 0u);
      DCHECK_EQ(scale_factor, 0u);
      DCHECK_EQ(needs_null_check, false);
      // /* HeapReference<mirror::Object> */ ref = *(obj + index)
      MemOperand field = HeapOperand(obj, XRegisterFrom(index));
      LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
    } else {
      // ArrayGet, UnsafeGetObject, and UnsafeCASObject intrinsics cases.
      // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
      if (index.IsConstant()) {
        uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
        EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
        Load(type, ref_reg, HeapOperand(obj, computed_offset));
        if (needs_null_check) {
          MaybeRecordImplicitNullCheck(instruction);
        }
      } else {
        UseScratchRegisterScope temps(GetVIXLAssembler());
        Register temp = temps.AcquireW();
        __ Add(temp, obj, offset);
        {
          EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
          Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor));
          if (needs_null_check) {
            MaybeRecordImplicitNullCheck(instruction);
          }
        }
      }
    }
  } else {
    // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
    MemOperand field = HeapOperand(obj, offset);
    if (use_load_acquire) {
      // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire.
      LoadAcquire(instruction, ref_reg, field, needs_null_check);
    } else {
      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
      Load(type, ref_reg, field);
      if (needs_null_check) {
        MaybeRecordImplicitNullCheck(instruction);
      }
    }
  }

  // Object* ref = ref_addr->AsMirrorPtr()
  GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
}

void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
  // The following condition is a compile-time one, so it does not have a run-time cost.
  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
    // The following condition is a run-time one; it is executed after the
    // previous compile-time test, to avoid penalizing non-debug builds.
    if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW();
      GetAssembler()->GenerateMarkingRegisterCheck(temp, code);
    }
  }
}

void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
                                                 Location out,
                                                 Location ref,
                                                 Location obj,
                                                 uint32_t offset,
                                                 Location index) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the reference load.
  //
  // If heap poisoning is enabled, the unpoisoning of the loaded
  // reference will be carried out by the runtime within the slow
  // path.
  //
  // Note that `ref` currently does not get unpoisoned (when heap
  // poisoning is enabled), which is alright as the `ref` argument is
  // not used by the artReadBarrierSlow entry point.
  //
  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
  SlowPathCodeARM64* slow_path = new (GetScopedAllocator())
      ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
  AddSlowPath(slow_path);

  __ B(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                                      Location out,
                                                      Location ref,
                                                      Location obj,
                                                      uint32_t offset,
                                                      Location index) {
  if (kEmitCompilerReadBarrier) {
    // Baker's read barriers shall be handled by the fast path
    // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
    DCHECK(!kUseBakerReadBarrier);
    // If heap poisoning is enabled, unpoisoning will be taken care of
    // by the runtime within the slow path.
    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
  } else if (kPoisonHeapReferences) {
    GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
  }
}

void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
                                                        Location out,
                                                        Location root) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the GC root load.
  //
  // Note that GC roots are not affected by heap poisoning, so we do
  // not need to do anything special for this here.
  SlowPathCodeARM64* slow_path =
      new (GetScopedAllocator()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
  AddSlowPath(slow_path);

  __ B(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

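// For the vtable kind a single load from the class's embedded vtable suffices; for the IMT kind
// the code below first loads the IMT pointer from the class and then indexes into it, hence the
// two dependent loads.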
void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
        instruction->GetIndex(), kArm64PointerSize).SizeValue();
    __ Ldr(XRegisterFrom(locations->Out()),
           MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
  } else {
    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
        instruction->GetIndex(), kArm64PointerSize));
    __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
        mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
    __ Ldr(XRegisterFrom(locations->Out()),
           MemOperand(XRegisterFrom(locations->Out()), method_offset));
  }
}

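// Patches a 32-bit literal in the JIT code so that it holds the address of the root table entry
// at `index_in_table` within `roots_data` (each entry being one GcRoot, i.e. a compressed
// reference).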
static void PatchJitRootUse(uint8_t* code,
                            const uint8_t* roots_data,
                            vixl::aarch64::Literal<uint32_t>* literal,
                            uint64_t index_in_table) {
  uint32_t literal_offset = literal->GetOffset();
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  uint8_t* data = code + literal_offset;
  reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
}

void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const auto& entry : jit_string_patches_) {
    const StringReference& string_reference = entry.first;
    vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
  }
  for (const auto& entry : jit_class_patches_) {
    const TypeReference& type_reference = entry.first;
    vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
  }
}

#undef __
#undef QUICK_ENTRY_POINT

}  // namespace arm64
}  // namespace art