1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_arm64.h"
18 
19 #include "aarch64/assembler-aarch64.h"
20 #include "aarch64/registers-aarch64.h"
21 #include "arch/arm64/asm_support_arm64.h"
22 #include "arch/arm64/instruction_set_features_arm64.h"
23 #include "arch/arm64/jni_frame_arm64.h"
24 #include "art_method-inl.h"
25 #include "base/bit_utils.h"
26 #include "base/bit_utils_iterator.h"
27 #include "class_root-inl.h"
28 #include "class_table.h"
29 #include "code_generator_utils.h"
30 #include "entrypoints/quick/quick_entrypoints.h"
31 #include "entrypoints/quick/quick_entrypoints_enum.h"
32 #include "gc/accounting/card_table.h"
33 #include "gc/space/image_space.h"
34 #include "heap_poisoning.h"
35 #include "interpreter/mterp/nterp.h"
36 #include "intrinsics.h"
37 #include "intrinsics_arm64.h"
38 #include "intrinsics_list.h"
39 #include "intrinsics_utils.h"
40 #include "jit/profiling_info.h"
41 #include "linker/linker_patch.h"
42 #include "lock_word.h"
43 #include "mirror/array-inl.h"
44 #include "mirror/class-inl.h"
45 #include "mirror/var_handle.h"
46 #include "offsets.h"
47 #include "optimizing/common_arm64.h"
48 #include "optimizing/nodes.h"
49 #include "profiling_info_builder.h"
50 #include "thread.h"
51 #include "trace.h"
52 #include "utils/arm64/assembler_arm64.h"
53 #include "utils/assembler.h"
54 #include "utils/stack_checks.h"
55 
56 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
57 using vixl::ExactAssemblyScope;
58 using vixl::CodeBufferCheckScope;
59 using vixl::EmissionCheckScope;
60 
61 #ifdef __
62 #error "ARM64 Codegen VIXL macro-assembler macro already defined."
63 #endif
64 
65 namespace art HIDDEN {
66 
67 template<class MirrorType>
68 class GcRoot;
69 
70 namespace arm64 {
71 
72 using helpers::ARM64EncodableConstantOrRegister;
73 using helpers::ArtVixlRegCodeCoherentForRegSet;
74 using helpers::CPURegisterFrom;
75 using helpers::DRegisterFrom;
76 using helpers::FPRegisterFrom;
77 using helpers::HeapOperand;
78 using helpers::HeapOperandFrom;
79 using helpers::InputCPURegisterOrZeroRegAt;
80 using helpers::InputFPRegisterAt;
81 using helpers::InputOperandAt;
82 using helpers::InputRegisterAt;
83 using helpers::Int64FromLocation;
84 using helpers::LocationFrom;
85 using helpers::OperandFromMemOperand;
86 using helpers::OutputCPURegister;
87 using helpers::OutputFPRegister;
88 using helpers::OutputRegister;
89 using helpers::RegisterFrom;
90 using helpers::StackOperandFrom;
91 using helpers::VIXLRegCodeFromART;
92 using helpers::WRegisterFrom;
93 using helpers::XRegisterFrom;
94 
95 // TODO(mythria): Expand SystemRegister in vixl to include this value.
96 uint16_t SYS_CNTVCT_EL0 = SystemRegisterEncoder<1, 3, 14, 0, 2>::value;
97 
// The compare/jump sequence generates about (1.5 * num_entries + 3) instructions, while the jump
// table version generates 7 instructions plus num_entries literals. The compare/jump sequence
// therefore generates less code/data for a small num_entries.
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
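// At the threshold of 7 entries the two options are roughly at parity: about 1.5 * 7 + 3 ~= 14
// instructions for the compare/jump sequence versus 7 instructions plus 7 literal words for the
// jump table.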
102 
// A reference load (except object array loads) uses LDR Wt, [Xn, #offset], which can handle
// offsets < 16KiB. For offsets >= 16KiB, the load must be emitted as two or more instructions.
// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
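// (For reference: a 32-bit LDR with an unsigned, scaled 12-bit immediate covers offsets up to
// 4 * 4095 = 16380 bytes, i.e. just under 16KiB.)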
108 
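// Maps an HIR condition to the corresponding AArch64 condition code. Note that kCondB/kCondBE/
// kCondA/kCondAE are the unsigned below/above variants and map to lo/ls/hi/hs.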
inline Condition ARM64Condition(IfCondition cond) {
110   switch (cond) {
111     case kCondEQ: return eq;
112     case kCondNE: return ne;
113     case kCondLT: return lt;
114     case kCondLE: return le;
115     case kCondGT: return gt;
116     case kCondGE: return ge;
117     case kCondB:  return lo;
118     case kCondBE: return ls;
119     case kCondA:  return hi;
120     case kCondAE: return hs;
121   }
122   LOG(FATAL) << "Unreachable";
123   UNREACHABLE();
124 }
125 
inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
127   // The ARM64 condition codes can express all the necessary branches, see the
128   // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
129   // There is no dex instruction or HIR that would need the missing conditions
130   // "equal or unordered" or "not equal".
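  // As a reminder, an unordered AArch64 FP comparison sets NZCV to 0b0011, so with gt_bias
  // (unordered behaves as "greater") the chosen condition is false on unordered for LT/LE
  // (cc, ls) and true on unordered for GT/GE (hi, cs); without gt_bias it is the other way
  // around (lt, le are true on unordered; gt, ge are false).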
131   switch (cond) {
132     case kCondEQ: return eq;
133     case kCondNE: return ne /* unordered */;
134     case kCondLT: return gt_bias ? cc : lt /* unordered */;
135     case kCondLE: return gt_bias ? ls : le /* unordered */;
136     case kCondGT: return gt_bias ? hi /* unordered */ : gt;
137     case kCondGE: return gt_bias ? cs /* unordered */ : ge;
138     default:
139       LOG(FATAL) << "UNREACHABLE";
140       UNREACHABLE();
141   }
142 }
143 
Location ARM64ReturnLocation(DataType::Type return_type) {
145   // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
146   // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
147   // but we use the exact registers for clarity.
148   if (return_type == DataType::Type::kFloat32) {
149     return LocationFrom(s0);
150   } else if (return_type == DataType::Type::kFloat64) {
151     return LocationFrom(d0);
152   } else if (return_type == DataType::Type::kInt64) {
153     return LocationFrom(x0);
154   } else if (return_type == DataType::Type::kVoid) {
155     return Location::NoLocation();
156   } else {
157     return LocationFrom(w0);
158   }
159 }
160 
Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) {
162   return ARM64ReturnLocation(return_type);
163 }
164 
static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
166   InvokeRuntimeCallingConvention calling_convention;
167   RegisterSet caller_saves = RegisterSet::Empty();
168   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
169   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
170             RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
171                          DataType::Type::kReference).GetCode());
172   return caller_saves;
173 }
174 
175 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
176 #define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
177 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
178 
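// Records the stack offsets of the live core and FP registers in saved_core_stack_offsets_ /
// saved_fpu_stack_offsets_ and updates the stack mask for object-holding registers; the actual
// stores are emitted by SaveLiveRegistersHelper.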
void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
180   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
181   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
182   for (uint32_t i : LowToHighBits(core_spills)) {
183     // If the register holds an object, update the stack mask.
184     if (locations->RegisterContainsObject(i)) {
185       locations->SetStackBit(stack_offset / kVRegSize);
186     }
187     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
188     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
189     saved_core_stack_offsets_[i] = stack_offset;
190     stack_offset += kXRegSizeInBytes;
191   }
192 
193   const size_t fp_reg_size = codegen->GetSlowPathFPWidth();
194   const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
195   for (uint32_t i : LowToHighBits(fp_spills)) {
196     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
197     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
198     saved_fpu_stack_offsets_[i] = stack_offset;
199     stack_offset += fp_reg_size;
200   }
201 
202   InstructionCodeGeneratorARM64* visitor =
203       down_cast<CodeGeneratorARM64*>(codegen)->GetInstructionCodeGeneratorArm64();
204   visitor->SaveLiveRegistersHelper(locations, codegen->GetFirstRegisterSlotInSlowPath());
205 }
206 
void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
208   InstructionCodeGeneratorARM64* visitor =
209       down_cast<CodeGeneratorARM64*>(codegen)->GetInstructionCodeGeneratorArm64();
210   visitor->RestoreLiveRegistersHelper(locations, codegen->GetFirstRegisterSlotInSlowPath());
211 }
212 
213 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
214  public:
  explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}
216 
  void EmitNativeCode(CodeGenerator* codegen) override {
218     LocationSummary* locations = instruction_->GetLocations();
219     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
220 
221     __ Bind(GetEntryLabel());
222     if (instruction_->CanThrowIntoCatchBlock()) {
223       // Live registers will be restored in the catch block if caught.
224       SaveLiveRegisters(codegen, instruction_->GetLocations());
225     }
226     // We're moving two locations to locations that could overlap, so we need a parallel
227     // move resolver.
228     InvokeRuntimeCallingConvention calling_convention;
229     codegen->EmitParallelMoves(locations->InAt(0),
230                                LocationFrom(calling_convention.GetRegisterAt(0)),
231                                DataType::Type::kInt32,
232                                locations->InAt(1),
233                                LocationFrom(calling_convention.GetRegisterAt(1)),
234                                DataType::Type::kInt32);
235     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
236         ? kQuickThrowStringBounds
237         : kQuickThrowArrayBounds;
238     arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
239     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
240     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
241   }
242 
  bool IsFatal() const override { return true; }
244 
  const char* GetDescription() const override { return "BoundsCheckSlowPathARM64"; }
246 
247  private:
248   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
249 };
250 
251 class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
252  public:
  explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}
254 
  void EmitNativeCode(CodeGenerator* codegen) override {
256     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
257     __ Bind(GetEntryLabel());
258     arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
259     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
260   }
261 
  bool IsFatal() const override { return true; }
263 
  const char* GetDescription() const override { return "DivZeroCheckSlowPathARM64"; }
265 
266  private:
267   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
268 };
269 
270 class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
271  public:
  LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at)
273       : SlowPathCodeARM64(at), cls_(cls) {
274     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
275     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
276   }
277 
  void EmitNativeCode(CodeGenerator* codegen) override {
279     LocationSummary* locations = instruction_->GetLocations();
280     Location out = locations->Out();
281     const uint32_t dex_pc = instruction_->GetDexPc();
282     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
283     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
284 
285     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
286     __ Bind(GetEntryLabel());
287     SaveLiveRegisters(codegen, locations);
288 
289     InvokeRuntimeCallingConvention calling_convention;
290     if (must_resolve_type) {
291       DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile()) ||
292              arm64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
293              ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
294                              &cls_->GetDexFile()));
295       dex::TypeIndex type_index = cls_->GetTypeIndex();
296       __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
297       if (cls_->NeedsAccessCheck()) {
298         CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
299         arm64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
300       } else {
301         CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
302         arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
303       }
304       // If we also must_do_clinit, the resolved type is now in the correct register.
305     } else {
306       DCHECK(must_do_clinit);
307       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
308       arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)),
309                                   source,
310                                   cls_->GetType());
311     }
312     if (must_do_clinit) {
313       arm64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
314       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
315     }
316 
317     // Move the class to the desired location.
318     if (out.IsValid()) {
319       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
320       DataType::Type type = instruction_->GetType();
321       arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
322     }
323     RestoreLiveRegisters(codegen, locations);
324     __ B(GetExitLabel());
325   }
326 
  const char* GetDescription() const override { return "LoadClassSlowPathARM64"; }
328 
329  private:
330   // The class this slow path will load.
331   HLoadClass* const cls_;
332 
333   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
334 };
335 
336 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
337  public:
  explicit LoadStringSlowPathARM64(HLoadString* instruction)
339       : SlowPathCodeARM64(instruction) {}
340 
  void EmitNativeCode(CodeGenerator* codegen) override {
342     LocationSummary* locations = instruction_->GetLocations();
343     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
344     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
345 
346     __ Bind(GetEntryLabel());
347     SaveLiveRegisters(codegen, locations);
348 
349     InvokeRuntimeCallingConvention calling_convention;
350     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
351     __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
352     arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
353     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
354     DataType::Type type = instruction_->GetType();
355     arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
356 
357     RestoreLiveRegisters(codegen, locations);
358 
359     __ B(GetExitLabel());
360   }
361 
  const char* GetDescription() const override { return "LoadStringSlowPathARM64"; }
363 
364  private:
365   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
366 };
367 
368 class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
369  public:
  explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}
371 
  void EmitNativeCode(CodeGenerator* codegen) override {
373     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
374     __ Bind(GetEntryLabel());
375     if (instruction_->CanThrowIntoCatchBlock()) {
376       // Live registers will be restored in the catch block if caught.
377       SaveLiveRegisters(codegen, instruction_->GetLocations());
378     }
379     arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
380                                  instruction_,
381                                  instruction_->GetDexPc(),
382                                  this);
383     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
384   }
385 
  bool IsFatal() const override { return true; }
387 
  const char* GetDescription() const override { return "NullCheckSlowPathARM64"; }
389 
390  private:
391   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
392 };
393 
394 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
395  public:
  SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
397       : SlowPathCodeARM64(instruction), successor_(successor) {}
398 
  void EmitNativeCode(CodeGenerator* codegen) override {
400     LocationSummary* locations = instruction_->GetLocations();
401     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
402     __ Bind(GetEntryLabel());
403     SaveLiveRegisters(codegen, locations);  // Only saves live vector regs for SIMD.
404     arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
405     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
406     RestoreLiveRegisters(codegen, locations);  // Only restores live vector regs for SIMD.
407     if (successor_ == nullptr) {
408       __ B(GetReturnLabel());
409     } else {
410       __ B(arm64_codegen->GetLabelOf(successor_));
411     }
412   }
413 
  vixl::aarch64::Label* GetReturnLabel() {
415     DCHECK(successor_ == nullptr);
416     return &return_label_;
417   }
418 
  HBasicBlock* GetSuccessor() const {
420     return successor_;
421   }
422 
  const char* GetDescription() const override { return "SuspendCheckSlowPathARM64"; }
424 
425  private:
426   // If not null, the block to branch to after the suspend check.
427   HBasicBlock* const successor_;
428 
429   // If `successor_` is null, the label to branch to after the suspend check.
430   vixl::aarch64::Label return_label_;
431 
432   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
433 };
434 
435 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
436  public:
  TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
438       : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}
439 
  void EmitNativeCode(CodeGenerator* codegen) override {
441     LocationSummary* locations = instruction_->GetLocations();
442 
443     DCHECK(instruction_->IsCheckCast()
444            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
445     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
446     uint32_t dex_pc = instruction_->GetDexPc();
447 
448     __ Bind(GetEntryLabel());
449 
450     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
451       SaveLiveRegisters(codegen, locations);
452     }
453 
454     // We're moving two locations to locations that could overlap, so we need a parallel
455     // move resolver.
456     InvokeRuntimeCallingConvention calling_convention;
457     codegen->EmitParallelMoves(locations->InAt(0),
458                                LocationFrom(calling_convention.GetRegisterAt(0)),
459                                DataType::Type::kReference,
460                                locations->InAt(1),
461                                LocationFrom(calling_convention.GetRegisterAt(1)),
462                                DataType::Type::kReference);
463     if (instruction_->IsInstanceOf()) {
464       arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
465       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
466       DataType::Type ret_type = instruction_->GetType();
467       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
468       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
469     } else {
470       DCHECK(instruction_->IsCheckCast());
471       arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
472       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
473     }
474 
475     if (!is_fatal_) {
476       RestoreLiveRegisters(codegen, locations);
477       __ B(GetExitLabel());
478     }
479   }
480 
  const char* GetDescription() const override { return "TypeCheckSlowPathARM64"; }
  bool IsFatal() const override { return is_fatal_; }
483 
484  private:
485   const bool is_fatal_;
486 
487   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
488 };
489 
490 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
491  public:
  explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
493       : SlowPathCodeARM64(instruction) {}
494 
  void EmitNativeCode(CodeGenerator* codegen) override {
496     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
497     __ Bind(GetEntryLabel());
498     LocationSummary* locations = instruction_->GetLocations();
499     SaveLiveRegisters(codegen, locations);
500     InvokeRuntimeCallingConvention calling_convention;
501     __ Mov(calling_convention.GetRegisterAt(0),
502            static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
503     arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
504     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
505   }
506 
  const char* GetDescription() const override { return "DeoptimizationSlowPathARM64"; }
508 
509  private:
510   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
511 };
512 
513 class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
514  public:
  explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}
516 
  void EmitNativeCode(CodeGenerator* codegen) override {
518     LocationSummary* locations = instruction_->GetLocations();
519     __ Bind(GetEntryLabel());
520     SaveLiveRegisters(codegen, locations);
521 
522     InvokeRuntimeCallingConvention calling_convention;
523     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
524     parallel_move.AddMove(
525         locations->InAt(0),
526         LocationFrom(calling_convention.GetRegisterAt(0)),
527         DataType::Type::kReference,
528         nullptr);
529     parallel_move.AddMove(
530         locations->InAt(1),
531         LocationFrom(calling_convention.GetRegisterAt(1)),
532         DataType::Type::kInt32,
533         nullptr);
534     parallel_move.AddMove(
535         locations->InAt(2),
536         LocationFrom(calling_convention.GetRegisterAt(2)),
537         DataType::Type::kReference,
538         nullptr);
539     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
540 
541     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
542     arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
543     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
544     RestoreLiveRegisters(codegen, locations);
545     __ B(GetExitLabel());
546   }
547 
  const char* GetDescription() const override { return "ArraySetSlowPathARM64"; }
549 
550  private:
551   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
552 };
553 
void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
555   uint32_t num_entries = switch_instr_->GetNumEntries();
556   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
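  // Each entry is a 32-bit offset from the start of the table to the target basic block. The
  // packed-switch code that consumes the table is expected to look roughly like this (a sketch,
  // not necessarily the exact emitted sequence):
  //   adr   xTable, table_start
  //   ldrsw xOffset, [xTable, wIndex, uxtw #2]
  //   add   xTarget, xTable, xOffset
  //   br    xTarget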
557 
  // We are about to use the assembler to place literals directly. Make sure we have enough
  // space in the underlying code buffer and that we have generated a jump table of the right size.
560   EmissionCheckScope scope(codegen->GetVIXLAssembler(),
561                            num_entries * sizeof(int32_t),
562                            CodeBufferCheckScope::kExactSize);
563 
564   __ Bind(&table_start_);
565   const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
566   for (uint32_t i = 0; i < num_entries; i++) {
567     vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
568     DCHECK(target_label->IsBound());
569     ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
570     DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
571     DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
572     Literal<int32_t> literal(jump_offset);
573     __ place(&literal);
574   }
575 }
576 
577 // Slow path generating a read barrier for a heap reference.
578 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
579  public:
  ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
581                                            Location out,
582                                            Location ref,
583                                            Location obj,
584                                            uint32_t offset,
585                                            Location index)
586       : SlowPathCodeARM64(instruction),
587         out_(out),
588         ref_(ref),
589         obj_(obj),
590         offset_(offset),
591         index_(index) {
592     // If `obj` is equal to `out` or `ref`, it means the initial object
593     // has been overwritten by (or after) the heap object reference load
594     // to be instrumented, e.g.:
595     //
596     //   __ Ldr(out, HeapOperand(out, class_offset);
597     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
598     //
599     // In that case, we have lost the information about the original
600     // object, and the emitted read barrier cannot work properly.
601     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
602     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
603   }
604 
  void EmitNativeCode(CodeGenerator* codegen) override {
606     DCHECK(codegen->EmitReadBarrier());
607     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
608     LocationSummary* locations = instruction_->GetLocations();
609     DataType::Type type = DataType::Type::kReference;
610     DCHECK(locations->CanCall());
611     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
612     DCHECK(instruction_->IsInstanceFieldGet() ||
613            instruction_->IsStaticFieldGet() ||
614            instruction_->IsArrayGet() ||
615            instruction_->IsInstanceOf() ||
616            instruction_->IsCheckCast() ||
617            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
618         << "Unexpected instruction in read barrier for heap reference slow path: "
619         << instruction_->DebugName();
620     // The read barrier instrumentation of object ArrayGet
621     // instructions does not support the HIntermediateAddress
622     // instruction.
623     DCHECK(!(instruction_->IsArrayGet() &&
624              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
625 
626     __ Bind(GetEntryLabel());
627 
628     SaveLiveRegisters(codegen, locations);
629 
630     // We may have to change the index's value, but as `index_` is a
631     // constant member (like other "inputs" of this slow path),
632     // introduce a copy of it, `index`.
633     Location index = index_;
634     if (index_.IsValid()) {
635       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
636       if (instruction_->IsArrayGet()) {
637         // Compute the actual memory offset and store it in `index`.
638         Register index_reg = RegisterFrom(index_, DataType::Type::kInt32);
639         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
640         if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
641           // We are about to change the value of `index_reg` (see the
642           // calls to vixl::MacroAssembler::Lsl and
643           // vixl::MacroAssembler::Mov below), but it has
644           // not been saved by the previous call to
645           // art::SlowPathCode::SaveLiveRegisters, as it is a
646           // callee-save register --
647           // art::SlowPathCode::SaveLiveRegisters does not consider
648           // callee-save registers, as it has been designed with the
649           // assumption that callee-save registers are supposed to be
650           // handled by the called function.  So, as a callee-save
651           // register, `index_reg` _would_ eventually be saved onto
652           // the stack, but it would be too late: we would have
653           // changed its value earlier.  Therefore, we manually save
654           // it here into another freely available register,
655           // `free_reg`, chosen of course among the caller-save
656           // registers (as a callee-save `free_reg` register would
657           // exhibit the same problem).
658           //
659           // Note we could have requested a temporary register from
660           // the register allocator instead; but we prefer not to, as
661           // this is a slow path, and we know we can find a
662           // caller-save register that is available.
663           Register free_reg = FindAvailableCallerSaveRegister(codegen);
664           __ Mov(free_reg.W(), index_reg);
665           index_reg = free_reg;
666           index = LocationFrom(index_reg);
667         } else {
668           // The initial register stored in `index_` has already been
669           // saved in the call to art::SlowPathCode::SaveLiveRegisters
670           // (as it is not a callee-save register), so we can freely
671           // use it.
672         }
673         // Shifting the index value contained in `index_reg` by the scale
674         // factor (2) cannot overflow in practice, as the runtime is
675         // unable to allocate object arrays with a size larger than
676         // 2^26 - 1 (that is, 2^28 - 4 bytes).
677         __ Lsl(index_reg, index_reg, DataType::SizeShift(type));
678         static_assert(
679             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
680             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
681         __ Add(index_reg, index_reg, Operand(offset_));
682       } else {
683         // In the case of the following intrinsics `index_` is not shifted by a scale factor of 2
684         // (as in the case of ArrayGet), as it is actually an offset to an object field within an
685         // object.
686         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
687         DCHECK(instruction_->GetLocations()->Intrinsified());
688         HInvoke* invoke = instruction_->AsInvoke();
689         DCHECK(IsUnsafeGetReference(invoke) ||
690                IsVarHandleGet(invoke) ||
691                IsUnsafeCASReference(invoke) ||
692                IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
693         DCHECK_EQ(offset_, 0u);
694         DCHECK(index_.IsRegister());
695       }
696     }
697 
698     // We're moving two or three locations to locations that could
699     // overlap, so we need a parallel move resolver.
700     InvokeRuntimeCallingConvention calling_convention;
701     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
702     parallel_move.AddMove(ref_,
703                           LocationFrom(calling_convention.GetRegisterAt(0)),
704                           type,
705                           nullptr);
706     parallel_move.AddMove(obj_,
707                           LocationFrom(calling_convention.GetRegisterAt(1)),
708                           type,
709                           nullptr);
710     if (index.IsValid()) {
711       parallel_move.AddMove(index,
712                             LocationFrom(calling_convention.GetRegisterAt(2)),
713                             DataType::Type::kInt32,
714                             nullptr);
715       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
716     } else {
717       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
718       arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
719     }
720     arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
721                                  instruction_,
722                                  instruction_->GetDexPc(),
723                                  this);
724     CheckEntrypointTypes<
725         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
726     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
727 
728     RestoreLiveRegisters(codegen, locations);
729 
730     __ B(GetExitLabel());
731   }
732 
  const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
734 
735  private:
  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
737     size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
738     size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
739     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
740       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
741         return Register(VIXLRegCodeFromART(i), kXRegSize);
742       }
743     }
744     // We shall never fail to find a free caller-save register, as
745     // there are more than two core caller-save registers on ARM64
746     // (meaning it is possible to find one which is different from
747     // `ref` and `obj`).
748     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
749     LOG(FATAL) << "Could not find a free register";
750     UNREACHABLE();
751   }
752 
753   const Location out_;
754   const Location ref_;
755   const Location obj_;
756   const uint32_t offset_;
757   // An additional location containing an index to an array.
758   // Only used for HArrayGet and the UnsafeGetObject &
759   // UnsafeGetObjectVolatile intrinsics.
760   const Location index_;
761 
762   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
763 };
764 
765 // Slow path generating a read barrier for a GC root.
766 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
767  public:
  ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
769       : SlowPathCodeARM64(instruction), out_(out), root_(root) {
770   }
771 
  void EmitNativeCode(CodeGenerator* codegen) override {
773     DCHECK(codegen->EmitReadBarrier());
774     LocationSummary* locations = instruction_->GetLocations();
775     DataType::Type type = DataType::Type::kReference;
776     DCHECK(locations->CanCall());
777     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
778     DCHECK(instruction_->IsLoadClass() ||
779            instruction_->IsLoadString() ||
780            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
781         << "Unexpected instruction in read barrier for GC root slow path: "
782         << instruction_->DebugName();
783 
784     __ Bind(GetEntryLabel());
785     SaveLiveRegisters(codegen, locations);
786 
787     InvokeRuntimeCallingConvention calling_convention;
788     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
789     // The argument of the ReadBarrierForRootSlow is not a managed
790     // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
791     // thus we need a 64-bit move here, and we cannot use
792     //
793     //   arm64_codegen->MoveLocation(
794     //       LocationFrom(calling_convention.GetRegisterAt(0)),
795     //       root_,
796     //       type);
797     //
798     // which would emit a 32-bit move, as `type` is a (32-bit wide)
799     // reference type (`DataType::Type::kReference`).
800     __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
801     arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
802                                  instruction_,
803                                  instruction_->GetDexPc(),
804                                  this);
805     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
806     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
807 
808     RestoreLiveRegisters(codegen, locations);
809     __ B(GetExitLabel());
810   }
811 
  const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARM64"; }
813 
814  private:
815   const Location out_;
816   const Location root_;
817 
818   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
819 };
820 
821 class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 {
822  public:
  explicit MethodEntryExitHooksSlowPathARM64(HInstruction* instruction)
824       : SlowPathCodeARM64(instruction) {}
825 
  void EmitNativeCode(CodeGenerator* codegen) override {
827     LocationSummary* locations = instruction_->GetLocations();
828     QuickEntrypointEnum entry_point =
829         (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
830     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
831     __ Bind(GetEntryLabel());
832     SaveLiveRegisters(codegen, locations);
833     if (instruction_->IsMethodExitHook()) {
834       __ Mov(vixl::aarch64::x4, arm64_codegen->GetFrameSize());
835     }
836     arm64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
837     RestoreLiveRegisters(codegen, locations);
838     __ B(GetExitLabel());
839   }
840 
  const char* GetDescription() const override {
842     return "MethodEntryExitHooksSlowPath";
843   }
844 
845  private:
846   DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARM64);
847 };
848 
849 class CompileOptimizedSlowPathARM64 : public SlowPathCodeARM64 {
850  public:
  CompileOptimizedSlowPathARM64(HSuspendCheck* check, Register profiling_info)
852       : SlowPathCodeARM64(check),
853         profiling_info_(profiling_info) {}
854 
  void EmitNativeCode(CodeGenerator* codegen) override {
856     uint32_t entrypoint_offset =
857         GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value();
858     __ Bind(GetEntryLabel());
859     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
860     UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
861     Register counter = temps.AcquireW();
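    // Write the optimize threshold back into the baseline hotness counter, presumably so that
    // this slow path is not immediately triggered again while the compilation request below is
    // pending.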
862     __ Mov(counter, ProfilingInfo::GetOptimizeThreshold());
863     __ Strh(counter,
864             MemOperand(profiling_info_, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
865     if (instruction_ != nullptr) {
866       // Only saves live vector regs for SIMD.
867       SaveLiveRegisters(codegen, instruction_->GetLocations());
868     }
869     __ Ldr(lr, MemOperand(tr, entrypoint_offset));
870     // Note: we don't record the call here (and therefore don't generate a stack
871     // map), as the entrypoint should never be suspended.
872     __ Blr(lr);
873     if (instruction_ != nullptr) {
874       // Only restores live vector regs for SIMD.
875       RestoreLiveRegisters(codegen, instruction_->GetLocations());
876     }
877     __ B(GetExitLabel());
878   }
879 
  const char* GetDescription() const override {
881     return "CompileOptimizedSlowPath";
882   }
883 
884  private:
885   // The register where the profiling info is stored when entering the slow
886   // path.
887   Register profiling_info_;
888 
889   DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARM64);
890 };
891 
892 #undef __
893 
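// A sketch of the managed calling convention implemented below: the ArtMethod* is passed in x0,
// core (non-FP) arguments are assigned to w1-w7/x1-x7, FP arguments to s0-s7/d0-d7, and any
// remaining arguments go on the stack. For example, a method taking (int, long, float) would
// typically receive those arguments in w1, x2 and s0.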
Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
895   Location next_location;
896   if (type == DataType::Type::kVoid) {
897     LOG(FATAL) << "Unreachable type " << type;
898   }
899 
900   if (DataType::IsFloatingPointType(type) &&
901       (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
902     next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
903   } else if (!DataType::IsFloatingPointType(type) &&
904              (gp_index_ < calling_convention.GetNumberOfRegisters())) {
905     next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
906   } else {
907     size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
908     next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
909                                                 : Location::StackSlot(stack_offset);
910   }
911 
912   // Space on the stack is reserved for all arguments.
913   stack_index_ += DataType::Is64BitType(type) ? 2 : 1;
914   return next_location;
915 }
916 
Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
918   return LocationFrom(kArtMethodRegister);
919 }
920 
Location CriticalNativeCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
922   DCHECK_NE(type, DataType::Type::kReference);
923 
924   Location location = Location::NoLocation();
925   if (DataType::IsFloatingPointType(type)) {
926     if (fpr_index_ < kParameterFPRegistersLength) {
927       location = LocationFrom(kParameterFPRegisters[fpr_index_]);
928       ++fpr_index_;
929     }
930   } else {
931     // Native ABI uses the same registers as managed, except that the method register x0
932     // is a normal argument.
933     if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
934       location = LocationFrom(gpr_index_ == 0u ? x0 : kParameterCoreRegisters[gpr_index_ - 1u]);
935       ++gpr_index_;
936     }
937   }
938   if (location.IsInvalid()) {
939     if (DataType::Is64BitType(type)) {
940       location = Location::DoubleStackSlot(stack_offset_);
941     } else {
942       location = Location::StackSlot(stack_offset_);
943     }
944     stack_offset_ += kFramePointerSize;
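    // Note: each stack slot is 8 bytes (kFramePointerSize) even for 32-bit types, matching the
    // AAPCS64 rule that stack argument slots are rounded up to 8 bytes.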
945 
946     if (for_register_allocation_) {
947       location = Location::Any();
948     }
949   }
950   return location;
951 }
952 
Location CriticalNativeCallingConventionVisitorARM64::GetReturnLocation(DataType::Type type) const {
954   // We perform conversion to the managed ABI return register after the call if needed.
955   InvokeDexCallingConventionVisitorARM64 dex_calling_convention;
956   return dex_calling_convention.GetReturnLocation(type);
957 }
958 
Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const {
960   // Pass the method in the hidden argument x15.
961   return Location::RegisterLocation(x15.GetCode());
962 }
963 
964 namespace detail {
965 
966 // Mark which intrinsics we don't have handcrafted code for.
967 template <Intrinsics T>
968 struct IsUnimplemented {
969   bool is_unimplemented = false;
970 };
971 
972 #define TRUE_OVERRIDE(Name)                     \
973   template <>                                   \
974   struct IsUnimplemented<Intrinsics::k##Name> { \
975     bool is_unimplemented = true;               \
976   };
977 UNIMPLEMENTED_INTRINSIC_LIST_ARM64(TRUE_OVERRIDE)
978 #undef TRUE_OVERRIDE
979 
980 static constexpr bool kIsIntrinsicUnimplemented[] = {
981     false,  // kNone
982 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
983     IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
984     ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
985 #undef IS_UNIMPLEMENTED
986 };
987 
988 }  // namespace detail
989 
CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
991                                        const CompilerOptions& compiler_options,
992                                        OptimizingCompilerStats* stats)
993     : CodeGenerator(graph,
994                     kNumberOfAllocatableRegisters,
995                     kNumberOfAllocatableFPRegisters,
996                     kNumberOfAllocatableRegisterPairs,
997                     callee_saved_core_registers.GetList(),
998                     callee_saved_fp_registers.GetList(),
999                     compiler_options,
1000                     stats,
1001                     ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1002       block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1003       jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1004       location_builder_neon_(graph, this),
1005       instruction_visitor_neon_(graph, this),
1006       location_builder_sve_(graph, this),
1007       instruction_visitor_sve_(graph, this),
1008       move_resolver_(graph->GetAllocator(), this),
1009       assembler_(graph->GetAllocator(),
1010                  compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()),
1011       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1012       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1013       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1014       app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1015       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1016       public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1017       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1018       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1019       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1020       boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1021       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1022       call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1023       baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1024       jit_patches_(&assembler_, graph->GetAllocator()),
1025       jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
1026                                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1027   // Save the link register (containing the return address) to mimic Quick.
1028   AddAllocatedRegister(LocationFrom(lr));
1029 
1030   bool use_sve = ShouldUseSVE();
1031   if (use_sve) {
1032     location_builder_ = &location_builder_sve_;
1033     instruction_visitor_ = &instruction_visitor_sve_;
1034   } else {
1035     location_builder_ = &location_builder_neon_;
1036     instruction_visitor_ = &instruction_visitor_neon_;
1037   }
1038 }
1039 
bool CodeGeneratorARM64::ShouldUseSVE() const {
1041   return GetInstructionSetFeatures().HasSVE();
1042 }
1043 
size_t CodeGeneratorARM64::GetSIMDRegisterWidth() const {
1045   return SupportsPredicatedSIMD()
1046       ? GetInstructionSetFeatures().GetSVEVectorLength() / kBitsPerByte
1047       : vixl::aarch64::kQRegSizeInBytes;
1048 }
1049 
1050 #define __ GetVIXLAssembler()->
1051 
void CodeGeneratorARM64::EmitJumpTables() {
1053   for (auto&& jump_table : jump_tables_) {
1054     jump_table->EmitTable(this);
1055   }
1056 }
1057 
void CodeGeneratorARM64::Finalize() {
1059   EmitJumpTables();
1060 
1061   // Emit JIT baker read barrier slow paths.
1062   DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
1063   for (auto& entry : jit_baker_read_barrier_slow_paths_) {
1064     uint32_t encoded_data = entry.first;
1065     vixl::aarch64::Label* slow_path_entry = &entry.second.label;
1066     __ Bind(slow_path_entry);
1067     CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
1068   }
1069 
1070   // Ensure we emit the literal pool.
1071   __ FinalizeCode();
1072 
1073   CodeGenerator::Finalize();
1074 
1075   // Verify Baker read barrier linker patches.
1076   if (kIsDebugBuild) {
1077     ArrayRef<const uint8_t> code(GetCode());
1078     for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
1079       DCHECK(info.label.IsBound());
1080       uint32_t literal_offset = info.label.GetLocation();
1081       DCHECK_ALIGNED(literal_offset, 4u);
1082 
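      // Reassemble a 32-bit instruction from the little-endian code buffer so the encodings
      // around each Baker read barrier patch label can be checked below.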
1083       auto GetInsn = [&code](uint32_t offset) {
1084         DCHECK_ALIGNED(offset, 4u);
1085         return
1086             (static_cast<uint32_t>(code[offset + 0]) << 0) +
1087             (static_cast<uint32_t>(code[offset + 1]) << 8) +
            (static_cast<uint32_t>(code[offset + 2]) << 16) +
1089             (static_cast<uint32_t>(code[offset + 3]) << 24);
1090       };
1091 
1092       const uint32_t encoded_data = info.custom_data;
1093       BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
1094       // Check that the next instruction matches the expected LDR.
1095       switch (kind) {
1096         case BakerReadBarrierKind::kField:
1097         case BakerReadBarrierKind::kAcquire: {
1098           DCHECK_GE(code.size() - literal_offset, 8u);
1099           uint32_t next_insn = GetInsn(literal_offset + 4u);
1100           CheckValidReg(next_insn & 0x1fu);  // Check destination register.
1101           const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1102           if (kind == BakerReadBarrierKind::kField) {
1103             // LDR (immediate) with correct base_reg.
1104             CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
1105           } else {
1106             DCHECK(kind == BakerReadBarrierKind::kAcquire);
1107             // LDAR with correct base_reg.
1108             CHECK_EQ(next_insn & 0xffffffe0u, 0x88dffc00u | (base_reg << 5));
1109           }
1110           break;
1111         }
1112         case BakerReadBarrierKind::kArray: {
1113           DCHECK_GE(code.size() - literal_offset, 8u);
1114           uint32_t next_insn = GetInsn(literal_offset + 4u);
1115           // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
1116           // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
1117           CheckValidReg(next_insn & 0x1fu);  // Check destination register.
1118           const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1119           CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
1120           CheckValidReg((next_insn >> 16) & 0x1f);  // Check index register
1121           break;
1122         }
1123         case BakerReadBarrierKind::kGcRoot: {
1124           DCHECK_GE(literal_offset, 4u);
1125           uint32_t prev_insn = GetInsn(literal_offset - 4u);
1126           const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1127           // Usually LDR (immediate) with correct root_reg but
1128           // we may have a "MOV marked, old_value" for intrinsic CAS.
1129           if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) {    // MOV?
1130             CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg);  // LDR?
1131           }
1132           break;
1133         }
1134         default:
1135           LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
1136           UNREACHABLE();
1137       }
1138     }
1139   }
1140 }
1141 
void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
1143   // Note: There are 6 kinds of moves:
1144   // 1. constant -> GPR/FPR (non-cycle)
1145   // 2. constant -> stack (non-cycle)
1146   // 3. GPR/FPR -> GPR/FPR
1147   // 4. GPR/FPR -> stack
1148   // 5. stack -> GPR/FPR
1149   // 6. stack -> stack (non-cycle)
1150   // Case 1, 2 and 6 should never be included in a dependency cycle on ARM64. For case 3, 4, and 5
1151   // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting
1152   // cycles on ARM64, so we always have 1 GPR and 1 FPR available VIXL temps to resolve the
1153   // dependency.
1154   vixl_temps_.Open(GetVIXLAssembler());
1155 }
1156 
void ParallelMoveResolverARM64::FinishEmitNativeCode() {
1158   vixl_temps_.Close();
1159 }
1160 
Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
1162   DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
1163          || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
1164          || kind == Location::kSIMDStackSlot);
1165   kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
1166       ? Location::kFpuRegister
1167       : Location::kRegister;
1168   Location scratch = GetScratchLocation(kind);
1169   if (!scratch.Equals(Location::NoLocation())) {
1170     return scratch;
1171   }
1172   // Allocate from VIXL temp registers.
1173   if (kind == Location::kRegister) {
1174     scratch = LocationFrom(vixl_temps_.AcquireX());
1175   } else {
1176     DCHECK_EQ(kind, Location::kFpuRegister);
1177     scratch = codegen_->GetGraph()->HasSIMD()
1178         ? codegen_->GetInstructionCodeGeneratorArm64()->AllocateSIMDScratchLocation(&vixl_temps_)
1179         : LocationFrom(vixl_temps_.AcquireD());
1180   }
1181   AddScratchLocation(scratch);
1182   return scratch;
1183 }
1184 
1185 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
1186   if (loc.IsRegister()) {
1187     vixl_temps_.Release(XRegisterFrom(loc));
1188   } else {
1189     DCHECK(loc.IsFpuRegister());
1190     if (codegen_->GetGraph()->HasSIMD()) {
1191       codegen_->GetInstructionCodeGeneratorArm64()->FreeSIMDScratchLocation(loc, &vixl_temps_);
1192     } else {
1193       vixl_temps_.Release(DRegisterFrom(loc));
1194     }
1195   }
1196   RemoveScratchLocation(loc);
1197 }
1198 
1199 void ParallelMoveResolverARM64::EmitMove(size_t index) {
1200   MoveOperands* move = moves_[index];
1201   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid);
1202 }
1203 
1204 void LocationsBuilderARM64::VisitMethodExitHook(HMethodExitHook* method_hook) {
1205   LocationSummary* locations = new (GetGraph()->GetAllocator())
1206       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1207   DataType::Type return_type = method_hook->InputAt(0)->GetType();
1208   locations->SetInAt(0, ARM64ReturnLocation(return_type));
1209 }
1210 
1211 void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* instruction) {
1212   MacroAssembler* masm = GetVIXLAssembler();
1213   UseScratchRegisterScope temps(masm);
1214   Register addr = temps.AcquireX();
1215   Register index = temps.AcquireX();
1216   Register value = index.W();
1217 
1218   SlowPathCodeARM64* slow_path =
1219       new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction);
1220   codegen_->AddSlowPath(slow_path);
1221 
1222   if (instruction->IsMethodExitHook()) {
1223     // Check if we are required to check if the caller needs a deoptimization. Strictly
1224     // speaking, it would be sufficient to check if the CheckCallerForDeopt bit is set, but it
1225     // is faster to check if the flag is just non-zero. The kCHA bit isn't used in debuggable
1226     // runtimes, as CHA optimization is disabled there; the other bit is used when this method
1227     // itself requires a deoptimization due to redefinition. So checking for non-zero is safe.
1228     __ Ldr(value, MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1229     __ Cbnz(value, slow_path->GetEntryLabel());
1230   }
1231 
1232   uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1233   MemberOffset offset = instruction->IsMethodExitHook() ?
1234       instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
1235       instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
1236   __ Mov(addr, address + offset.Int32Value());
1237   __ Ldrb(value, MemOperand(addr, 0));
1238   __ Cmp(value, Operand(instrumentation::Instrumentation::kFastTraceListeners));
1239   // Check if there are any method entry / exit listeners. If not, continue.
1240   __ B(lt, slow_path->GetExitLabel());
1241   // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
1242   // If yes, just take the slow path.
1243   __ B(gt, slow_path->GetEntryLabel());
1244 
1245   // Check if there is space in the buffer to store a new entry; if not, take the slow path.
1246   uint32_t trace_buffer_index_offset =
1247       Thread::TraceBufferIndexOffset<kArm64PointerSize>().Int32Value();
1248   __ Ldr(index, MemOperand(tr, trace_buffer_index_offset));
1249   __ Subs(index, index, kNumEntriesForWallClock);
1250   __ B(lt, slow_path->GetEntryLabel());
1251 
1252   // Update the index in the `Thread`.
1253   __ Str(index, MemOperand(tr, trace_buffer_index_offset));
1254   // Calculate the entry address in the buffer.
1255   // addr = base_addr + sizeof(void*) * index;
1256   __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArm64PointerSize>().SizeValue()));
1257   __ ComputeAddress(addr, MemOperand(addr, index, LSL, TIMES_8));
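  // For example (illustrative values only): if the stored index was 8, it is now
  // 8 - kNumEntriesForWallClock, and `addr` points at trace_buffer + 8 * index, i.e. at the slots
  // filled below with the method pointer (kMethodOffsetInBytes) and timestamp (kTimestampOffsetInBytes).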
1258 
1259   Register tmp = index;
1260   // Record method pointer and trace action.
1261   __ Ldr(tmp, MemOperand(sp, 0));
1262   // Use the last two bits to encode the trace method action. For MethodEntry it is 0,
1263   // so there is no need to set the bits since they are already 0.
1264   if (instruction->IsMethodExitHook()) {
1265     DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
1266     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
1267     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
1268     __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
1269   }
1270   __ Str(tmp, MemOperand(addr, kMethodOffsetInBytes));
1271   // Record the timestamp.
1272   __ Mrs(tmp, (SystemRegister)SYS_CNTVCT_EL0);
1273   __ Str(tmp, MemOperand(addr, kTimestampOffsetInBytes));
1274   __ Bind(slow_path->GetExitLabel());
1275 }
1276 
1277 void InstructionCodeGeneratorARM64::VisitMethodExitHook(HMethodExitHook* instruction) {
1278   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1279   DCHECK(codegen_->RequiresCurrentMethod());
1280   GenerateMethodEntryExitHook(instruction);
1281 }
1282 
1283 void LocationsBuilderARM64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1284   new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1285 }
1286 
1287 void InstructionCodeGeneratorARM64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1288   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1289   DCHECK(codegen_->RequiresCurrentMethod());
1290   GenerateMethodEntryExitHook(instruction);
1291 }
1292 
1293 void CodeGeneratorARM64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
1294   MacroAssembler* masm = GetVIXLAssembler();
1295   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1296     UseScratchRegisterScope temps(masm);
1297     Register counter = temps.AcquireX();
1298     Register method = is_frame_entry ? kArtMethodRegister : temps.AcquireX();
1299     if (!is_frame_entry) {
1300       __ Ldr(method, MemOperand(sp, 0));
1301     }
1302     __ Ldrh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
1303     vixl::aarch64::Label done;
1304     DCHECK_EQ(0u, interpreter::kNterpHotnessValue);
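    // The counter saturates at zero (== kNterpHotnessValue): once it reaches zero, skip the
    // decrement below so it does not wrap around.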
1305     __ Cbz(counter, &done);
1306     __ Add(counter, counter, -1);
1307     __ Strh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
1308     __ Bind(&done);
1309   }
1310 
1311   if (GetGraph()->IsCompilingBaseline() &&
1312       GetGraph()->IsUsefulOptimizing() &&
1313       !Runtime::Current()->IsAotCompiler()) {
1314     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1315     DCHECK(info != nullptr);
1316     DCHECK(!HasEmptyFrame());
1317     uint64_t address = reinterpret_cast64<uint64_t>(info);
1318     UseScratchRegisterScope temps(masm);
1319     Register counter = temps.AcquireW();
1320     SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARM64(
1321         suspend_check, /* profiling_info= */ lr);
1322     AddSlowPath(slow_path);
1323     __ Ldr(lr, jit_patches_.DeduplicateUint64Literal(address));
1324     __ Ldrh(counter, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
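    // The baseline hotness counter counts down; once it reaches zero, take the slow path
    // (CompileOptimizedSlowPathARM64), which notifies the runtime that this method is hot.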
1325     __ Cbz(counter, slow_path->GetEntryLabel());
1326     __ Add(counter, counter, -1);
1327     __ Strh(counter, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
1328     __ Bind(slow_path->GetExitLabel());
1329   }
1330 }
1331 
1332 void CodeGeneratorARM64::GenerateFrameEntry() {
1333   MacroAssembler* masm = GetVIXLAssembler();
1334 
1335   // Check if we need to generate the clinit check. We will jump to the
1336   // resolution stub if the class is not initialized and the executing thread is
1337   // not the thread initializing it.
1338   // We do this before constructing the frame to get the correct stack trace if
1339   // an exception is thrown.
1340   if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
1341     UseScratchRegisterScope temps(masm);
1342     vixl::aarch64::Label resolution;
1343     vixl::aarch64::Label memory_barrier;
1344 
1345     Register temp1 = temps.AcquireW();
1346     Register temp2 = temps.AcquireW();
1347 
1348     // Check if we're visibly initialized.
1349 
1350     // We don't emit a read barrier here to save on code size. We rely on the
1351     // resolution trampoline to do a suspend check before re-entering this code.
1352     __ Ldr(temp1, MemOperand(kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
1353     __ Ldrb(temp2, HeapOperand(temp1, kClassStatusByteOffset));
1354     __ Cmp(temp2, kShiftedVisiblyInitializedValue);
1355     __ B(hs, &frame_entry_label_);
1356 
1357     // Check if we're initialized and jump to code that does a memory barrier if
1358     // so.
1359     __ Cmp(temp2, kShiftedInitializedValue);
1360     __ B(hs, &memory_barrier);
1361 
1362     // Check if we're initializing and the thread initializing is the one
1363     // executing the code.
1364     __ Cmp(temp2, kShiftedInitializingValue);
1365     __ B(lo, &resolution);
1366 
1367     __ Ldr(temp1, HeapOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
1368     __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArm64PointerSize>().Int32Value()));
1369     __ Cmp(temp1, temp2);
1370     __ B(eq, &frame_entry_label_);
1371     __ Bind(&resolution);
1372 
1373     // Jump to the resolution stub.
1374     ThreadOffset64 entrypoint_offset =
1375         GetThreadOffset<kArm64PointerSize>(kQuickQuickResolutionTrampoline);
1376     __ Ldr(temp1.X(), MemOperand(tr, entrypoint_offset.Int32Value()));
1377     __ Br(temp1.X());
1378 
1379     __ Bind(&memory_barrier);
1380     GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
1381   }
1382   __ Bind(&frame_entry_label_);
1383 
1384   bool do_overflow_check =
1385       FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
1386   if (do_overflow_check) {
1387     UseScratchRegisterScope temps(masm);
1388     Register temp = temps.AcquireX();
1389     DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1390     __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kArm64)));
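    // This is the implicit stack overflow check: probe the address `reserved bytes` below SP.
    // If the stack cannot grow that far, the load below faults and the fault handler raises a
    // StackOverflowError using the PC recorded by RecordPcInfo.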
1391     {
1392       // Ensure that between load and RecordPcInfo there are no pools emitted.
1393       ExactAssemblyScope eas(GetVIXLAssembler(),
1394                              kInstructionSize,
1395                              CodeBufferCheckScope::kExactSize);
1396       __ ldr(wzr, MemOperand(temp, 0));
1397       RecordPcInfo(nullptr, 0);
1398     }
1399   }
1400 
1401   if (!HasEmptyFrame()) {
1402     // Make sure the frame size isn't unreasonably large.
1403     DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
1404 
1405     // Stack layout:
1406     //      sp[frame_size - 8]        : lr.
1407     //      ...                       : other preserved core registers.
1408     //      ...                       : other preserved fp registers.
1409     //      ...                       : reserved frame space.
1410     //      sp[0]                     : current method.
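    // For example (illustrative sizes only): with frame_size == 64 and only lr preserved (no FP
    // spills), core_spills_offset is 64 - 8 = 56, so lr is stored at sp[56], the ArtMethod* at
    // sp[0], and sp[8..55] is the reserved frame space.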
1411     int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
1412     uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
1413     CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
1414     DCHECK(!preserved_core_registers.IsEmpty());
1415     uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
1416     CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();
1417 
1418     // Save the current method if we need it, or if using STP reduces code
1419     // size. Note that we do not do this in HCurrentMethod, as the
1420     // instruction might have been removed in the SSA graph.
1421     CPURegister lowest_spill;
1422     if (core_spills_offset == kXRegSizeInBytes) {
1423       // If there is no gap between the method and the lowest core spill, use
1424       // aligned STP pre-index to store both. Max difference is 512. We do
1425       // that to reduce code size even if we do not have to save the method.
1426       DCHECK_LE(frame_size, 512);  // 32 core registers are only 256 bytes.
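      // (A 64-bit STP with writeback encodes a signed 7-bit offset scaled by 8, i.e. [-512, 504],
      // so the -frame_size pre-index offset used below is always encodable.)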
1427       lowest_spill = preserved_core_registers.PopLowestIndex();
1428       __ Stp(kArtMethodRegister, lowest_spill, MemOperand(sp, -frame_size, PreIndex));
1429     } else if (RequiresCurrentMethod()) {
1430       __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
1431     } else {
1432       __ Claim(frame_size);
1433     }
1434     GetAssembler()->cfi().AdjustCFAOffset(frame_size);
1435     if (lowest_spill.IsValid()) {
1436       GetAssembler()->cfi().RelOffset(DWARFReg(lowest_spill), core_spills_offset);
1437       core_spills_offset += kXRegSizeInBytes;
1438     }
1439     GetAssembler()->SpillRegisters(preserved_core_registers, core_spills_offset);
1440     GetAssembler()->SpillRegisters(preserved_fp_registers, fp_spills_offset);
1441 
1442     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1443       // Initialize should_deoptimize flag to 0.
1444       Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
1445       __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
1446     }
1447   }
1448   MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
1449   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
1450 }
1451 
1452 void CodeGeneratorARM64::GenerateFrameExit() {
1453   GetAssembler()->cfi().RememberState();
1454   if (!HasEmptyFrame()) {
1455     int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
1456     uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
1457     CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
1458     DCHECK(!preserved_core_registers.IsEmpty());
1459     uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
1460     CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();
1461 
1462     CPURegister lowest_spill;
1463     if (core_spills_offset == kXRegSizeInBytes) {
1464       // If there is no gap between the method and the lowest core spill, use
1465       // aligned LDP post-index to pop both. Max difference is 504. We do
1466       // that to reduce code size even though the loaded method is unused.
1467       DCHECK_LE(frame_size, 504);  // 32 core registers are only 256 bytes.
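      // (The LDP writeback form uses the same signed 7-bit offset scaled by 8, but here the
      // offset is +frame_size, so only values up to 504 are encodable.)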
1468       lowest_spill = preserved_core_registers.PopLowestIndex();
1469       core_spills_offset += kXRegSizeInBytes;
1470     }
1471     GetAssembler()->UnspillRegisters(preserved_fp_registers, fp_spills_offset);
1472     GetAssembler()->UnspillRegisters(preserved_core_registers, core_spills_offset);
1473     if (lowest_spill.IsValid()) {
1474       __ Ldp(xzr, lowest_spill, MemOperand(sp, frame_size, PostIndex));
1475       GetAssembler()->cfi().Restore(DWARFReg(lowest_spill));
1476     } else {
1477       __ Drop(frame_size);
1478     }
1479     GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
1480   }
1481   __ Ret();
1482   GetAssembler()->cfi().RestoreState();
1483   GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
1484 }
1485 
1486 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
1487   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
1488   return CPURegList(CPURegister::kRegister, kXRegSize,
1489                     core_spill_mask_);
1490 }
1491 
1492 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
1493   DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
1494                                          GetNumberOfFloatingPointRegisters()));
1495   return CPURegList(CPURegister::kVRegister, kDRegSize,
1496                     fpu_spill_mask_);
1497 }
1498 
1499 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
1500   __ Bind(GetLabelOf(block));
1501 }
1502 
1503 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
1504   DCHECK(location.IsRegister());
1505   __ Mov(RegisterFrom(location, DataType::Type::kInt32), value);
1506 }
1507 
1508 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1509   if (location.IsRegister()) {
1510     locations->AddTemp(location);
1511   } else {
1512     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1513   }
1514 }
1515 
1516 void CodeGeneratorARM64::MaybeMarkGCCard(Register object, Register value, bool emit_null_check) {
1517   vixl::aarch64::Label done;
1518   if (emit_null_check) {
1519     __ Cbz(value, &done);
1520   }
1521   MarkGCCard(object);
1522   if (emit_null_check) {
1523     __ Bind(&done);
1524   }
1525 }
1526 
1527 void CodeGeneratorARM64::MarkGCCard(Register object) {
1528   UseScratchRegisterScope temps(GetVIXLAssembler());
1529   Register card = temps.AcquireX();
1530   Register temp = temps.AcquireW();  // Index within the CardTable - 32bit.
1531   // Load the address of the card table into `card`.
1532   __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1533   // Calculate the offset (in the card table) of the card corresponding to `object`.
1534   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
1535   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
1536   // `object`'s card.
1537   //
1538   // Register `card` contains the address of the card table. Note that the card
1539   // table's base is biased during its creation so that it always starts at an
1540   // address whose least-significant byte is equal to `kCardDirty` (see
1541   // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
1542   // below writes the `kCardDirty` (byte) value into the `object`'s card
1543   // (located at `card + object >> kCardShift`).
1544   //
1545   // This dual use of the value in register `card` (1. to calculate the location
1546   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
1547   // (no need to explicitly load `kCardDirty` as an immediate value).
1548   __ Strb(card, MemOperand(card, temp.X()));
1549 }
1550 
1551 void CodeGeneratorARM64::CheckGCCardIsValid(Register object) {
1552   UseScratchRegisterScope temps(GetVIXLAssembler());
1553   Register card = temps.AcquireX();
1554   Register temp = temps.AcquireW();  // Index within the CardTable - 32bit.
1555   vixl::aarch64::Label done;
1556   // Load the address of the card table into `card`.
1557   __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1558   // Calculate the offset (in the card table) of the card corresponding to `object`.
1559   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
1560   // assert (!clean || !self->is_gc_marking)
1561   __ Ldrb(temp, MemOperand(card, temp.X()));
1562   static_assert(gc::accounting::CardTable::kCardClean == 0);
1563   __ Cbnz(temp, &done);
1564   __ Cbz(mr, &done);
1565   __ Unreachable();
1566   __ Bind(&done);
1567 }
1568 
1569 void CodeGeneratorARM64::SetupBlockedRegisters() const {
1570   // Blocked core registers:
1571   //      lr        : Runtime reserved.
1572   //      tr        : Runtime reserved.
1573   //      mr        : Runtime reserved.
1574   //      ip1       : VIXL core temp.
1575   //      ip0       : VIXL core temp.
1576   //      x18       : Platform register.
1577   //
1578   // Blocked fp registers:
1579   //      d31       : VIXL fp temp.
1580   CPURegList reserved_core_registers = vixl_reserved_core_registers;
1581   reserved_core_registers.Combine(runtime_reserved_core_registers);
1582   while (!reserved_core_registers.IsEmpty()) {
1583     blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
1584   }
1585   blocked_core_registers_[X18] = true;
1586 
1587   CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
1588   while (!reserved_fp_registers.IsEmpty()) {
1589     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
1590   }
1591 
1592   if (GetGraph()->IsDebuggable()) {
1593     // Stubs do not save callee-save floating point registers. If the graph
1594     // is debuggable, we need to deal with these registers differently. For
1595     // now, just block them.
1596     CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
1597     while (!reserved_fp_registers_debuggable.IsEmpty()) {
1598       blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
1599     }
1600   }
1601 }
1602 
1603 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1604   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1605   __ Str(reg, MemOperand(sp, stack_index));
1606   return kArm64WordSize;
1607 }
1608 
1609 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1610   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1611   __ Ldr(reg, MemOperand(sp, stack_index));
1612   return kArm64WordSize;
1613 }
1614 
1615 size_t CodeGeneratorARM64::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index,
1616                                                      [[maybe_unused]] uint32_t reg_id) {
1617   LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
1618              << "use SaveRestoreLiveRegistersHelper";
1619   UNREACHABLE();
1620 }
1621 
1622 size_t CodeGeneratorARM64::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index,
1623                                                         [[maybe_unused]] uint32_t reg_id) {
1624   LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
1625              << "use SaveRestoreLiveRegistersHelper";
1626   UNREACHABLE();
1627 }
1628 
1629 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
1630   stream << XRegister(reg);
1631 }
1632 
1633 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1634   stream << DRegister(reg);
1635 }
1636 
1637 const Arm64InstructionSetFeatures& CodeGeneratorARM64::GetInstructionSetFeatures() const {
1638   return *GetCompilerOptions().GetInstructionSetFeatures()->AsArm64InstructionSetFeatures();
1639 }
1640 
1641 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
1642   if (constant->IsIntConstant()) {
1643     __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
1644   } else if (constant->IsLongConstant()) {
1645     __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
1646   } else if (constant->IsNullConstant()) {
1647     __ Mov(Register(destination), 0);
1648   } else if (constant->IsFloatConstant()) {
1649     __ Fmov(VRegister(destination), constant->AsFloatConstant()->GetValue());
1650   } else {
1651     DCHECK(constant->IsDoubleConstant());
1652     __ Fmov(VRegister(destination), constant->AsDoubleConstant()->GetValue());
1653   }
1654 }
1655 
1656 
1657 static bool CoherentConstantAndType(Location constant, DataType::Type type) {
1658   DCHECK(constant.IsConstant());
1659   HConstant* cst = constant.GetConstant();
1660   return (cst->IsIntConstant() && type == DataType::Type::kInt32) ||
1661          // Null is mapped to a core W register, which we associate with kPrimInt.
1662          (cst->IsNullConstant() && type == DataType::Type::kInt32) ||
1663          (cst->IsLongConstant() && type == DataType::Type::kInt64) ||
1664          (cst->IsFloatConstant() && type == DataType::Type::kFloat32) ||
1665          (cst->IsDoubleConstant() && type == DataType::Type::kFloat64);
1666 }
1667 
1668 // Allocate a scratch register from the VIXL pool, querying first
1669 // the floating-point register pool, and then the core register
1670 // pool. This is essentially a reimplementation of
1671 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
1672 // using a different allocation strategy.
1673 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
1674                                                     vixl::aarch64::UseScratchRegisterScope* temps,
1675                                                     int size_in_bits) {
1676   return masm->GetScratchVRegisterList()->IsEmpty()
1677       ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
1678       : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
1679 }
1680 
1681 void CodeGeneratorARM64::MoveLocation(Location destination,
1682                                       Location source,
1683                                       DataType::Type dst_type) {
1684   if (source.Equals(destination)) {
1685     return;
1686   }
1687 
1688   // A valid move can always be inferred from the destination and source
1689   // locations. When moving from and to a register, the argument type can be
1690   // used to generate 32bit instead of 64bit moves. In debug mode we also
1691   // check the coherency of the locations and the type.
1692   bool unspecified_type = (dst_type == DataType::Type::kVoid);
1693 
1694   if (destination.IsRegister() || destination.IsFpuRegister()) {
1695     if (unspecified_type) {
1696       HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
1697       if (source.IsStackSlot() ||
1698           (src_cst != nullptr && (src_cst->IsIntConstant()
1699                                   || src_cst->IsFloatConstant()
1700                                   || src_cst->IsNullConstant()))) {
1701         // For stack slots and 32bit constants, a 32bit type is appropriate.
1702         dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32;
1703       } else {
1704         // If the source is a double stack slot or a 64bit constant, a 64bit
1705         // type is appropriate. Else the source is a register, and since the
1706         // type has not been specified, we choose a 64bit type to force a 64bit
1707         // move.
1708         dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64;
1709       }
1710     }
1711     DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) ||
1712            (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type)));
1713     CPURegister dst = CPURegisterFrom(destination, dst_type);
1714     if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
1715       DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
1716       __ Ldr(dst, StackOperandFrom(source));
1717     } else if (source.IsSIMDStackSlot()) {
1718       GetInstructionCodeGeneratorArm64()->LoadSIMDRegFromStack(destination, source);
1719     } else if (source.IsConstant()) {
1720       DCHECK(CoherentConstantAndType(source, dst_type));
1721       MoveConstant(dst, source.GetConstant());
1722     } else if (source.IsRegister()) {
1723       if (destination.IsRegister()) {
1724         __ Mov(Register(dst), RegisterFrom(source, dst_type));
1725       } else {
1726         DCHECK(destination.IsFpuRegister());
1727         DataType::Type source_type = DataType::Is64BitType(dst_type)
1728             ? DataType::Type::kInt64
1729             : DataType::Type::kInt32;
1730         __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
1731       }
1732     } else {
1733       DCHECK(source.IsFpuRegister());
1734       if (destination.IsRegister()) {
1735         DataType::Type source_type = DataType::Is64BitType(dst_type)
1736             ? DataType::Type::kFloat64
1737             : DataType::Type::kFloat32;
1738         __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
1739       } else {
1740         DCHECK(destination.IsFpuRegister());
1741         if (GetGraph()->HasSIMD()) {
1742           GetInstructionCodeGeneratorArm64()->MoveSIMDRegToSIMDReg(destination, source);
1743         } else {
1744           __ Fmov(VRegister(dst), FPRegisterFrom(source, dst_type));
1745         }
1746       }
1747     }
1748   } else if (destination.IsSIMDStackSlot()) {
1749     GetInstructionCodeGeneratorArm64()->MoveToSIMDStackSlot(destination, source);
1750   } else {  // The destination is not a register. It must be a stack slot.
1751     DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
1752     if (source.IsRegister() || source.IsFpuRegister()) {
1753       if (unspecified_type) {
1754         if (source.IsRegister()) {
1755           dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64;
1756         } else {
1757           dst_type =
1758               destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64;
1759         }
1760       }
1761       DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) &&
1762              (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type)));
1763       __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
1764     } else if (source.IsConstant()) {
1765       DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
1766           << source << " " << dst_type;
1767       UseScratchRegisterScope temps(GetVIXLAssembler());
1768       HConstant* src_cst = source.GetConstant();
1769       CPURegister temp;
1770       if (src_cst->IsZeroBitPattern()) {
1771         temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
1772             ? Register(xzr)
1773             : Register(wzr);
1774       } else {
1775         if (src_cst->IsIntConstant()) {
1776           temp = temps.AcquireW();
1777         } else if (src_cst->IsLongConstant()) {
1778           temp = temps.AcquireX();
1779         } else if (src_cst->IsFloatConstant()) {
1780           temp = temps.AcquireS();
1781         } else {
1782           DCHECK(src_cst->IsDoubleConstant());
1783           temp = temps.AcquireD();
1784         }
1785         MoveConstant(temp, src_cst);
1786       }
1787       __ Str(temp, StackOperandFrom(destination));
1788     } else {
1789       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
1790       DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
1791       UseScratchRegisterScope temps(GetVIXLAssembler());
1792       // Use any scratch register (a core or a floating-point one)
1793       // from VIXL scratch register pools as a temporary.
1794       //
1795       // We used to only use the FP scratch register pool, but in some
1796       // rare cases the only register from this pool (D31) would
1797       // already be used (e.g. within a ParallelMove instruction, when
1798       // a move is blocked by another move requiring a scratch FP
1799       // register, which would reserve D31). To prevent this issue, we
1800       // ask for a scratch register of any type (core or FP).
1801       //
1802       // Also, we ask for an FP scratch register first, as the
1803       // demand for scratch core registers is higher. This is why we
1804       // use AcquireFPOrCoreCPURegisterOfSize instead of
1805       // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
1806       // allocates core scratch registers first.
1807       CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
1808           GetVIXLAssembler(),
1809           &temps,
1810           (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
1811       __ Ldr(temp, StackOperandFrom(source));
1812       __ Str(temp, StackOperandFrom(destination));
1813     }
1814   }
1815 }
1816 
1817 void CodeGeneratorARM64::Load(DataType::Type type,
1818                               CPURegister dst,
1819                               const MemOperand& src) {
1820   switch (type) {
1821     case DataType::Type::kBool:
1822     case DataType::Type::kUint8:
1823       __ Ldrb(Register(dst), src);
1824       break;
1825     case DataType::Type::kInt8:
1826       __ Ldrsb(Register(dst), src);
1827       break;
1828     case DataType::Type::kUint16:
1829       __ Ldrh(Register(dst), src);
1830       break;
1831     case DataType::Type::kInt16:
1832       __ Ldrsh(Register(dst), src);
1833       break;
1834     case DataType::Type::kInt32:
1835     case DataType::Type::kReference:
1836     case DataType::Type::kInt64:
1837     case DataType::Type::kFloat32:
1838     case DataType::Type::kFloat64:
1839       DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1840       __ Ldr(dst, src);
1841       break;
1842     case DataType::Type::kUint32:
1843     case DataType::Type::kUint64:
1844     case DataType::Type::kVoid:
1845       LOG(FATAL) << "Unreachable type " << type;
1846   }
1847 }
1848 
1849 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
1850                                      DataType::Type type,
1851                                      CPURegister dst,
1852                                      const MemOperand& src,
1853                                      bool needs_null_check) {
1854   MacroAssembler* masm = GetVIXLAssembler();
1855   UseScratchRegisterScope temps(masm);
1856   Register temp_base = temps.AcquireX();
1857 
1858   DCHECK(!src.IsPreIndex());
1859   DCHECK(!src.IsPostIndex());
1860 
1861   // TODO(vixl): Let the MacroAssembler handle MemOperand.
1862   __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
1863   {
1864     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
1865     MemOperand base = MemOperand(temp_base);
1866     switch (type) {
1867       case DataType::Type::kBool:
1868       case DataType::Type::kUint8:
1869       case DataType::Type::kInt8:
1870         {
1871           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1872           __ ldarb(Register(dst), base);
1873           if (needs_null_check) {
1874             MaybeRecordImplicitNullCheck(instruction);
1875           }
1876         }
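        // LDARB zero-extends into the W register, so kInt8 needs the explicit sign extension below.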
1877         if (type == DataType::Type::kInt8) {
1878           __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1879         }
1880         break;
1881       case DataType::Type::kUint16:
1882       case DataType::Type::kInt16:
1883         {
1884           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1885           __ ldarh(Register(dst), base);
1886           if (needs_null_check) {
1887             MaybeRecordImplicitNullCheck(instruction);
1888           }
1889         }
1890         if (type == DataType::Type::kInt16) {
1891           __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1892         }
1893         break;
1894       case DataType::Type::kInt32:
1895       case DataType::Type::kReference:
1896       case DataType::Type::kInt64:
1897         DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1898         {
1899           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1900           __ ldar(Register(dst), base);
1901           if (needs_null_check) {
1902             MaybeRecordImplicitNullCheck(instruction);
1903           }
1904         }
1905         break;
1906       case DataType::Type::kFloat32:
1907       case DataType::Type::kFloat64: {
1908         DCHECK(dst.IsFPRegister());
1909         DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1910 
1911         Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
1912         {
1913           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1914           __ ldar(temp, base);
1915           if (needs_null_check) {
1916             MaybeRecordImplicitNullCheck(instruction);
1917           }
1918         }
1919         __ Fmov(VRegister(dst), temp);
1920         break;
1921       }
1922       case DataType::Type::kUint32:
1923       case DataType::Type::kUint64:
1924       case DataType::Type::kVoid:
1925         LOG(FATAL) << "Unreachable type " << type;
1926     }
1927   }
1928 }
1929 
1930 void CodeGeneratorARM64::Store(DataType::Type type,
1931                                CPURegister src,
1932                                const MemOperand& dst) {
1933   switch (type) {
1934     case DataType::Type::kBool:
1935     case DataType::Type::kUint8:
1936     case DataType::Type::kInt8:
1937       __ Strb(Register(src), dst);
1938       break;
1939     case DataType::Type::kUint16:
1940     case DataType::Type::kInt16:
1941       __ Strh(Register(src), dst);
1942       break;
1943     case DataType::Type::kInt32:
1944     case DataType::Type::kReference:
1945     case DataType::Type::kInt64:
1946     case DataType::Type::kFloat32:
1947     case DataType::Type::kFloat64:
1948       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1949       __ Str(src, dst);
1950       break;
1951     case DataType::Type::kUint32:
1952     case DataType::Type::kUint64:
1953     case DataType::Type::kVoid:
1954       LOG(FATAL) << "Unreachable type " << type;
1955   }
1956 }
1957 
1958 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
1959                                       DataType::Type type,
1960                                       CPURegister src,
1961                                       const MemOperand& dst,
1962                                       bool needs_null_check) {
1963   MacroAssembler* masm = GetVIXLAssembler();
1964   UseScratchRegisterScope temps(GetVIXLAssembler());
1965   Register temp_base = temps.AcquireX();
1966 
1967   DCHECK(!dst.IsPreIndex());
1968   DCHECK(!dst.IsPostIndex());
1969 
1970   // TODO(vixl): Let the MacroAssembler handle this.
1971   Operand op = OperandFromMemOperand(dst);
1972   __ Add(temp_base, dst.GetBaseRegister(), op);
1973   MemOperand base = MemOperand(temp_base);
1974   // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
1975   switch (type) {
1976     case DataType::Type::kBool:
1977     case DataType::Type::kUint8:
1978     case DataType::Type::kInt8:
1979       {
1980         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1981         __ stlrb(Register(src), base);
1982         if (needs_null_check) {
1983           MaybeRecordImplicitNullCheck(instruction);
1984         }
1985       }
1986       break;
1987     case DataType::Type::kUint16:
1988     case DataType::Type::kInt16:
1989       {
1990         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1991         __ stlrh(Register(src), base);
1992         if (needs_null_check) {
1993           MaybeRecordImplicitNullCheck(instruction);
1994         }
1995       }
1996       break;
1997     case DataType::Type::kInt32:
1998     case DataType::Type::kReference:
1999     case DataType::Type::kInt64:
2000       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
2001       {
2002         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2003         __ stlr(Register(src), base);
2004         if (needs_null_check) {
2005           MaybeRecordImplicitNullCheck(instruction);
2006         }
2007       }
2008       break;
2009     case DataType::Type::kFloat32:
2010     case DataType::Type::kFloat64: {
2011       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
2012       Register temp_src;
2013       if (src.IsZero()) {
2014         // The zero register is used to avoid synthesizing zero constants.
2015         temp_src = Register(src);
2016       } else {
2017         DCHECK(src.IsFPRegister());
2018         temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
2019         __ Fmov(temp_src, VRegister(src));
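        // STLR only accepts core registers, hence the move out of the FP register above.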
2020       }
2021       {
2022         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2023         __ stlr(temp_src, base);
2024         if (needs_null_check) {
2025           MaybeRecordImplicitNullCheck(instruction);
2026         }
2027       }
2028       break;
2029     }
2030     case DataType::Type::kUint32:
2031     case DataType::Type::kUint64:
2032     case DataType::Type::kVoid:
2033       LOG(FATAL) << "Unreachable type " << type;
2034   }
2035 }
2036 
2037 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
2038                                        HInstruction* instruction,
2039                                        uint32_t dex_pc,
2040                                        SlowPathCode* slow_path) {
2041   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2042 
2043   ThreadOffset64 entrypoint_offset = GetThreadOffset<kArm64PointerSize>(entrypoint);
2044   // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
2045   // entire oat file. This adds an extra branch and we do not want to slow down the main path.
2046   // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
2047   if (slow_path == nullptr || GetCompilerOptions().IsJitCompiler()) {
2048     __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
2049     // Ensure the pc position is recorded immediately after the `blr` instruction.
2050     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
2051     __ blr(lr);
2052     if (EntrypointRequiresStackMap(entrypoint)) {
2053       RecordPcInfo(instruction, dex_pc, slow_path);
2054     }
2055   } else {
2056     // Ensure the pc position is recorded immediately after the `bl` instruction.
2057     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
2058     EmitEntrypointThunkCall(entrypoint_offset);
2059     if (EntrypointRequiresStackMap(entrypoint)) {
2060       RecordPcInfo(instruction, dex_pc, slow_path);
2061     }
2062   }
2063 }
2064 
2065 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2066                                                              HInstruction* instruction,
2067                                                              SlowPathCode* slow_path) {
2068   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2069   __ Ldr(lr, MemOperand(tr, entry_point_offset));
2070   __ Blr(lr);
2071 }
2072 
2073 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
2074                                                                      Register class_reg) {
2075   UseScratchRegisterScope temps(GetVIXLAssembler());
2076   Register temp = temps.AcquireW();
2077 
2078   // CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize
2079   // the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code
2080   // size, load only the high byte of the field and compare with 0xf0.
2081   // Note: The same code size could be achieved with LDR+MVN(asr #24)+CBNZ but benchmarks
2082   // show that this pattern is slower (tested on little cores).
2083   __ Ldrb(temp, HeapOperand(class_reg, kClassStatusByteOffset));
2084   __ Cmp(temp, kShiftedVisiblyInitializedValue);
2085   __ B(lo, slow_path->GetEntryLabel());
2086   __ Bind(slow_path->GetExitLabel());
2087 }
2088 
2089 void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare(
2090     HTypeCheckInstruction* check, vixl::aarch64::Register temp) {
2091   uint32_t path_to_root = check->GetBitstringPathToRoot();
2092   uint32_t mask = check->GetBitstringMask();
2093   DCHECK(IsPowerOfTwo(mask + 1));
2094   size_t mask_bits = WhichPowerOf2(mask + 1);
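  // Conceptually, the check succeeds iff (checked_class->status_ & mask) == path_to_root, i.e. iff
  // the target class's path-to-root bitstring is a prefix of the checked class's bitstring. The
  // Cmp below only sets the flags; the caller branches on eq/ne.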
2095 
2096   if (mask_bits == 16u) {
2097     // Load only the bitstring part of the status word.
2098     __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
2099   } else {
2100     // /* uint32_t */ temp = temp->status_
2101     __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
2102     // Extract the bitstring bits.
2103     __ Ubfx(temp, temp, 0, mask_bits);
2104   }
2105   // Compare the bitstring bits to `path_to_root`.
2106   __ Cmp(temp, path_to_root);
2107 }
2108 
2109 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
2110   BarrierType type = BarrierAll;
2111 
2112   switch (kind) {
2113     case MemBarrierKind::kAnyAny:
2114     case MemBarrierKind::kAnyStore: {
2115       type = BarrierAll;
2116       break;
2117     }
2118     case MemBarrierKind::kLoadAny: {
2119       type = BarrierReads;
2120       break;
2121     }
2122     case MemBarrierKind::kStoreStore: {
2123       type = BarrierWrites;
2124       break;
2125     }
2126     default:
2127       LOG(FATAL) << "Unexpected memory barrier " << kind;
2128   }
2129   __ Dmb(InnerShareable, type);
2130 }
2131 
2132 bool CodeGeneratorARM64::CanUseImplicitSuspendCheck() const {
2133   // Use implicit suspend checks if requested in compiler options unless there are SIMD
2134   // instructions in the graph. The implicit suspend check saves all FP registers as
2135   // 64-bit (in line with the calling convention) but SIMD instructions can use 128-bit
2136   // registers, so they need to be saved in an explicit slow path.
2137   return GetCompilerOptions().GetImplicitSuspendChecks() && !GetGraph()->HasSIMD();
2138 }
2139 
2140 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
2141                                                          HBasicBlock* successor) {
2142   if (instruction->IsNoOp()) {
2143     if (successor != nullptr) {
2144       __ B(codegen_->GetLabelOf(successor));
2145     }
2146     return;
2147   }
2148 
2149   if (codegen_->CanUseImplicitSuspendCheck()) {
2150     __ Ldr(kImplicitSuspendCheckRegister, MemOperand(kImplicitSuspendCheckRegister));
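    // The runtime arranges for this load to fault when a suspend is requested; the fault handler
    // then performs the suspend, using the stack map recorded by RecordPcInfo below.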
2151     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
2152     if (successor != nullptr) {
2153       __ B(codegen_->GetLabelOf(successor));
2154     }
2155     return;
2156   }
2157 
2158   SuspendCheckSlowPathARM64* slow_path =
2159       down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
2160   if (slow_path == nullptr) {
2161     slow_path =
2162         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARM64(instruction, successor);
2163     instruction->SetSlowPath(slow_path);
2164     codegen_->AddSlowPath(slow_path);
2165     if (successor != nullptr) {
2166       DCHECK(successor->IsLoopHeader());
2167     }
2168   } else {
2169     DCHECK_EQ(slow_path->GetSuccessor(), successor);
2170   }
2171 
2172   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
2173   Register temp = temps.AcquireW();
2174 
2175   __ Ldr(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
2176   __ Tst(temp, Thread::SuspendOrCheckpointRequestFlags());
2177   if (successor == nullptr) {
2178     __ B(ne, slow_path->GetEntryLabel());
2179     __ Bind(slow_path->GetReturnLabel());
2180   } else {
2181     __ B(eq, codegen_->GetLabelOf(successor));
2182     __ B(slow_path->GetEntryLabel());
2183     // slow_path will return to GetLabelOf(successor).
2184   }
2185 }
2186 
2187 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
2188                                                              CodeGeneratorARM64* codegen)
2189       : InstructionCodeGenerator(graph, codegen),
2190         assembler_(codegen->GetAssembler()),
2191         codegen_(codegen) {}
2192 
2193 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
2194   DCHECK_EQ(instr->InputCount(), 2U);
2195   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2196   DataType::Type type = instr->GetResultType();
2197   switch (type) {
2198     case DataType::Type::kInt32:
2199     case DataType::Type::kInt64:
2200       locations->SetInAt(0, Location::RequiresRegister());
2201       locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
2202       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2203       break;
2204 
2205     case DataType::Type::kFloat32:
2206     case DataType::Type::kFloat64:
2207       locations->SetInAt(0, Location::RequiresFpuRegister());
2208       locations->SetInAt(1, Location::RequiresFpuRegister());
2209       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2210       break;
2211 
2212     default:
2213       LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
2214   }
2215 }
2216 
2217 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
2218                                            const FieldInfo& field_info) {
2219   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2220 
2221   bool object_field_get_with_read_barrier =
2222       (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
2223   LocationSummary* locations =
2224       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2225                                                        object_field_get_with_read_barrier
2226                                                            ? LocationSummary::kCallOnSlowPath
2227                                                            : LocationSummary::kNoCall);
2228   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
2229     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2230     // We need a temporary register for the read barrier load in
2231     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
2232     // only if the field is volatile or the offset is too big.
2233     if (field_info.IsVolatile() ||
2234         field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
2235       locations->AddTemp(FixedTempLocation());
2236     }
2237   }
2238   // Input for object receiver.
2239   locations->SetInAt(0, Location::RequiresRegister());
2240   if (DataType::IsFloatingPointType(instruction->GetType())) {
2241     locations->SetOut(Location::RequiresFpuRegister());
2242   } else {
2243     // The output overlaps for an object field get when read barriers
2244     // are enabled: we do not want the load to overwrite the object's
2245     // location, as we need it to emit the read barrier.
2246     locations->SetOut(
2247         Location::RequiresRegister(),
2248         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2249   }
2250 }
2251 
2252 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
2253                                                    const FieldInfo& field_info) {
2254   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2255   LocationSummary* locations = instruction->GetLocations();
2256   uint32_t receiver_input = 0;
2257   Location base_loc = locations->InAt(receiver_input);
2258   Location out = locations->Out();
2259   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
2260   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
2261   DataType::Type load_type = instruction->GetType();
2262   MemOperand field =
2263       HeapOperand(InputRegisterAt(instruction, receiver_input), field_info.GetFieldOffset());
2264 
2265   if (load_type == DataType::Type::kReference && codegen_->EmitBakerReadBarrier()) {
2266     // Object FieldGet with Baker's read barrier case.
2267     // /* HeapReference<Object> */ out = *(base + offset)
2268     Register base = RegisterFrom(base_loc, DataType::Type::kReference);
2269     Location maybe_temp =
2270         (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2271     // Note that potential implicit null checks are handled in this
2272     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
2273     codegen_->GenerateFieldLoadWithBakerReadBarrier(
2274         instruction,
2275         out,
2276         base,
2277         offset,
2278         maybe_temp,
2279         /* needs_null_check= */ true,
2280         field_info.IsVolatile());
2281   } else {
2282     // General case.
2283     if (field_info.IsVolatile()) {
2284       // Note that a potential implicit null check is handled in this
2285       // CodeGeneratorARM64::LoadAcquire call.
2286       // NB: LoadAcquire will record the pc info if needed.
2287       codegen_->LoadAcquire(instruction,
2288                             load_type,
2289                             OutputCPURegister(instruction),
2290                             field,
2291                             /* needs_null_check= */ true);
2292     } else {
2293       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2294       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2295       codegen_->Load(load_type, OutputCPURegister(instruction), field);
2296       codegen_->MaybeRecordImplicitNullCheck(instruction);
2297     }
2298     if (load_type == DataType::Type::kReference) {
2299       // If read barriers are enabled, emit read barriers other than
2300       // Baker's using a slow path (and also unpoison the loaded
2301       // reference, if heap poisoning is enabled).
2302       codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
2303     }
2304   }
2305 }
2306 
2307 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
2308   LocationSummary* locations =
2309       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2310   locations->SetInAt(0, Location::RequiresRegister());
2311   HInstruction* value = instruction->InputAt(1);
2312   if (IsZeroBitPattern(value)) {
2313     locations->SetInAt(1, Location::ConstantLocation(value));
2314   } else if (DataType::IsFloatingPointType(value->GetType())) {
2315     locations->SetInAt(1, Location::RequiresFpuRegister());
2316   } else {
2317     locations->SetInAt(1, Location::RequiresRegister());
2318   }
2319 }
2320 
void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
                                                   const FieldInfo& field_info,
                                                   bool value_can_be_null,
                                                   WriteBarrierKind write_barrier_kind) {
2325   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
2326 
2327   Register obj = InputRegisterAt(instruction, 0);
2328   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
2329   CPURegister source = value;
2330   Offset offset = field_info.GetFieldOffset();
2331   DataType::Type field_type = field_info.GetFieldType();
2332   {
2333     // We use a block to end the scratch scope before the write barrier, thus
2334     // freeing the temporary registers so they can be used in `MarkGCCard`.
2335     UseScratchRegisterScope temps(GetVIXLAssembler());
2336 
2337     if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
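      // Poison a copy of the reference in a scratch register: the store must write the poisoned
      // bits, but the original `value` register is still needed unpoisoned for the card marking
      // code after this block.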
2338       DCHECK(value.IsW());
2339       Register temp = temps.AcquireW();
2340       __ Mov(temp, value.W());
2341       GetAssembler()->PoisonHeapReference(temp.W());
2342       source = temp;
2343     }
2344 
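    // Volatile stores are emitted as a store-release, so no separate memory barrier is needed.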
2345     if (field_info.IsVolatile()) {
2346       codegen_->StoreRelease(
2347           instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check= */ true);
2348     } else {
2349       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2350       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2351       codegen_->Store(field_type, source, HeapOperand(obj, offset));
2352       codegen_->MaybeRecordImplicitNullCheck(instruction);
2353     }
2354   }
2355 
2356   const bool needs_write_barrier =
2357       codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
2358 
2359   if (needs_write_barrier) {
2360     DCHECK_IMPLIES(Register(value).IsZero(),
2361                    write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn);
2362     codegen_->MaybeMarkGCCard(
2363         obj,
2364         Register(value),
2365         value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
2366   } else if (codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind)) {
2367     codegen_->CheckGCCardIsValid(obj);
2368   }
2369 }
2370 
void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
2372   DataType::Type type = instr->GetType();
2373 
2374   switch (type) {
2375     case DataType::Type::kInt32:
2376     case DataType::Type::kInt64: {
2377       Register dst = OutputRegister(instr);
2378       Register lhs = InputRegisterAt(instr, 0);
2379       Operand rhs = InputOperandAt(instr, 1);
2380       if (instr->IsAdd()) {
2381         __ Add(dst, lhs, rhs);
2382       } else if (instr->IsAnd()) {
2383         __ And(dst, lhs, rhs);
2384       } else if (instr->IsOr()) {
2385         __ Orr(dst, lhs, rhs);
2386       } else if (instr->IsSub()) {
2387         __ Sub(dst, lhs, rhs);
2388       } else if (instr->IsRor()) {
2389         if (rhs.IsImmediate()) {
2390           uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
2391           __ Ror(dst, lhs, shift);
2392         } else {
          // Ensure the shift distance is in a register of the same size as the result. If
          // we are rotating a long and the shift distance originally arrives in a W register,
          // we do not need to sxtw it for use as an X register, because shift distances are
          // always masked with (reg_bits - 1).
2397           __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
2398         }
2399       } else if (instr->IsMin() || instr->IsMax()) {
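          // Materialize Min/Max as a compare followed by a conditional select: keep `lhs` when it
          // is the smaller (Min) or larger (Max) of the two operands, otherwise take `rhs`.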
2400           __ Cmp(lhs, rhs);
2401           __ Csel(dst, lhs, rhs, instr->IsMin() ? lt : gt);
2402       } else {
2403         DCHECK(instr->IsXor());
2404         __ Eor(dst, lhs, rhs);
2405       }
2406       break;
2407     }
2408     case DataType::Type::kFloat32:
2409     case DataType::Type::kFloat64: {
2410       VRegister dst = OutputFPRegister(instr);
2411       VRegister lhs = InputFPRegisterAt(instr, 0);
2412       VRegister rhs = InputFPRegisterAt(instr, 1);
2413       if (instr->IsAdd()) {
2414         __ Fadd(dst, lhs, rhs);
2415       } else if (instr->IsSub()) {
2416         __ Fsub(dst, lhs, rhs);
2417       } else if (instr->IsMin()) {
2418         __ Fmin(dst, lhs, rhs);
2419       } else if (instr->IsMax()) {
2420         __ Fmax(dst, lhs, rhs);
2421       } else {
2422         LOG(FATAL) << "Unexpected floating-point binary operation";
2423       }
2424       break;
2425     }
2426     default:
2427       LOG(FATAL) << "Unexpected binary operation type " << type;
2428   }
2429 }
2430 
void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
2432   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2433 
2434   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2435   DataType::Type type = instr->GetResultType();
2436   switch (type) {
2437     case DataType::Type::kInt32:
2438     case DataType::Type::kInt64: {
2439       locations->SetInAt(0, Location::RequiresRegister());
2440       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
2441       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2442       break;
2443     }
2444     default:
2445       LOG(FATAL) << "Unexpected shift type " << type;
2446   }
2447 }
2448 
void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
2450   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2451 
2452   DataType::Type type = instr->GetType();
2453   switch (type) {
2454     case DataType::Type::kInt32:
2455     case DataType::Type::kInt64: {
2456       Register dst = OutputRegister(instr);
2457       Register lhs = InputRegisterAt(instr, 0);
2458       Operand rhs = InputOperandAt(instr, 1);
2459       if (rhs.IsImmediate()) {
2460         uint32_t shift_value = rhs.GetImmediate() &
2461             (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance);
2462         if (instr->IsShl()) {
2463           __ Lsl(dst, lhs, shift_value);
2464         } else if (instr->IsShr()) {
2465           __ Asr(dst, lhs, shift_value);
2466         } else {
2467           __ Lsr(dst, lhs, shift_value);
2468         }
2469       } else {
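        // Use the shift-distance register at the same width as the destination; AArch64 variable
        // shifts only consume the low bits (5 for W, 6 for X) of that register.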
2470         Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
2471 
2472         if (instr->IsShl()) {
2473           __ Lsl(dst, lhs, rhs_reg);
2474         } else if (instr->IsShr()) {
2475           __ Asr(dst, lhs, rhs_reg);
2476         } else {
2477           __ Lsr(dst, lhs, rhs_reg);
2478         }
2479       }
2480       break;
2481     }
2482     default:
2483       LOG(FATAL) << "Unexpected shift operation type " << type;
2484   }
2485 }
2486 
void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
2488   HandleBinaryOp(instruction);
2489 }
2490 
void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
2492   HandleBinaryOp(instruction);
2493 }
2494 
void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
2496   HandleBinaryOp(instruction);
2497 }
2498 
void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
2500   HandleBinaryOp(instruction);
2501 }
2502 
void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2504   DCHECK(DataType::IsIntegralType(instr->GetType())) << instr->GetType();
2505   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2506   locations->SetInAt(0, Location::RequiresRegister());
2507   // There is no immediate variant of negated bitwise instructions in AArch64.
2508   locations->SetInAt(1, Location::RequiresRegister());
2509   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2510 }
2511 
void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2513   Register dst = OutputRegister(instr);
2514   Register lhs = InputRegisterAt(instr, 0);
2515   Register rhs = InputRegisterAt(instr, 1);
2516 
2517   switch (instr->GetOpKind()) {
2518     case HInstruction::kAnd:
2519       __ Bic(dst, lhs, rhs);
2520       break;
2521     case HInstruction::kOr:
2522       __ Orn(dst, lhs, rhs);
2523       break;
2524     case HInstruction::kXor:
2525       __ Eon(dst, lhs, rhs);
2526       break;
2527     default:
2528       LOG(FATAL) << "Unreachable";
2529   }
2530 }
2531 
void LocationsBuilderARM64::VisitDataProcWithShifterOp(
    HDataProcWithShifterOp* instruction) {
2534   DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
2535          instruction->GetType() == DataType::Type::kInt64);
2536   LocationSummary* locations =
2537       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2538   if (instruction->GetInstrKind() == HInstruction::kNeg) {
2539     locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)));
2540   } else {
2541     locations->SetInAt(0, Location::RequiresRegister());
2542   }
2543   locations->SetInAt(1, Location::RequiresRegister());
2544   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2545 }
2546 
void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
    HDataProcWithShifterOp* instruction) {
2549   DataType::Type type = instruction->GetType();
2550   HInstruction::InstructionKind kind = instruction->GetInstrKind();
2551   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
2552   Register out = OutputRegister(instruction);
2553   Register left;
2554   if (kind != HInstruction::kNeg) {
2555     left = InputRegisterAt(instruction, 0);
2556   }
2557   // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
2558   // shifter operand operation, the IR generating `right_reg` (input to the type
2559   // conversion) can have a different type from the current instruction's type,
2560   // so we manually indicate the type.
2561   Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
2562   Operand right_operand(0);
2563 
2564   HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
2565   if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
2566     right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
2567   } else {
2568     right_operand = Operand(right_reg,
2569                             helpers::ShiftFromOpKind(op_kind),
2570                             instruction->GetShiftAmount());
2571   }
2572 
2573   // Logical binary operations do not support extension operations in the
2574   // operand. Note that VIXL would still manage if it was passed by generating
2575   // the extension as a separate instruction.
2576   // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
2577   DCHECK_IMPLIES(right_operand.IsExtendedRegister(),
2578                  kind != HInstruction::kAnd && kind != HInstruction::kOr &&
2579                      kind != HInstruction::kXor && kind != HInstruction::kNeg);
2580   switch (kind) {
2581     case HInstruction::kAdd:
2582       __ Add(out, left, right_operand);
2583       break;
2584     case HInstruction::kAnd:
2585       __ And(out, left, right_operand);
2586       break;
2587     case HInstruction::kNeg:
2588       DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
2589       __ Neg(out, right_operand);
2590       break;
2591     case HInstruction::kOr:
2592       __ Orr(out, left, right_operand);
2593       break;
2594     case HInstruction::kSub:
2595       __ Sub(out, left, right_operand);
2596       break;
2597     case HInstruction::kXor:
2598       __ Eor(out, left, right_operand);
2599       break;
2600     default:
2601       LOG(FATAL) << "Unexpected operation kind: " << kind;
2602       UNREACHABLE();
2603   }
2604 }
2605 
void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2607   LocationSummary* locations =
2608       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2609   locations->SetInAt(0, Location::RequiresRegister());
2610   locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
2611   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2612 }
2613 
void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2615   __ Add(OutputRegister(instruction),
2616          InputRegisterAt(instruction, 0),
2617          Operand(InputOperandAt(instruction, 1)));
2618 }
2619 
void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
2621   LocationSummary* locations =
2622       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2623 
2624   HIntConstant* shift = instruction->GetShift()->AsIntConstant();
2625 
2626   locations->SetInAt(0, Location::RequiresRegister());
  // For the byte case we don't need to shift the index variable, so we can encode the data
  // offset directly into the ADD instruction. For other cases we prefer the data_offset to be
  // in a register; that hoists the data offset constant generation out of the loop and reduces
  // the critical path length in the loop.
2631   locations->SetInAt(1, shift->GetValue() == 0
2632                         ? Location::ConstantLocation(instruction->GetOffset())
2633                         : Location::RequiresRegister());
2634   locations->SetInAt(2, Location::ConstantLocation(shift));
2635   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2636 }
2637 
void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
    HIntermediateAddressIndex* instruction) {
2640   Register index_reg = InputRegisterAt(instruction, 0);
2641   uint32_t shift = Int64FromLocation(instruction->GetLocations()->InAt(2));
2642   uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
2643 
2644   if (shift == 0) {
2645     __ Add(OutputRegister(instruction), index_reg, offset);
2646   } else {
2647     Register offset_reg = InputRegisterAt(instruction, 1);
2648     __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
2649   }
2650 }
2651 
void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2653   LocationSummary* locations =
2654       new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
2655   HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2656   if (instr->GetOpKind() == HInstruction::kSub &&
2657       accumulator->IsConstant() &&
2658       accumulator->AsConstant()->IsArithmeticZero()) {
2659     // Don't allocate register for Mneg instruction.
2660   } else {
2661     locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
2662                        Location::RequiresRegister());
2663   }
2664   locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
2665   locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
2666   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2667 }
2668 
void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2670   Register res = OutputRegister(instr);
2671   Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
2672   Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
2673 
2674   // Avoid emitting code that could trigger Cortex A53's erratum 835769.
2675   // This fixup should be carried out for all multiply-accumulate instructions:
2676   // madd, msub, smaddl, smsubl, umaddl and umsubl.
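  // The erratum can produce an incorrect result when such an instruction directly follows a
  // memory load or store, so a NOP is inserted between the two when that pattern is detected:
  //   e.g.   ldr x1, [x2]
  //          nop
  //          madd x0, x3, x4, x1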
2677   if (instr->GetType() == DataType::Type::kInt64 &&
2678       codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
2679     MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
2680     ptrdiff_t off = masm->GetCursorOffset();
2681     if (off >= static_cast<ptrdiff_t>(kInstructionSize) &&
2682         masm->GetInstructionAt(off - static_cast<ptrdiff_t>(kInstructionSize))->IsLoadOrStore()) {
2683       // Make sure we emit only exactly one nop.
2684       ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2685       __ nop();
2686     }
2687   }
2688 
2689   if (instr->GetOpKind() == HInstruction::kAdd) {
2690     Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2691     __ Madd(res, mul_left, mul_right, accumulator);
2692   } else {
2693     DCHECK(instr->GetOpKind() == HInstruction::kSub);
2694     HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2695     if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
2696       __ Mneg(res, mul_left, mul_right);
2697     } else {
2698       Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2699       __ Msub(res, mul_left, mul_right, accumulator);
2700     }
2701   }
2702 }
2703 
void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
2705   bool object_array_get_with_read_barrier =
2706       (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
2707   LocationSummary* locations =
2708       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2709                                                        object_array_get_with_read_barrier
2710                                                            ? LocationSummary::kCallOnSlowPath
2711                                                            : LocationSummary::kNoCall);
2712   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
2713     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2714     if (instruction->GetIndex()->IsConstant()) {
2715       // Array loads with constant index are treated as field loads.
2716       // We need a temporary register for the read barrier load in
2717       // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
2718       // only if the offset is too big.
2719       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2720       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
2721       offset += index << DataType::SizeShift(DataType::Type::kReference);
2722       if (offset >= kReferenceLoadMinFarOffset) {
2723         locations->AddTemp(FixedTempLocation());
2724       }
2725     } else if (!instruction->GetArray()->IsIntermediateAddress()) {
2726       // We need a non-scratch temporary for the array data pointer in
2727       // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier() for the case with no
2728       // intermediate address.
2729       locations->AddTemp(Location::RequiresRegister());
2730     }
2731   }
2732   locations->SetInAt(0, Location::RequiresRegister());
2733   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2734   if (DataType::IsFloatingPointType(instruction->GetType())) {
2735     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2736   } else {
2737     // The output overlaps in the case of an object array get with
2738     // read barriers enabled: we do not want the move to overwrite the
2739     // array's location, as we need it to emit the read barrier.
2740     locations->SetOut(
2741         Location::RequiresRegister(),
2742         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2743   }
2744 }
2745 
void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
2747   DataType::Type type = instruction->GetType();
2748   Register obj = InputRegisterAt(instruction, 0);
2749   LocationSummary* locations = instruction->GetLocations();
2750   Location index = locations->InAt(1);
2751   Location out = locations->Out();
2752   uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2753   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
2754                                         instruction->IsStringCharAt();
2755   MacroAssembler* masm = GetVIXLAssembler();
2756   UseScratchRegisterScope temps(masm);
2757 
2758   // The non-Baker read barrier instrumentation of object ArrayGet instructions
2759   // does not support the HIntermediateAddress instruction.
2760   DCHECK(!((type == DataType::Type::kReference) &&
2761            instruction->GetArray()->IsIntermediateAddress() &&
2762            codegen_->EmitNonBakerReadBarrier()));
2763 
2764   if (type == DataType::Type::kReference && codegen_->EmitBakerReadBarrier()) {
2765     // Object ArrayGet with Baker's read barrier case.
2766     // Note that a potential implicit null check is handled in the
2767     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
2768     DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
2769     if (index.IsConstant()) {
2770       DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2771       // Array load with a constant index can be treated as a field load.
2772       offset += Int64FromLocation(index) << DataType::SizeShift(type);
2773       Location maybe_temp =
2774           (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2775       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
2776                                                       out,
2777                                                       obj.W(),
2778                                                       offset,
2779                                                       maybe_temp,
2780                                                       /* needs_null_check= */ false,
2781                                                       /* use_load_acquire= */ false);
2782     } else {
2783       codegen_->GenerateArrayLoadWithBakerReadBarrier(
2784           instruction, out, obj.W(), offset, index, /* needs_null_check= */ false);
2785     }
2786   } else {
2787     // General case.
2788     MemOperand source = HeapOperand(obj);
2789     Register length;
2790     if (maybe_compressed_char_at) {
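      // With string compression, String.charAt() must first load the count field: its least
      // significant bit records whether the characters are stored as 8-bit (compressed) or
      // 16-bit (uncompressed) units.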
2791       uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2792       length = temps.AcquireW();
2793       {
2794         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2795         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2796 
2797         if (instruction->GetArray()->IsIntermediateAddress()) {
2798           DCHECK_LT(count_offset, offset);
2799           int64_t adjusted_offset =
2800               static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
2801           // Note that `adjusted_offset` is negative, so this will be a LDUR.
2802           __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
2803         } else {
2804           __ Ldr(length, HeapOperand(obj, count_offset));
2805         }
2806         codegen_->MaybeRecordImplicitNullCheck(instruction);
2807       }
2808     }
2809     if (index.IsConstant()) {
2810       if (maybe_compressed_char_at) {
2811         vixl::aarch64::Label uncompressed_load, done;
2812         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2813                       "Expecting 0=compressed, 1=uncompressed");
2814         __ Tbnz(length.W(), 0, &uncompressed_load);
2815         __ Ldrb(Register(OutputCPURegister(instruction)),
2816                 HeapOperand(obj, offset + Int64FromLocation(index)));
2817         __ B(&done);
2818         __ Bind(&uncompressed_load);
2819         __ Ldrh(Register(OutputCPURegister(instruction)),
2820                 HeapOperand(obj, offset + (Int64FromLocation(index) << 1)));
2821         __ Bind(&done);
2822       } else {
2823         offset += Int64FromLocation(index) << DataType::SizeShift(type);
2824         source = HeapOperand(obj, offset);
2825       }
2826     } else {
2827       Register temp = temps.AcquireSameSizeAs(obj);
2828       if (instruction->GetArray()->IsIntermediateAddress()) {
2829         // We do not need to compute the intermediate address from the array: the
2830         // input instruction has done it already. See the comment in
2831         // `TryExtractArrayAccessAddress()`.
2832         if (kIsDebugBuild) {
2833           HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
2834           DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
2835         }
2836         temp = obj;
2837       } else {
2838         __ Add(temp, obj, offset);
2839       }
2840       if (maybe_compressed_char_at) {
2841         vixl::aarch64::Label uncompressed_load, done;
2842         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2843                       "Expecting 0=compressed, 1=uncompressed");
2844         __ Tbnz(length.W(), 0, &uncompressed_load);
2845         __ Ldrb(Register(OutputCPURegister(instruction)),
2846                 HeapOperand(temp, XRegisterFrom(index), LSL, 0));
2847         __ B(&done);
2848         __ Bind(&uncompressed_load);
2849         __ Ldrh(Register(OutputCPURegister(instruction)),
2850                 HeapOperand(temp, XRegisterFrom(index), LSL, 1));
2851         __ Bind(&done);
2852       } else {
2853         source = HeapOperand(temp, XRegisterFrom(index), LSL, DataType::SizeShift(type));
2854       }
2855     }
2856     if (!maybe_compressed_char_at) {
2857       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2858       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2859       codegen_->Load(type, OutputCPURegister(instruction), source);
2860       codegen_->MaybeRecordImplicitNullCheck(instruction);
2861     }
2862 
2863     if (type == DataType::Type::kReference) {
2864       static_assert(
2865           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
2866           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
2867       Location obj_loc = locations->InAt(0);
2868       if (index.IsConstant()) {
2869         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
2870       } else {
2871         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
2872       }
2873     }
2874   }
2875 }
2876 
void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
2878   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
2879   locations->SetInAt(0, Location::RequiresRegister());
2880   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2881 }
2882 
void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
2884   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
2885   vixl::aarch64::Register out = OutputRegister(instruction);
2886   {
2887     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2888     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2889     __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
2890     codegen_->MaybeRecordImplicitNullCheck(instruction);
2891   }
2892   // Mask out compression flag from String's array length.
2893   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
2894     __ Lsr(out.W(), out.W(), 1u);
2895   }
2896 }
2897 
void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
2899   DataType::Type value_type = instruction->GetComponentType();
2900 
2901   bool needs_type_check = instruction->NeedsTypeCheck();
2902   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
2903       instruction,
2904       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
2905   locations->SetInAt(0, Location::RequiresRegister());
2906   locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetIndex()));
2907   HInstruction* value = instruction->GetValue();
2908   if (IsZeroBitPattern(value)) {
2909     locations->SetInAt(2, Location::ConstantLocation(value));
2910   } else if (DataType::IsFloatingPointType(value_type)) {
2911     locations->SetInAt(2, Location::RequiresFpuRegister());
2912   } else {
2913     locations->SetInAt(2, Location::RequiresRegister());
2914   }
2915 }
2916 
void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
2918   DataType::Type value_type = instruction->GetComponentType();
2919   LocationSummary* locations = instruction->GetLocations();
2920   bool needs_type_check = instruction->NeedsTypeCheck();
2921   const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
2922   bool needs_write_barrier =
2923       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
2924 
2925   Register array = InputRegisterAt(instruction, 0);
2926   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
2927   CPURegister source = value;
2928   Location index = locations->InAt(1);
2929   size_t offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
2930   MemOperand destination = HeapOperand(array);
2931   MacroAssembler* masm = GetVIXLAssembler();
2932 
2933   if (!needs_write_barrier) {
2934     if (codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind)) {
2935       codegen_->CheckGCCardIsValid(array);
2936     }
2937 
2938     DCHECK(!needs_type_check);
2939     UseScratchRegisterScope temps(masm);
2940     if (index.IsConstant()) {
2941       offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
2942       destination = HeapOperand(array, offset);
2943     } else {
2944       Register temp_dest = temps.AcquireSameSizeAs(array);
2945       if (instruction->GetArray()->IsIntermediateAddress()) {
2946         // We do not need to compute the intermediate address from the array: the
2947         // input instruction has done it already. See the comment in
2948         // `TryExtractArrayAccessAddress()`.
2949         if (kIsDebugBuild) {
2950           HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
2951           DCHECK(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
2952         }
2953         temp_dest = array;
2954       } else {
2955         __ Add(temp_dest, array, offset);
2956       }
2957       destination = HeapOperand(temp_dest,
2958                                 XRegisterFrom(index),
2959                                 LSL,
2960                                 DataType::SizeShift(value_type));
2961     }
2962 
2963     if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
2964       DCHECK(value.IsW());
2965       Register temp_src = temps.AcquireW();
2966       __ Mov(temp_src, value.W());
2967       GetAssembler()->PoisonHeapReference(temp_src.W());
2968       source = temp_src;
2969     }
2970 
2971     {
2972       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2973       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2974       codegen_->Store(value_type, source, destination);
2975       codegen_->MaybeRecordImplicitNullCheck(instruction);
2976     }
2977   } else {
2978     DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2979     bool can_value_be_null = true;
    // In the WriteBarrierKind::kEmitNotBeingReliedOn case we can skip the write barrier when the
    // value is null (without an extra CompareAndBranchIfZero, since we already check whether the
    // value is null for the type check).
2983     bool skip_marking_gc_card = false;
2984     SlowPathCodeARM64* slow_path = nullptr;
2985     vixl::aarch64::Label skip_writing_card;
2986     if (!Register(value).IsZero()) {
2987       can_value_be_null = instruction->GetValueCanBeNull();
2988       skip_marking_gc_card =
2989           can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
2990       vixl::aarch64::Label do_store;
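      // A null value never needs the type check. If the card mark is not being relied on either,
      // branch past both the type check and the card mark; otherwise skip only the type check.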
2991       if (can_value_be_null) {
2992         if (skip_marking_gc_card) {
2993           __ Cbz(Register(value), &skip_writing_card);
2994         } else {
2995           __ Cbz(Register(value), &do_store);
2996         }
2997       }
2998 
2999       if (needs_type_check) {
3000         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction);
3001         codegen_->AddSlowPath(slow_path);
3002 
3003         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3004         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3005         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3006 
3007         UseScratchRegisterScope temps(masm);
3008         Register temp = temps.AcquireSameSizeAs(array);
3009         Register temp2 = temps.AcquireSameSizeAs(array);
3010 
3011         // Note that when Baker read barriers are enabled, the type
3012         // checks are performed without read barriers.  This is fine,
3013         // even in the case where a class object is in the from-space
3014         // after the flip, as a comparison involving such a type would
3015         // not produce a false positive; it may of course produce a
3016         // false negative, in which case we would take the ArraySet
3017         // slow path.
3018 
3019         // /* HeapReference<Class> */ temp = array->klass_
3020         {
3021           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
3022           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3023           __ Ldr(temp, HeapOperand(array, class_offset));
3024           codegen_->MaybeRecordImplicitNullCheck(instruction);
3025         }
3026         GetAssembler()->MaybeUnpoisonHeapReference(temp);
3027 
3028         // /* HeapReference<Class> */ temp = temp->component_type_
3029         __ Ldr(temp, HeapOperand(temp, component_offset));
3030         // /* HeapReference<Class> */ temp2 = value->klass_
3031         __ Ldr(temp2, HeapOperand(Register(value), class_offset));
3032         // If heap poisoning is enabled, no need to unpoison `temp`
3033         // nor `temp2`, as we are comparing two poisoned references.
3034         __ Cmp(temp, temp2);
3035 
3036         if (instruction->StaticTypeOfArrayIsObjectArray()) {
3037           vixl::aarch64::Label do_put;
3038           __ B(eq, &do_put);
3039           // If heap poisoning is enabled, the `temp` reference has
3040           // not been unpoisoned yet; unpoison it now.
3041           GetAssembler()->MaybeUnpoisonHeapReference(temp);
3042 
3043           // /* HeapReference<Class> */ temp = temp->super_class_
3044           __ Ldr(temp, HeapOperand(temp, super_offset));
3045           // If heap poisoning is enabled, no need to unpoison
3046           // `temp`, as we are comparing against null below.
3047           __ Cbnz(temp, slow_path->GetEntryLabel());
3048           __ Bind(&do_put);
3049         } else {
3050           __ B(ne, slow_path->GetEntryLabel());
3051         }
3052       }
3053 
3054       if (can_value_be_null && !skip_marking_gc_card) {
3055         DCHECK(do_store.IsLinked());
3056         __ Bind(&do_store);
3057       }
3058     }
3059 
3060     DCHECK_NE(write_barrier_kind, WriteBarrierKind::kDontEmit);
3061     DCHECK_IMPLIES(Register(value).IsZero(),
3062                    write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn);
3063     codegen_->MarkGCCard(array);
3064 
3065     if (skip_marking_gc_card) {
      // Note that we don't check that the GC card is valid here, as it may legitimately be clean.
3067       DCHECK(skip_writing_card.IsLinked());
3068       __ Bind(&skip_writing_card);
3069     }
3070 
3071     UseScratchRegisterScope temps(masm);
3072     if (kPoisonHeapReferences) {
3073       DCHECK(value.IsW());
3074       Register temp_source = temps.AcquireW();
3075       __ Mov(temp_source, value.W());
3076       GetAssembler()->PoisonHeapReference(temp_source);
3077       source = temp_source;
3078     }
3079 
3080     if (index.IsConstant()) {
3081       offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
3082       destination = HeapOperand(array, offset);
3083     } else {
3084       Register temp_base = temps.AcquireSameSizeAs(array);
3085       __ Add(temp_base, array, offset);
3086       destination = HeapOperand(temp_base,
3087                                 XRegisterFrom(index),
3088                                 LSL,
3089                                 DataType::SizeShift(value_type));
3090     }
3091 
3092     {
3093       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
3094       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3095       __ Str(source, destination);
3096 
3097       if (can_value_be_null || !needs_type_check) {
3098         codegen_->MaybeRecordImplicitNullCheck(instruction);
3099       }
3100     }
3101 
3102     if (slow_path != nullptr) {
3103       __ Bind(slow_path->GetExitLabel());
3104     }
3105   }
3106 }
3107 
void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3109   RegisterSet caller_saves = RegisterSet::Empty();
3110   InvokeRuntimeCallingConvention calling_convention;
3111   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3112   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
3113   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
3114 
3115   // If both index and length are constant, we can check the bounds statically and
3116   // generate code accordingly. We want to make sure we generate constant locations
3117   // in that case, regardless of whether they are encodable in the comparison or not.
3118   HInstruction* index = instruction->InputAt(0);
3119   HInstruction* length = instruction->InputAt(1);
3120   bool both_const = index->IsConstant() && length->IsConstant();
3121   locations->SetInAt(0, both_const
3122       ? Location::ConstantLocation(index)
3123       : ARM64EncodableConstantOrRegister(index, instruction));
3124   locations->SetInAt(1, both_const
3125       ? Location::ConstantLocation(length)
3126       : ARM64EncodableConstantOrRegister(length, instruction));
3127 }
3128 
void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3130   LocationSummary* locations = instruction->GetLocations();
3131   Location index_loc = locations->InAt(0);
3132   Location length_loc = locations->InAt(1);
3133 
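  // The bounds check uses an unsigned comparison: a negative index, interpreted as unsigned,
  // is larger than any valid length, so a single comparison covers both bounds.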
3134   int cmp_first_input = 0;
3135   int cmp_second_input = 1;
3136   Condition cond = hs;
3137 
3138   if (index_loc.IsConstant()) {
3139     int64_t index = Int64FromLocation(index_loc);
3140     if (length_loc.IsConstant()) {
3141       int64_t length = Int64FromLocation(length_loc);
3142       if (index < 0 || index >= length) {
3143         BoundsCheckSlowPathARM64* slow_path =
3144             new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
3145         codegen_->AddSlowPath(slow_path);
3146         __ B(slow_path->GetEntryLabel());
3147       } else {
3148         // BCE will remove the bounds check if we are guaranteed to pass.
3149         // However, some optimization after BCE may have generated this, and we should not
3150         // generate a bounds check if it is a valid range.
3151       }
3152       return;
3153     }
3154     // Only the index is constant: change the order of the operands and commute the condition
3155     // so we can use an immediate constant for the index (only the second input to a cmp
3156     // instruction can be an immediate).
3157     cmp_first_input = 1;
3158     cmp_second_input = 0;
3159     cond = ls;
3160   }
3161   BoundsCheckSlowPathARM64* slow_path =
3162       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
3163   __ Cmp(InputRegisterAt(instruction, cmp_first_input),
3164          InputOperandAt(instruction, cmp_second_input));
3165   codegen_->AddSlowPath(slow_path);
3166   __ B(slow_path->GetEntryLabel(), cond);
3167 }
3168 
void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
3170   LocationSummary* locations =
3171       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
3172   locations->SetInAt(0, Location::RequiresRegister());
3173   if (check->HasUses()) {
3174     locations->SetOut(Location::SameAsFirstInput());
3175   }
3176   // Rely on the type initialization to save everything we need.
3177   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
3178 }
3179 
void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
3181   // We assume the class is not null.
3182   SlowPathCodeARM64* slow_path =
3183       new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(check->GetLoadClass(), check);
3184   codegen_->AddSlowPath(slow_path);
3185   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
3186 }
3187 
static bool IsFloatingPointZeroConstant(HInstruction* inst) {
3189   return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
3190       || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
3191 }
3192 
void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
3194   VRegister lhs_reg = InputFPRegisterAt(instruction, 0);
3195   Location rhs_loc = instruction->GetLocations()->InAt(1);
3196   if (rhs_loc.IsConstant()) {
3197     // 0.0 is the only immediate that can be encoded directly in
3198     // an FCMP instruction.
3199     //
3200     // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
3201     // specify that in a floating-point comparison, positive zero
3202     // and negative zero are considered equal, so we can use the
3203     // literal 0.0 for both cases here.
3204     //
3205     // Note however that some methods (Float.equal, Float.compare,
3206     // Float.compareTo, Double.equal, Double.compare,
3207     // Double.compareTo, Math.max, Math.min, StrictMath.max,
3208     // StrictMath.min) consider 0.0 to be (strictly) greater than
3209     // -0.0. So if we ever translate calls to these methods into a
3210     // HCompare instruction, we must handle the -0.0 case with
3211     // care here.
3212     DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
3213     __ Fcmp(lhs_reg, 0.0);
3214   } else {
3215     __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
3216   }
3217 }
3218 
void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
3220   LocationSummary* locations =
3221       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
3222   DataType::Type in_type = compare->InputAt(0)->GetType();
3223   HInstruction* rhs = compare->InputAt(1);
3224   switch (in_type) {
3225     case DataType::Type::kBool:
3226     case DataType::Type::kUint8:
3227     case DataType::Type::kInt8:
3228     case DataType::Type::kUint16:
3229     case DataType::Type::kInt16:
3230     case DataType::Type::kInt32:
3231     case DataType::Type::kInt64: {
3232       locations->SetInAt(0, Location::RequiresRegister());
3233       locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, compare));
3234       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3235       break;
3236     }
3237     case DataType::Type::kFloat32:
3238     case DataType::Type::kFloat64: {
3239       locations->SetInAt(0, Location::RequiresFpuRegister());
3240       locations->SetInAt(1,
3241                          IsFloatingPointZeroConstant(rhs)
3242                              ? Location::ConstantLocation(rhs)
3243                              : Location::RequiresFpuRegister());
3244       locations->SetOut(Location::RequiresRegister());
3245       break;
3246     }
3247     default:
3248       LOG(FATAL) << "Unexpected type for compare operation " << in_type;
3249   }
3250 }
3251 
void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
3253   DataType::Type in_type = compare->InputAt(0)->GetType();
3254 
3255   //  0 if: left == right
3256   //  1 if: left  > right
3257   // -1 if: left  < right
3258   switch (in_type) {
3259     case DataType::Type::kBool:
3260     case DataType::Type::kUint8:
3261     case DataType::Type::kInt8:
3262     case DataType::Type::kUint16:
3263     case DataType::Type::kInt16:
3264     case DataType::Type::kInt32:
3265     case DataType::Type::kInt64: {
3266       Register result = OutputRegister(compare);
3267       Register left = InputRegisterAt(compare, 0);
3268       Operand right = InputOperandAt(compare, 1);
3269       __ Cmp(left, right);
3270       __ Cset(result, ne);          // result == +1 if NE or 0 otherwise
3271       __ Cneg(result, result, lt);  // result == -1 if LT or unchanged otherwise
3272       break;
3273     }
3274     case DataType::Type::kFloat32:
3275     case DataType::Type::kFloat64: {
3276       Register result = OutputRegister(compare);
3277       GenerateFcmp(compare);
3278       __ Cset(result, ne);
3279       __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
3280       break;
3281     }
3282     default:
3283       LOG(FATAL) << "Unimplemented compare type " << in_type;
3284   }
3285 }
3286 
void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
3288   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
3289 
3290   HInstruction* rhs = instruction->InputAt(1);
3291   if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3292     locations->SetInAt(0, Location::RequiresFpuRegister());
3293     locations->SetInAt(1,
3294                        IsFloatingPointZeroConstant(rhs)
3295                            ? Location::ConstantLocation(rhs)
3296                            : Location::RequiresFpuRegister());
3297   } else {
3298     // Integer cases.
3299     locations->SetInAt(0, Location::RequiresRegister());
3300     locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, instruction));
3301   }
3302 
3303   if (!instruction->IsEmittedAtUseSite()) {
3304     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3305   }
3306 }
3307 
void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
3309   if (instruction->IsEmittedAtUseSite()) {
3310     return;
3311   }
3312 
3313   LocationSummary* locations = instruction->GetLocations();
3314   Register res = RegisterFrom(locations->Out(), instruction->GetType());
3315   IfCondition if_cond = instruction->GetCondition();
3316 
3317   if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3318     GenerateFcmp(instruction);
3319     __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
3320   } else {
3321     // Integer cases.
3322     Register lhs = InputRegisterAt(instruction, 0);
3323     Operand rhs = InputOperandAt(instruction, 1);
3324     __ Cmp(lhs, rhs);
3325     __ Cset(res, ARM64Condition(if_cond));
3326   }
3327 }
3328 
3329 #define FOR_EACH_CONDITION_INSTRUCTION(M)                                                \
3330   M(Equal)                                                                               \
3331   M(NotEqual)                                                                            \
3332   M(LessThan)                                                                            \
3333   M(LessThanOrEqual)                                                                     \
3334   M(GreaterThan)                                                                         \
3335   M(GreaterThanOrEqual)                                                                  \
3336   M(Below)                                                                               \
3337   M(BelowOrEqual)                                                                        \
3338   M(Above)                                                                               \
3339   M(AboveOrEqual)
3340 #define DEFINE_CONDITION_VISITORS(Name)                                                  \
3341 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
3342 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
3344 #undef DEFINE_CONDITION_VISITORS
3345 #undef FOR_EACH_CONDITION_INSTRUCTION
3346 
3347 void InstructionCodeGeneratorARM64::GenerateIntDivForPower2Denom(HDiv* instruction) {
3348   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
3349   uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
3350   DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
3351 
3352   Register out = OutputRegister(instruction);
3353   Register dividend = InputRegisterAt(instruction, 0);
3354 
3355   Register final_dividend;
3356   if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
3357     // No need to adjust the result for non-negative dividends or the INT32_MIN/INT64_MIN dividends.
3358     // NOTE: The generated code for HDiv correctly works for the INT32_MIN/INT64_MIN dividends:
3359     //   imm == 2
3360     //     add out, dividend(0x80000000), dividend(0x80000000), lsr #31 => out = 0x80000001
3361     //     asr out, out(0x80000001), #1 => out = 0xc0000000
3362     //     This is the same as 'asr out, 0x80000000, #1'
3363     //
3364     //   imm > 2
3365     //     add temp, dividend(0x80000000), imm - 1 => temp = 0b10..01..1, where the number
3366     //         of the rightmost 1s is ctz_imm.
3367     //     cmp dividend(0x80000000), 0 => N = 1, V = 0 (lt is true)
3368     //     csel out, temp(0b10..01..1), dividend(0x80000000), lt => out = 0b10..01..1
3369     //     asr out, out(0b10..01..1), #ctz_imm => out = 0b1..10..0, where the number of the
3370     //         leftmost 1s is ctz_imm + 1.
3371     //     This is the same as 'asr out, dividend(0x80000000), #ctz_imm'.
3372     //
3373     //   imm == INT32_MIN
3374     //     add tmp, dividend(0x80000000), #0x7fffffff => tmp = -1
3375     //     cmp dividend(0x80000000), 0 => N = 1, V = 0 (lt is true)
3376     //     csel out, temp(-1), dividend(0x80000000), lt => out = -1
3377     //     neg out, out(-1), asr #31 => out = 1
3378     //     This is the same as 'neg out, dividend(0x80000000), asr #31'.
3379     final_dividend = dividend;
3380   } else {
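    // For a possibly negative dividend, bias it by (abs_imm - 1) before the arithmetic shift so
    // that the division rounds toward zero instead of toward negative infinity. For abs_imm == 2
    // the bias is simply the sign bit of the dividend.
    //   e.g. imm == 4 (ctz_imm == 2):
    //     add temp, dividend, #3
    //     cmp dividend, #0
    //     csel out, temp, dividend, lt
    //     asr out, out, #2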
3381     if (abs_imm == 2) {
3382       int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte;
3383       __ Add(out, dividend, Operand(dividend, LSR, bits - 1));
3384     } else {
3385       UseScratchRegisterScope temps(GetVIXLAssembler());
3386       Register temp = temps.AcquireSameSizeAs(out);
3387       __ Add(temp, dividend, abs_imm - 1);
3388       __ Cmp(dividend, 0);
3389       __ Csel(out, temp, dividend, lt);
3390     }
3391     final_dividend = out;
3392   }
3393 
3394   int ctz_imm = CTZ(abs_imm);
3395   if (imm > 0) {
3396     __ Asr(out, final_dividend, ctz_imm);
3397   } else {
3398     __ Neg(out, Operand(final_dividend, ASR, ctz_imm));
3399   }
3400 }
3401 
// Return true if the magic number was modified by subtracting 2^32 (Int32 div) or 2^64 (Int64 div),
// in which case the dividend needs to be added to the multiplication result.
static inline bool NeedToAddDividend(int64_t magic_number, int64_t divisor) {
3405   return divisor > 0 && magic_number < 0;
3406 }
3407 
// Return true if the magic number was modified by adding 2^32 (Int32 div) or 2^64 (Int64 div),
// in which case the dividend needs to be subtracted from the multiplication result.
static inline bool NeedToSubDividend(int64_t magic_number, int64_t divisor) {
3411   return divisor < 0 && magic_number > 0;
3412 }
3413 
3414 // Generate code which increments the value in register 'in' by 1 if the value is negative.
3415 // It is done with 'add out, in, in, lsr #31 or #63'.
3416 // If the value is a result of an operation setting the N flag, CINC MI can be used
3417 // instead of ADD. 'use_cond_inc' controls this.
void InstructionCodeGeneratorARM64::GenerateIncrementNegativeByOne(
    Register out,
    Register in,
    bool use_cond_inc) {
3422   if (use_cond_inc) {
3423     __ Cinc(out, in, mi);
3424   } else {
3425     __ Add(out, in, Operand(in, LSR, in.GetSizeInBits() - 1));
3426   }
3427 }
3428 
3429 // Helper to generate code producing the result of HRem with a constant divisor.
void InstructionCodeGeneratorARM64::GenerateResultRemWithAnyConstant(
    Register out,
    Register dividend,
    Register quotient,
    int64_t divisor,
    UseScratchRegisterScope* temps_scope) {
3436   Register temp_imm = temps_scope->AcquireSameSizeAs(out);
3437   __ Mov(temp_imm, divisor);
3438   __ Msub(out, quotient, temp_imm, dividend);
3439 }
3440 
// Helper to generate code for HDiv/HRem instructions when the dividend is non-negative and
// the divisor is a positive constant that is not a power of 2.
void InstructionCodeGeneratorARM64::GenerateInt64UnsignedDivRemWithAnyPositiveConstant(
    HBinaryOperation* instruction) {
3445   DCHECK(instruction->IsDiv() || instruction->IsRem());
3446   DCHECK(instruction->GetResultType() == DataType::Type::kInt64);
3447 
3448   LocationSummary* locations = instruction->GetLocations();
3449   Location second = locations->InAt(1);
3450   DCHECK(second.IsConstant());
3451 
3452   Register out = OutputRegister(instruction);
3453   Register dividend = InputRegisterAt(instruction, 0);
3454   int64_t imm = Int64FromConstant(second.GetConstant());
3455   DCHECK_GT(imm, 0);
3456 
3457   int64_t magic;
3458   int shift;
3459   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ true, &magic, &shift);
3460 
3461   UseScratchRegisterScope temps(GetVIXLAssembler());
3462   Register temp = temps.AcquireSameSizeAs(out);
3463 
3464   auto generate_unsigned_div_code = [this, magic, shift](Register out,
3465                                                          Register dividend,
3466                                                          Register temp) {
3467     // temp = get_high(dividend * magic)
3468     __ Mov(temp, magic);
3469     if (magic > 0 && shift == 0) {
3470       __ Smulh(out, dividend, temp);
3471     } else {
3472       __ Smulh(temp, dividend, temp);
3473       if (magic < 0) {
3474         // The negative magic means that the multiplier m is greater than INT64_MAX.
3475         // In such a case shift is never 0. See the proof in
3476         // InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant.
3477         __ Add(temp, temp, dividend);
3478       }
3479       DCHECK_NE(shift, 0);
3480       __ Lsr(out, temp, shift);
3481     }
3482   };
3483 
3484   if (instruction->IsDiv()) {
3485     generate_unsigned_div_code(out, dividend, temp);
3486   } else {
3487     generate_unsigned_div_code(temp, dividend, temp);
3488     GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3489   }
3490 }
3491 
3492 // Helper to generate code for HDiv/HRem instructions for any dividend and a constant divisor
3493 // that is not a power of 2.
3494 void InstructionCodeGeneratorARM64::GenerateInt64DivRemWithAnyConstant(
3495     HBinaryOperation* instruction) {
3496   DCHECK(instruction->IsDiv() || instruction->IsRem());
3497   DCHECK(instruction->GetResultType() == DataType::Type::kInt64);
3498 
3499   LocationSummary* locations = instruction->GetLocations();
3500   Location second = locations->InAt(1);
3501   DCHECK(second.IsConstant());
3502 
3503   Register out = OutputRegister(instruction);
3504   Register dividend = InputRegisterAt(instruction, 0);
3505   int64_t imm = Int64FromConstant(second.GetConstant());
3506 
3507   int64_t magic;
3508   int shift;
3509   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ true, &magic, &shift);
3510 
3511   UseScratchRegisterScope temps(GetVIXLAssembler());
3512   Register temp = temps.AcquireSameSizeAs(out);
3513 
3514   // temp = get_high(dividend * magic)
3515   __ Mov(temp, magic);
3516   __ Smulh(temp, dividend, temp);
3517 
3518   // The multiplication result might need some corrections to be finalized.
3519   // The last correction is to increment the result by 1 if it is negative.
3520   // Currently it is done with 'add result, temp_result, temp_result, lsr #31 or #63'.
3521   // Such an ADD usually has a latency of 2, e.g. on Cortex-A55.
3522   // However, if one of the corrections is an ADD or SUB, the sign can be detected
3523   // with ADDS/SUBS, which set the N flag if the result is negative.
3524   // This allows the use of CINC MI, which has a latency of 1.
3525   bool use_cond_inc = false;
3526 
3527   // Some combinations of magic_number and the divisor require correcting the result.
3528   // Check whether the correction is needed.
3529   if (NeedToAddDividend(magic, imm)) {
3530     __ Adds(temp, temp, dividend);
3531     use_cond_inc = true;
3532   } else if (NeedToSubDividend(magic, imm)) {
3533     __ Subs(temp, temp, dividend);
3534     use_cond_inc = true;
3535   }
3536 
3537   if (shift != 0) {
3538     __ Asr(temp, temp, shift);
3539   }
3540 
3541   if (instruction->IsRem()) {
3542     GenerateIncrementNegativeByOne(temp, temp, use_cond_inc);
3543     GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3544   } else {
3545     GenerateIncrementNegativeByOne(out, temp, use_cond_inc);
3546   }
3547 }
3548 
3549 void InstructionCodeGeneratorARM64::GenerateInt32DivRemWithAnyConstant(
3550     HBinaryOperation* instruction) {
3551   DCHECK(instruction->IsDiv() || instruction->IsRem());
3552   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
3553 
3554   LocationSummary* locations = instruction->GetLocations();
3555   Location second = locations->InAt(1);
3556   DCHECK(second.IsConstant());
3557 
3558   Register out = OutputRegister(instruction);
3559   Register dividend = InputRegisterAt(instruction, 0);
3560   int64_t imm = Int64FromConstant(second.GetConstant());
3561 
3562   int64_t magic;
3563   int shift;
3564   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
3565   UseScratchRegisterScope temps(GetVIXLAssembler());
3566   Register temp = temps.AcquireSameSizeAs(out);
3567 
3568   // temp = get_high(dividend * magic)
3569   __ Mov(temp, magic);
3570   __ Smull(temp.X(), dividend, temp);
3571 
3572   // The multiplication result might need some corrections to be finalized.
3573   // The last correction is to increment the result by 1 if it is negative.
3574   // Currently it is done with 'add result, temp_result, temp_result, lsr #31 or #63'.
3575   // Such an ADD usually has a latency of 2, e.g. on Cortex-A55.
3576   // However, if one of the corrections is an ADD or SUB, the sign can be detected
3577   // with ADDS/SUBS, which set the N flag if the result is negative.
3578   // This allows the use of CINC MI, which has a latency of 1.
3579   bool use_cond_inc = false;
3580 
3581   // The ADD/SUB correction is performed in the high 32 bits of the 64-bit register,
3582   // because for kInt32 only the high 32 bits of the multiplication result are used.
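  // Adding 'dividend << 32' is equivalent to adding the dividend to the high half of the
  // 64-bit product, mirroring the correction applied to the full result in the Int64 path.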
3583   if (NeedToAddDividend(magic, imm)) {
3584     __ Adds(temp.X(), temp.X(), Operand(dividend.X(), LSL, 32));
3585     use_cond_inc = true;
3586   } else if (NeedToSubDividend(magic, imm)) {
3587     __ Subs(temp.X(), temp.X(), Operand(dividend.X(), LSL, 32));
3588     use_cond_inc = true;
3589   }
3590 
3591   // Extract the result from the high 32 bits and apply the final right shift.
3592   DCHECK_LT(shift, 32);
3593   if (imm > 0 && HasNonNegativeInputAt(instruction, 0)) {
3594     // No need to adjust the result for a non-negative dividend and a positive divisor.
3595     if (instruction->IsDiv()) {
3596       __ Lsr(out.X(), temp.X(), 32 + shift);
3597     } else {
3598       __ Lsr(temp.X(), temp.X(), 32 + shift);
3599       GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3600     }
3601   } else {
3602     __ Asr(temp.X(), temp.X(), 32 + shift);
3603 
3604     if (instruction->IsRem()) {
3605       GenerateIncrementNegativeByOne(temp, temp, use_cond_inc);
3606       GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3607     } else {
3608       GenerateIncrementNegativeByOne(out, temp, use_cond_inc);
3609     }
3610   }
3611 }
3612 
3613 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction,
3614                                                                   int64_t divisor) {
3615   DCHECK(instruction->IsDiv() || instruction->IsRem());
3616   if (instruction->GetResultType() == DataType::Type::kInt64) {
3617     if (divisor > 0 && HasNonNegativeInputAt(instruction, 0)) {
3618       GenerateInt64UnsignedDivRemWithAnyPositiveConstant(instruction);
3619     } else {
3620       GenerateInt64DivRemWithAnyConstant(instruction);
3621     }
3622   } else {
3623     GenerateInt32DivRemWithAnyConstant(instruction);
3624   }
3625 }
3626 
3627 void InstructionCodeGeneratorARM64::GenerateIntDivForConstDenom(HDiv *instruction) {
3628   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
3629 
3630   if (imm == 0) {
3631     // Do not generate anything. DivZeroCheck would prevent any code from being executed.
3632     return;
3633   }
3634 
3635   if (IsPowerOfTwo(AbsOrMin(imm))) {
3636     GenerateIntDivForPower2Denom(instruction);
3637   } else {
3638     // Cases imm == -1 or imm == 1 are handled by InstructionSimplifier.
3639     DCHECK(imm < -2 || imm > 2) << imm;
3640     GenerateDivRemWithAnyConstant(instruction, imm);
3641   }
3642 }
3643 
3644 void InstructionCodeGeneratorARM64::GenerateIntDiv(HDiv *instruction) {
3645   DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
3646        << instruction->GetResultType();
3647 
3648   if (instruction->GetLocations()->InAt(1).IsConstant()) {
3649     GenerateIntDivForConstDenom(instruction);
3650   } else {
3651     Register out = OutputRegister(instruction);
3652     Register dividend = InputRegisterAt(instruction, 0);
3653     Register divisor = InputRegisterAt(instruction, 1);
3654     __ Sdiv(out, dividend, divisor);
3655   }
3656 }
3657 
3658 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
3659   LocationSummary* locations =
3660       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3661   switch (div->GetResultType()) {
3662     case DataType::Type::kInt32:
3663     case DataType::Type::kInt64:
3664       locations->SetInAt(0, Location::RequiresRegister());
3665       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3666       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3667       break;
3668 
3669     case DataType::Type::kFloat32:
3670     case DataType::Type::kFloat64:
3671       locations->SetInAt(0, Location::RequiresFpuRegister());
3672       locations->SetInAt(1, Location::RequiresFpuRegister());
3673       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3674       break;
3675 
3676     default:
3677       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3678   }
3679 }
3680 
3681 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
3682   DataType::Type type = div->GetResultType();
3683   switch (type) {
3684     case DataType::Type::kInt32:
3685     case DataType::Type::kInt64:
3686       GenerateIntDiv(div);
3687       break;
3688 
3689     case DataType::Type::kFloat32:
3690     case DataType::Type::kFloat64:
3691       __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
3692       break;
3693 
3694     default:
3695       LOG(FATAL) << "Unexpected div type " << type;
3696   }
3697 }
3698 
3699 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3700   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3701   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3702 }
3703 
3704 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3705   SlowPathCodeARM64* slow_path =
3706       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARM64(instruction);
3707   codegen_->AddSlowPath(slow_path);
3708   Location value = instruction->GetLocations()->InAt(0);
3709 
3710   DataType::Type type = instruction->GetType();
3711 
3712   if (!DataType::IsIntegralType(type)) {
3713     LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
3714     UNREACHABLE();
3715   }
3716 
3717   if (value.IsConstant()) {
3718     int64_t divisor = Int64FromLocation(value);
3719     if (divisor == 0) {
3720       __ B(slow_path->GetEntryLabel());
3721     } else {
3722       // A division by a non-zero constant is valid. We don't need to perform
3723       // any check, so simply fall through.
3724     }
3725   } else {
3726     __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
3727   }
3728 }
3729 
3730 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
3731   LocationSummary* locations =
3732       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3733   locations->SetOut(Location::ConstantLocation(constant));
3734 }
3735 
3736 void InstructionCodeGeneratorARM64::VisitDoubleConstant(
3737     [[maybe_unused]] HDoubleConstant* constant) {
3738   // Will be generated at use site.
3739 }
3740 
3741 void LocationsBuilderARM64::VisitExit(HExit* exit) {
3742   exit->SetLocations(nullptr);
3743 }
3744 
3745 void InstructionCodeGeneratorARM64::VisitExit([[maybe_unused]] HExit* exit) {}
3746 
3747 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
3748   LocationSummary* locations =
3749       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3750   locations->SetOut(Location::ConstantLocation(constant));
3751 }
3752 
3753 void InstructionCodeGeneratorARM64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
3754   // Will be generated at use site.
3755 }
3756 
3757 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
3758   if (successor->IsExitBlock()) {
3759     DCHECK(got->GetPrevious()->AlwaysThrows());
3760     return;  // no code needed
3761   }
3762 
3763   HBasicBlock* block = got->GetBlock();
3764   HInstruction* previous = got->GetPrevious();
3765   HLoopInformation* info = block->GetLoopInformation();
3766 
3767   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
3768     codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
3769     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
3770     return;  // `GenerateSuspendCheck()` emitted the jump.
3771   }
3772   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
3773     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
3774     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
3775   }
3776   if (!codegen_->GoesToNextBlock(block, successor)) {
3777     __ B(codegen_->GetLabelOf(successor));
3778   }
3779 }
3780 
3781 void LocationsBuilderARM64::VisitGoto(HGoto* got) {
3782   got->SetLocations(nullptr);
3783 }
3784 
3785 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
3786   HandleGoto(got, got->GetSuccessor());
3787 }
3788 
3789 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3790   try_boundary->SetLocations(nullptr);
3791 }
3792 
3793 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3794   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
3795   if (!successor->IsExitBlock()) {
3796     HandleGoto(try_boundary, successor);
3797   }
3798 }
3799 
3800 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
3801                                                           size_t condition_input_index,
3802                                                           vixl::aarch64::Label* true_target,
3803                                                           vixl::aarch64::Label* false_target) {
3804   HInstruction* cond = instruction->InputAt(condition_input_index);
3805 
3806   if (true_target == nullptr && false_target == nullptr) {
3807     // Nothing to do. The code always falls through.
3808     return;
3809   } else if (cond->IsIntConstant()) {
3810     // Constant condition, statically compared against "true" (integer value 1).
3811     if (cond->AsIntConstant()->IsTrue()) {
3812       if (true_target != nullptr) {
3813         __ B(true_target);
3814       }
3815     } else {
3816       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
3817       if (false_target != nullptr) {
3818         __ B(false_target);
3819       }
3820     }
3821     return;
3822   }
3823 
3824   // The following code generates these patterns:
3825   //  (1) true_target == nullptr && false_target != nullptr
3826   //        - opposite condition true => branch to false_target
3827   //  (2) true_target != nullptr && false_target == nullptr
3828   //        - condition true => branch to true_target
3829   //  (3) true_target != nullptr && false_target != nullptr
3830   //        - condition true => branch to true_target
3831   //        - branch to false_target
3832   if (IsBooleanValueOrMaterializedCondition(cond)) {
3833     // The condition instruction has been materialized, compare the output to 0.
3834     Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
3835     DCHECK(cond_val.IsRegister());
3836     if (true_target == nullptr) {
3837       __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
3838     } else {
3839       __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
3840     }
3841   } else {
3842     // The condition instruction has not been materialized, use its inputs as
3843     // the comparison and its condition as the branch condition.
3844     HCondition* condition = cond->AsCondition();
3845 
3846     DataType::Type type = condition->InputAt(0)->GetType();
3847     if (DataType::IsFloatingPointType(type)) {
3848       GenerateFcmp(condition);
3849       if (true_target == nullptr) {
3850         IfCondition opposite_condition = condition->GetOppositeCondition();
3851         __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
3852       } else {
3853         __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
3854       }
3855     } else {
3856       // Integer cases.
3857       Register lhs = InputRegisterAt(condition, 0);
3858       Operand rhs = InputOperandAt(condition, 1);
3859 
3860       Condition arm64_cond;
3861       vixl::aarch64::Label* non_fallthrough_target;
3862       if (true_target == nullptr) {
3863         arm64_cond = ARM64Condition(condition->GetOppositeCondition());
3864         non_fallthrough_target = false_target;
3865       } else {
3866         arm64_cond = ARM64Condition(condition->GetCondition());
3867         non_fallthrough_target = true_target;
3868       }
3869 
3870       if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
3871           rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
3872         switch (arm64_cond) {
3873           case eq:
3874             __ Cbz(lhs, non_fallthrough_target);
3875             break;
3876           case ne:
3877             __ Cbnz(lhs, non_fallthrough_target);
3878             break;
3879           case lt:
3880             // Test the sign bit and branch accordingly.
3881             __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3882             break;
3883           case ge:
3884             // Test the sign bit and branch accordingly.
3885             __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3886             break;
3887           default:
3888             // Without the `static_cast` the compiler throws an error for
3889             // `-Werror=sign-promo`.
3890             LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
3891         }
3892       } else {
3893         __ Cmp(lhs, rhs);
3894         __ B(arm64_cond, non_fallthrough_target);
3895       }
3896     }
3897   }
3898 
3899   // If neither branch falls through (case 3), the conditional branch to `true_target`
3900   // was already emitted (case 2) and we need to emit a jump to `false_target`.
3901   if (true_target != nullptr && false_target != nullptr) {
3902     __ B(false_target);
3903   }
3904 }
3905 
3906 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
3907   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
3908   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3909     locations->SetInAt(0, Location::RequiresRegister());
3910   }
3911 }
3912 
3913 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
3914   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3915   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3916   vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
3917   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
3918     true_target = nullptr;
3919   }
3920   vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
3921   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
3922     false_target = nullptr;
3923   }
3924   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3925     if (GetGraph()->IsCompilingBaseline() &&
3926         codegen_->GetCompilerOptions().ProfileBranches() &&
3927         !Runtime::Current()->IsAotCompiler()) {
3928       DCHECK(if_instr->InputAt(0)->IsCondition());
3929       ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3930       DCHECK(info != nullptr);
3931       BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
3932       // Currently, not all If branches are profiled.
3933       if (cache != nullptr) {
3934         uint64_t address =
3935             reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
3936         static_assert(
3937             BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
3938             "Unexpected offsets for BranchCache");
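        // The false/true counters are adjacent uint16_t fields, so the boolean condition
        // value (0 or 1) scaled by 2 selects the matching counter; the TBNZ on bit 16 skips
        // the store once the counter has saturated at 0xFFFF.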
3939         vixl::aarch64::Label done;
3940         UseScratchRegisterScope temps(GetVIXLAssembler());
3941         Register temp = temps.AcquireX();
3942         Register counter = temps.AcquireW();
3943         Register condition = InputRegisterAt(if_instr, 0).X();
3944         __ Mov(temp, address);
3945         __ Ldrh(counter, MemOperand(temp, condition, LSL, 1));
3946         __ Add(counter, counter, 1);
3947         __ Tbnz(counter, 16, &done);
3948         __ Strh(counter, MemOperand(temp, condition, LSL, 1));
3949         __ Bind(&done);
3950       }
3951     }
3952   }
3953   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
3954 }
3955 
3956 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3957   LocationSummary* locations = new (GetGraph()->GetAllocator())
3958       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3959   InvokeRuntimeCallingConvention calling_convention;
3960   RegisterSet caller_saves = RegisterSet::Empty();
3961   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3962   locations->SetCustomSlowPathCallerSaves(caller_saves);
3963   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3964     locations->SetInAt(0, Location::RequiresRegister());
3965   }
3966 }
3967 
3968 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3969   SlowPathCodeARM64* slow_path =
3970       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
3971   GenerateTestAndBranch(deoptimize,
3972                         /* condition_input_index= */ 0,
3973                         slow_path->GetEntryLabel(),
3974                         /* false_target= */ nullptr);
3975 }
3976 
3977 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3978   LocationSummary* locations = new (GetGraph()->GetAllocator())
3979       LocationSummary(flag, LocationSummary::kNoCall);
3980   locations->SetOut(Location::RequiresRegister());
3981 }
3982 
3983 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3984   __ Ldr(OutputRegister(flag),
3985          MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
3986 }
3987 
3988 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
3989   return condition->IsCondition() &&
3990          DataType::IsFloatingPointType(condition->InputAt(0)->GetType());
3991 }
3992 
3993 static inline Condition GetConditionForSelect(HCondition* condition) {
3994   IfCondition cond = condition->GetCondition();
3995   return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
3996                                                      : ARM64Condition(cond);
3997 }
3998 
3999 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
4000   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
4001   if (DataType::IsFloatingPointType(select->GetType())) {
4002     locations->SetInAt(0, Location::RequiresFpuRegister());
4003     locations->SetInAt(1, Location::RequiresFpuRegister());
4004     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4005   } else {
4006     HConstant* cst_true_value = select->GetTrueValue()->AsConstantOrNull();
4007     HConstant* cst_false_value = select->GetFalseValue()->AsConstantOrNull();
4008     bool is_true_value_constant = cst_true_value != nullptr;
4009     bool is_false_value_constant = cst_false_value != nullptr;
4010     // Ask VIXL whether we should synthesize constants in registers.
4011     // We give an arbitrary register to VIXL when dealing with non-constant inputs.
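    // If CSEL would need a constant operand synthesized into a register anyway, request a
    // register location so the register allocator materializes it; otherwise keep the
    // constant location.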
4012     Operand true_op = is_true_value_constant ?
4013         Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
4014     Operand false_op = is_false_value_constant ?
4015         Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
4016     bool true_value_in_register = false;
4017     bool false_value_in_register = false;
4018     MacroAssembler::GetCselSynthesisInformation(
4019         x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
4020     true_value_in_register |= !is_true_value_constant;
4021     false_value_in_register |= !is_false_value_constant;
4022 
4023     locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
4024                                                  : Location::ConstantLocation(cst_true_value));
4025     locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
4026                                                   : Location::ConstantLocation(cst_false_value));
4027     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4028   }
4029 
4030   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
4031     locations->SetInAt(2, Location::RequiresRegister());
4032   }
4033 }
4034 
4035 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
4036   HInstruction* cond = select->GetCondition();
4037   Condition csel_cond;
4038 
4039   if (IsBooleanValueOrMaterializedCondition(cond)) {
4040     if (cond->IsCondition() && cond->GetNext() == select) {
4041       // Use the condition flags set by the previous instruction.
4042       csel_cond = GetConditionForSelect(cond->AsCondition());
4043     } else {
4044       __ Cmp(InputRegisterAt(select, 2), 0);
4045       csel_cond = ne;
4046     }
4047   } else if (IsConditionOnFloatingPointValues(cond)) {
4048     GenerateFcmp(cond);
4049     csel_cond = GetConditionForSelect(cond->AsCondition());
4050   } else {
4051     __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
4052     csel_cond = GetConditionForSelect(cond->AsCondition());
4053   }
4054 
4055   if (DataType::IsFloatingPointType(select->GetType())) {
4056     __ Fcsel(OutputFPRegister(select),
4057              InputFPRegisterAt(select, 1),
4058              InputFPRegisterAt(select, 0),
4059              csel_cond);
4060   } else {
4061     __ Csel(OutputRegister(select),
4062             InputOperandAt(select, 1),
4063             InputOperandAt(select, 0),
4064             csel_cond);
4065   }
4066 }
4067 
4068 void LocationsBuilderARM64::VisitNop(HNop* nop) {
4069   new (GetGraph()->GetAllocator()) LocationSummary(nop);
4070 }
4071 
4072 void InstructionCodeGeneratorARM64::VisitNop(HNop*) {
4073   // The environment recording already happened in CodeGenerator::Compile.
4074 }
4075 
4076 void CodeGeneratorARM64::IncreaseFrame(size_t adjustment) {
4077   __ Claim(adjustment);
4078   GetAssembler()->cfi().AdjustCFAOffset(adjustment);
4079 }
4080 
4081 void CodeGeneratorARM64::DecreaseFrame(size_t adjustment) {
4082   __ Drop(adjustment);
4083   GetAssembler()->cfi().AdjustCFAOffset(-adjustment);
4084 }
4085 
4086 void CodeGeneratorARM64::GenerateNop() {
4087   __ Nop();
4088 }
4089 
4090 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4091   HandleFieldGet(instruction, instruction->GetFieldInfo());
4092 }
4093 
4094 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4095   HandleFieldGet(instruction, instruction->GetFieldInfo());
4096 }
4097 
4098 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4099   HandleFieldSet(instruction);
4100 }
4101 
4102 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4103   HandleFieldSet(instruction,
4104                  instruction->GetFieldInfo(),
4105                  instruction->GetValueCanBeNull(),
4106                  instruction->GetWriteBarrierKind());
4107 }
4108 
4109 // A temp is used for the read barrier.
4110 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
4111   if (emit_read_barrier &&
4112       (kUseBakerReadBarrier ||
4113           type_check_kind == TypeCheckKind::kAbstractClassCheck ||
4114           type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
4115           type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
4116     return 1;
4117   }
4118   return 0;
4119 }
4120 
4121 // The interface check case has 3 temps: one for holding the number of interfaces, one for
4122 // the current interface pointer, and one for loading the current interface.
4123 // The other checks have one temp for loading the object's class.
4124 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
4125   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
4126     return 3;
4127   }
4128   return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
4129 }
4130 
4131 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
4132   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4133   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4134   bool baker_read_barrier_slow_path = false;
4135   switch (type_check_kind) {
4136     case TypeCheckKind::kExactCheck:
4137     case TypeCheckKind::kAbstractClassCheck:
4138     case TypeCheckKind::kClassHierarchyCheck:
4139     case TypeCheckKind::kArrayObjectCheck:
4140     case TypeCheckKind::kInterfaceCheck: {
4141       bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
4142       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
4143       baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
4144                                      (type_check_kind != TypeCheckKind::kInterfaceCheck);
4145       break;
4146     }
4147     case TypeCheckKind::kArrayCheck:
4148     case TypeCheckKind::kUnresolvedCheck:
4149       call_kind = LocationSummary::kCallOnSlowPath;
4150       break;
4151     case TypeCheckKind::kBitstringCheck:
4152       break;
4153   }
4154 
4155   LocationSummary* locations =
4156       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
4157   if (baker_read_barrier_slow_path) {
4158     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4159   }
4160   locations->SetInAt(0, Location::RequiresRegister());
4161   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
4162     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
4163     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
4164     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
4165   } else {
4166     locations->SetInAt(1, Location::RequiresRegister());
4167   }
4168   // The "out" register is used as a temporary, so it overlaps with the inputs.
4169   // Note that TypeCheckSlowPathARM64 uses this register too.
4170   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4171   // Add temps if necessary for read barriers.
4172   locations->AddRegisterTemps(
4173       NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
4174 }
4175 
4176 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
4177   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4178   LocationSummary* locations = instruction->GetLocations();
4179   Location obj_loc = locations->InAt(0);
4180   Register obj = InputRegisterAt(instruction, 0);
4181   Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
4182       ? Register()
4183       : InputRegisterAt(instruction, 1);
4184   Location out_loc = locations->Out();
4185   Register out = OutputRegister(instruction);
4186   const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
4187   DCHECK_LE(num_temps, 1u);
4188   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
4189   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4190   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4191   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4192   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
4193   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
4194   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
4195   const uint32_t object_array_data_offset =
4196       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
4197 
4198   vixl::aarch64::Label done, zero;
4199   SlowPathCodeARM64* slow_path = nullptr;
4200 
4201   // Return 0 if `obj` is null.
4202   // Avoid null check if we know `obj` is not null.
4203   if (instruction->MustDoNullCheck()) {
4204     __ Cbz(obj, &zero);
4205   }
4206 
4207   switch (type_check_kind) {
4208     case TypeCheckKind::kExactCheck: {
4209       ReadBarrierOption read_barrier_option =
4210           codegen_->ReadBarrierOptionForInstanceOf(instruction);
4211       // /* HeapReference<Class> */ out = obj->klass_
4212       GenerateReferenceLoadTwoRegisters(instruction,
4213                                         out_loc,
4214                                         obj_loc,
4215                                         class_offset,
4216                                         maybe_temp_loc,
4217                                         read_barrier_option);
4218       __ Cmp(out, cls);
4219       __ Cset(out, eq);
4220       if (zero.IsLinked()) {
4221         __ B(&done);
4222       }
4223       break;
4224     }
4225 
4226     case TypeCheckKind::kAbstractClassCheck: {
4227       ReadBarrierOption read_barrier_option =
4228           codegen_->ReadBarrierOptionForInstanceOf(instruction);
4229       // /* HeapReference<Class> */ out = obj->klass_
4230       GenerateReferenceLoadTwoRegisters(instruction,
4231                                         out_loc,
4232                                         obj_loc,
4233                                         class_offset,
4234                                         maybe_temp_loc,
4235                                         read_barrier_option);
4236       // If the class is abstract, we eagerly fetch the super class of the
4237       // object to avoid doing a comparison we know will fail.
4238       vixl::aarch64::Label loop, success;
4239       __ Bind(&loop);
4240       // /* HeapReference<Class> */ out = out->super_class_
4241       GenerateReferenceLoadOneRegister(instruction,
4242                                        out_loc,
4243                                        super_offset,
4244                                        maybe_temp_loc,
4245                                        read_barrier_option);
4246       // If `out` is null, we use it for the result, and jump to `done`.
4247       __ Cbz(out, &done);
4248       __ Cmp(out, cls);
4249       __ B(ne, &loop);
4250       __ Mov(out, 1);
4251       if (zero.IsLinked()) {
4252         __ B(&done);
4253       }
4254       break;
4255     }
4256 
4257     case TypeCheckKind::kClassHierarchyCheck: {
4258       ReadBarrierOption read_barrier_option =
4259           codegen_->ReadBarrierOptionForInstanceOf(instruction);
4260       // /* HeapReference<Class> */ out = obj->klass_
4261       GenerateReferenceLoadTwoRegisters(instruction,
4262                                         out_loc,
4263                                         obj_loc,
4264                                         class_offset,
4265                                         maybe_temp_loc,
4266                                         read_barrier_option);
4267       // Walk over the class hierarchy to find a match.
4268       vixl::aarch64::Label loop, success;
4269       __ Bind(&loop);
4270       __ Cmp(out, cls);
4271       __ B(eq, &success);
4272       // /* HeapReference<Class> */ out = out->super_class_
4273       GenerateReferenceLoadOneRegister(instruction,
4274                                        out_loc,
4275                                        super_offset,
4276                                        maybe_temp_loc,
4277                                        read_barrier_option);
4278       __ Cbnz(out, &loop);
4279       // If `out` is null, we use it for the result, and jump to `done`.
4280       __ B(&done);
4281       __ Bind(&success);
4282       __ Mov(out, 1);
4283       if (zero.IsLinked()) {
4284         __ B(&done);
4285       }
4286       break;
4287     }
4288 
4289     case TypeCheckKind::kArrayObjectCheck: {
4290       ReadBarrierOption read_barrier_option =
4291           codegen_->ReadBarrierOptionForInstanceOf(instruction);
4292       // /* HeapReference<Class> */ out = obj->klass_
4293       GenerateReferenceLoadTwoRegisters(instruction,
4294                                         out_loc,
4295                                         obj_loc,
4296                                         class_offset,
4297                                         maybe_temp_loc,
4298                                         read_barrier_option);
4299       // Do an exact check.
4300       vixl::aarch64::Label exact_check;
4301       __ Cmp(out, cls);
4302       __ B(eq, &exact_check);
4303       // Otherwise, we need to check that the object's class is a non-primitive array.
4304       // /* HeapReference<Class> */ out = out->component_type_
4305       GenerateReferenceLoadOneRegister(instruction,
4306                                        out_loc,
4307                                        component_offset,
4308                                        maybe_temp_loc,
4309                                        read_barrier_option);
4310       // If `out` is null, we use it for the result, and jump to `done`.
4311       __ Cbz(out, &done);
4312       __ Ldrh(out, HeapOperand(out, primitive_offset));
4313       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4314       __ Cbnz(out, &zero);
4315       __ Bind(&exact_check);
4316       __ Mov(out, 1);
4317       __ B(&done);
4318       break;
4319     }
4320 
4321     case TypeCheckKind::kArrayCheck: {
4322       // No read barrier since the slow path will retry upon failure.
4323       // /* HeapReference<Class> */ out = obj->klass_
4324       GenerateReferenceLoadTwoRegisters(instruction,
4325                                         out_loc,
4326                                         obj_loc,
4327                                         class_offset,
4328                                         maybe_temp_loc,
4329                                         kWithoutReadBarrier);
4330       __ Cmp(out, cls);
4331       DCHECK(locations->OnlyCallsOnSlowPath());
4332       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4333           instruction, /* is_fatal= */ false);
4334       codegen_->AddSlowPath(slow_path);
4335       __ B(ne, slow_path->GetEntryLabel());
4336       __ Mov(out, 1);
4337       if (zero.IsLinked()) {
4338         __ B(&done);
4339       }
4340       break;
4341     }
4342 
4343     case TypeCheckKind::kInterfaceCheck: {
4344       if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
4345         DCHECK(locations->OnlyCallsOnSlowPath());
4346         slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4347             instruction, /* is_fatal= */ false);
4348         codegen_->AddSlowPath(slow_path);
4349         if (codegen_->EmitNonBakerReadBarrier()) {
4350           __ B(slow_path->GetEntryLabel());
4351           break;
4352         }
4353         // For Baker read barrier, take the slow path while marking.
4354         __ Cbnz(mr, slow_path->GetEntryLabel());
4355       }
4356 
4357       // Fast-path without read barriers.
4358       UseScratchRegisterScope temps(GetVIXLAssembler());
4359       Register temp = temps.AcquireW();
4360       Register temp2 = temps.AcquireW();
4361       // /* HeapReference<Class> */ temp = obj->klass_
4362       __ Ldr(temp, HeapOperand(obj, class_offset));
4363       GetAssembler()->MaybeUnpoisonHeapReference(temp);
4364       // /* HeapReference<Class> */ temp = temp->iftable_
4365       __ Ldr(temp, HeapOperand(temp, iftable_offset));
4366       GetAssembler()->MaybeUnpoisonHeapReference(temp);
4367       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
4368       __ Ldr(out, HeapOperand(temp, array_length_offset));
4369       // Loop through the `IfTable` and check if any class matches.
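      // Each `IfTable` entry is a pair (interface class, method array), hence the stride of
      // 2 * kHeapReferenceSize and the decrement of the remaining length by 2 per iteration.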
4370       vixl::aarch64::Label loop;
4371       __ Bind(&loop);
4372       __ Cbz(out, &done);  // If taken, the result in `out` is already 0 (false).
4373       __ Ldr(temp2, HeapOperand(temp, object_array_data_offset));
4374       GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4375       // Go to next interface.
4376       __ Add(temp, temp, 2 * kHeapReferenceSize);
4377       __ Sub(out, out, 2);
4378       // Compare the classes and continue the loop if they do not match.
4379       __ Cmp(cls, temp2);
4380       __ B(ne, &loop);
4381       __ Mov(out, 1);
4382       if (zero.IsLinked()) {
4383         __ B(&done);
4384       }
4385       break;
4386     }
4387 
4388     case TypeCheckKind::kUnresolvedCheck: {
4389       // Note that we indeed only call on slow path, but we always go
4390       // into the slow path for the unresolved check case.
4391       //
4392       // We cannot directly call the InstanceofNonTrivial runtime
4393       // entry point without resorting to a type checking slow path
4394       // here (i.e. by calling InvokeRuntime directly), as it would
4395       // require assigning fixed registers for the inputs of this
4396       // HInstanceOf instruction (following the runtime calling
4397       // convention), which might be cluttered by the potential first
4398       // read barrier emission at the beginning of this method.
4399       //
4400       // TODO: Introduce a new runtime entry point taking the object
4401       // to test (instead of its class) as argument, and let it deal
4402       // with the read barrier issues. This will let us refactor this
4403       // case of the `switch` code as it was previously (with a direct
4404       // call to the runtime not using a type checking slow path).
4405       // This should also be beneficial for the other cases above.
4406       DCHECK(locations->OnlyCallsOnSlowPath());
4407       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4408           instruction, /* is_fatal= */ false);
4409       codegen_->AddSlowPath(slow_path);
4410       __ B(slow_path->GetEntryLabel());
4411       break;
4412     }
4413 
4414     case TypeCheckKind::kBitstringCheck: {
4415       // /* HeapReference<Class> */ temp = obj->klass_
4416       GenerateReferenceLoadTwoRegisters(instruction,
4417                                         out_loc,
4418                                         obj_loc,
4419                                         class_offset,
4420                                         maybe_temp_loc,
4421                                         kWithoutReadBarrier);
4422 
4423       GenerateBitstringTypeCheckCompare(instruction, out);
4424       __ Cset(out, eq);
4425       if (zero.IsLinked()) {
4426         __ B(&done);
4427       }
4428       break;
4429     }
4430   }
4431 
4432   if (zero.IsLinked()) {
4433     __ Bind(&zero);
4434     __ Mov(out, 0);
4435   }
4436 
4437   if (done.IsLinked()) {
4438     __ Bind(&done);
4439   }
4440 
4441   if (slow_path != nullptr) {
4442     __ Bind(slow_path->GetExitLabel());
4443   }
4444 }
4445 
4446 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
4447   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4448   LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
4449   LocationSummary* locations =
4450       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
4451   locations->SetInAt(0, Location::RequiresRegister());
4452   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
4453     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
4454     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
4455     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
4456   } else {
4457     locations->SetInAt(1, Location::RequiresRegister());
4458   }
4459   locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
4460 }
4461 
4462 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
4463   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4464   LocationSummary* locations = instruction->GetLocations();
4465   Location obj_loc = locations->InAt(0);
4466   Register obj = InputRegisterAt(instruction, 0);
4467   Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
4468       ? Register()
4469       : InputRegisterAt(instruction, 1);
4470   const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
4471   DCHECK_GE(num_temps, 1u);
4472   DCHECK_LE(num_temps, 3u);
4473   Location temp_loc = locations->GetTemp(0);
4474   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
4475   Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
4476   Register temp = WRegisterFrom(temp_loc);
4477   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4478   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4479   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4480   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
4481   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
4482   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
4483   const uint32_t object_array_data_offset =
4484       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
4485 
4486   bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
4487   SlowPathCodeARM64* type_check_slow_path =
4488       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4489           instruction, is_type_check_slow_path_fatal);
4490   codegen_->AddSlowPath(type_check_slow_path);
4491 
4492   vixl::aarch64::Label done;
4493   // Avoid null check if we know obj is not null.
4494   if (instruction->MustDoNullCheck()) {
4495     __ Cbz(obj, &done);
4496   }
4497 
4498   switch (type_check_kind) {
4499     case TypeCheckKind::kExactCheck:
4500     case TypeCheckKind::kArrayCheck: {
4501       // /* HeapReference<Class> */ temp = obj->klass_
4502       GenerateReferenceLoadTwoRegisters(instruction,
4503                                         temp_loc,
4504                                         obj_loc,
4505                                         class_offset,
4506                                         maybe_temp2_loc,
4507                                         kWithoutReadBarrier);
4508 
4509       __ Cmp(temp, cls);
4510       // Jump to slow path for throwing the exception or doing a
4511       // more involved array check.
4512       __ B(ne, type_check_slow_path->GetEntryLabel());
4513       break;
4514     }
4515 
4516     case TypeCheckKind::kAbstractClassCheck: {
4517       // /* HeapReference<Class> */ temp = obj->klass_
4518       GenerateReferenceLoadTwoRegisters(instruction,
4519                                         temp_loc,
4520                                         obj_loc,
4521                                         class_offset,
4522                                         maybe_temp2_loc,
4523                                         kWithoutReadBarrier);
4524 
4525       // If the class is abstract, we eagerly fetch the super class of the
4526       // object to avoid doing a comparison we know will fail.
4527       vixl::aarch64::Label loop;
4528       __ Bind(&loop);
4529       // /* HeapReference<Class> */ temp = temp->super_class_
4530       GenerateReferenceLoadOneRegister(instruction,
4531                                        temp_loc,
4532                                        super_offset,
4533                                        maybe_temp2_loc,
4534                                        kWithoutReadBarrier);
4535 
4536       // If the class reference currently in `temp` is null, jump to the slow path to throw the
4537       // exception.
4538       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4539       // Otherwise, compare classes.
4540       __ Cmp(temp, cls);
4541       __ B(ne, &loop);
4542       break;
4543     }
4544 
4545     case TypeCheckKind::kClassHierarchyCheck: {
4546       // /* HeapReference<Class> */ temp = obj->klass_
4547       GenerateReferenceLoadTwoRegisters(instruction,
4548                                         temp_loc,
4549                                         obj_loc,
4550                                         class_offset,
4551                                         maybe_temp2_loc,
4552                                         kWithoutReadBarrier);
4553 
4554       // Walk over the class hierarchy to find a match.
4555       vixl::aarch64::Label loop;
4556       __ Bind(&loop);
4557       __ Cmp(temp, cls);
4558       __ B(eq, &done);
4559 
4560       // /* HeapReference<Class> */ temp = temp->super_class_
4561       GenerateReferenceLoadOneRegister(instruction,
4562                                        temp_loc,
4563                                        super_offset,
4564                                        maybe_temp2_loc,
4565                                        kWithoutReadBarrier);
4566 
4567       // If the class reference currently in `temp` is not null, jump
4568       // back to the beginning of the loop.
4569       __ Cbnz(temp, &loop);
4570       // Otherwise, jump to the slow path to throw the exception.
4571       __ B(type_check_slow_path->GetEntryLabel());
4572       break;
4573     }
4574 
4575     case TypeCheckKind::kArrayObjectCheck: {
4576       // /* HeapReference<Class> */ temp = obj->klass_
4577       GenerateReferenceLoadTwoRegisters(instruction,
4578                                         temp_loc,
4579                                         obj_loc,
4580                                         class_offset,
4581                                         maybe_temp2_loc,
4582                                         kWithoutReadBarrier);
4583 
4584       // Do an exact check.
4585       __ Cmp(temp, cls);
4586       __ B(eq, &done);
4587 
4588       // Otherwise, we need to check that the object's class is a non-primitive array.
4589       // /* HeapReference<Class> */ temp = temp->component_type_
4590       GenerateReferenceLoadOneRegister(instruction,
4591                                        temp_loc,
4592                                        component_offset,
4593                                        maybe_temp2_loc,
4594                                        kWithoutReadBarrier);
4595 
4596       // If the component type is null, jump to the slow path to throw the exception.
4597       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4598       // Otherwise, the object is indeed an array. Further check that this component type is not a
4599       // primitive type.
4600       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
4601       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4602       __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
4603       break;
4604     }
4605 
4606     case TypeCheckKind::kUnresolvedCheck:
4607       // We always go into the type check slow path for the unresolved check cases.
4608       //
4609       // We cannot directly call the CheckCast runtime entry point
4610       // without resorting to a type checking slow path here (i.e. by
4611       // calling InvokeRuntime directly), as it would require
4612       // assigning fixed registers for the inputs of this HInstanceOf
4613       // instruction (following the runtime calling convention), which
4614       // might be cluttered by the potential first read barrier
4615       // emission at the beginning of this method.
4616       __ B(type_check_slow_path->GetEntryLabel());
4617       break;
4618     case TypeCheckKind::kInterfaceCheck: {
4619       // /* HeapReference<Class> */ temp = obj->klass_
4620       GenerateReferenceLoadTwoRegisters(instruction,
4621                                         temp_loc,
4622                                         obj_loc,
4623                                         class_offset,
4624                                         maybe_temp2_loc,
4625                                         kWithoutReadBarrier);
4626 
4627       // /* HeapReference<Class> */ temp = temp->iftable_
4628       GenerateReferenceLoadOneRegister(instruction,
4629                                        temp_loc,
4630                                        iftable_offset,
4631                                        maybe_temp2_loc,
4632                                        kWithoutReadBarrier);
4633       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
4634       __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
4635       // Loop through the iftable and check if any class matches.
4636       vixl::aarch64::Label start_loop;
4637       __ Bind(&start_loop);
4638       __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
4639       __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
4640       GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
4641       // Go to next interface.
4642       __ Add(temp, temp, 2 * kHeapReferenceSize);
4643       __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
4644       // Compare the classes and continue the loop if they do not match.
4645       __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
4646       __ B(ne, &start_loop);
4647       break;
4648     }
4649 
4650     case TypeCheckKind::kBitstringCheck: {
4651       // /* HeapReference<Class> */ temp = obj->klass_
4652       GenerateReferenceLoadTwoRegisters(instruction,
4653                                         temp_loc,
4654                                         obj_loc,
4655                                         class_offset,
4656                                         maybe_temp2_loc,
4657                                         kWithoutReadBarrier);
4658 
4659       GenerateBitstringTypeCheckCompare(instruction, temp);
4660       __ B(ne, type_check_slow_path->GetEntryLabel());
4661       break;
4662     }
4663   }
4664   __ Bind(&done);
4665 
4666   __ Bind(type_check_slow_path->GetExitLabel());
4667 }
4668 
4669 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
4670   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4671   locations->SetOut(Location::ConstantLocation(constant));
4672 }
4673 
4674 void InstructionCodeGeneratorARM64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
4675   // Will be generated at use site.
4676 }
4677 
4678 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
4679   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4680   locations->SetOut(Location::ConstantLocation(constant));
4681 }
4682 
4683 void InstructionCodeGeneratorARM64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
4684   // Will be generated at use site.
4685 }
4686 
4687 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4688   // The trampoline uses the same calling convention as the dex calling convention,
4689   // except that instead of loading arg0/x0 with the target Method*, arg0/x0 will
4690   // contain the method_idx.
4691   HandleInvoke(invoke);
4692 }
4693 
4694 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4695   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
4696   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4697 }
4698 
4699 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
4700   InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
4701   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4702 }
4703 
4704 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4705   HandleInvoke(invoke);
4706   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
4707     // We cannot request ip1 as it's blocked by the register allocator.
4708     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1, Location::Any());
4709   }
4710 }
4711 
4712 void CodeGeneratorARM64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
4713                                                        Register klass) {
4714   DCHECK_EQ(klass.GetCode(), 0u);
4715   if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
4716     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
4717     DCHECK(info != nullptr);
4718     InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
4719         info, GetCompilerOptions(), instruction->AsInvoke());
4720     if (cache != nullptr) {
4721       uint64_t address = reinterpret_cast64<uint64_t>(cache);
4722       vixl::aarch64::Label done;
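      // Note: x8/w9 are used as fixed temporaries; the update entrypoint invoked below is
      // assumed to find the InlineCache through x8 and the class through register 0 (hence
      // the DCHECK on `klass` above).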
4723       __ Mov(x8, address);
4724       __ Ldr(w9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value()));
4725       // Fast path for a monomorphic cache.
4726       __ Cmp(klass.W(), w9);
4727       __ B(eq, &done);
4728       InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
4729       __ Bind(&done);
4730     } else {
4731       // This is unexpected, but we don't guarantee stable compilation across
4732       // JIT runs, so just warn about it.
4733       ScopedObjectAccess soa(Thread::Current());
4734       LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
4735     }
4736   }
4737 }
4738 
4739 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4740   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
4741   LocationSummary* locations = invoke->GetLocations();
4742   Register temp = XRegisterFrom(locations->GetTemp(0));
4743   Location receiver = locations->InAt(0);
4744   Offset class_offset = mirror::Object::ClassOffset();
4745   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4746 
4747   // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
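  // (A constant pool emitted in between would make the recorded PC differ from the PC of
  // the faulting load, breaking the implicit null check.)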
4748   if (receiver.IsStackSlot()) {
4749     __ Ldr(temp.W(), StackOperandFrom(receiver));
4750     {
4751       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4752       // /* HeapReference<Class> */ temp = temp->klass_
4753       __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
4754       codegen_->MaybeRecordImplicitNullCheck(invoke);
4755     }
4756   } else {
4757     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4758     // /* HeapReference<Class> */ temp = receiver->klass_
4759     __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
4760     codegen_->MaybeRecordImplicitNullCheck(invoke);
4761   }
4762 
4763   // Instead of simply (possibly) unpoisoning `temp` here, we should
4764   // emit a read barrier for the previous class reference load.
4765   // However this is not required in practice, as this is an
4766   // intermediate/temporary reference and because the current
4767   // concurrent copying collector keeps the from-space memory
4768   // intact/accessible until the end of the marking phase (though the
4769   // concurrent copying collector may not do so in the future).
4770   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4771 
4772   // If we're compiling baseline, update the inline cache.
4773   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
4774 
4775   // The register ip1 is required to be used for the hidden argument in
4776   // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
4777   MacroAssembler* masm = GetVIXLAssembler();
4778   UseScratchRegisterScope scratch_scope(masm);
4779   scratch_scope.Exclude(ip1);
4780   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
4781     Location interface_method = locations->InAt(invoke->GetNumberOfArguments() - 1);
4782     if (interface_method.IsStackSlot()) {
4783       __ Ldr(ip1, StackOperandFrom(interface_method));
4784     } else {
4785       __ Mov(ip1, XRegisterFrom(interface_method));
4786     }
4787   // If the load kind is through a runtime call, we will pass the method we
4788   // fetch from the IMT, which will either be a no-op if we don't hit the conflict
4789   // stub, or will make us always go through the trampoline when there is a
4790   // conflict.
4791   } else if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
4792     codegen_->LoadMethod(
4793         invoke->GetHiddenArgumentLoadKind(), Location::RegisterLocation(ip1.GetCode()), invoke);
4794   }
4795 
4796   __ Ldr(temp,
4797       MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
4798   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4799       invoke->GetImtIndex(), kArm64PointerSize));
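  // The IMT has a fixed number of slots and the index is derived from the interface method,
  // so unrelated methods may share a slot and be resolved through the conflict trampoline
  // mentioned above.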
4800   // temp = temp->GetImtEntryAt(method_offset);
4801   __ Ldr(temp, MemOperand(temp, method_offset));
4802   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
4803     // We pass the method from the IMT in case of a conflict. This will ensure
4804     // we go into the runtime to resolve the actual method.
4805     __ Mov(ip1, temp);
4806   }
4807   // lr = temp->GetEntryPoint();
4808   __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
4809 
4810   {
4811     // Ensure the pc position is recorded immediately after the `blr` instruction.
4812     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4813 
4814     // lr();
4815     __ blr(lr);
4816     DCHECK(!codegen_->IsLeafMethod());
4817     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4818   }
4819 
4820   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4821 }
4822 
4823 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4824   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4825   if (intrinsic.TryDispatch(invoke)) {
4826     return;
4827   }
4828 
4829   HandleInvoke(invoke);
4830 }
4831 
4832 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4833   // Explicit clinit checks triggered by static invokes must have been pruned by
4834   // art::PrepareForRegisterAllocation.
4835   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4836 
4837   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4838   if (intrinsic.TryDispatch(invoke)) {
4839     return;
4840   }
4841 
4842   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
4843     CriticalNativeCallingConventionVisitorARM64 calling_convention_visitor(
4844         /*for_register_allocation=*/ true);
4845     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4846   } else {
4847     HandleInvoke(invoke);
4848   }
4849 }
4850 
4851 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4852   if (invoke->GetLocations()->Intrinsified()) {
4853     IntrinsicCodeGeneratorARM64 intrinsic(codegen);
4854     intrinsic.Dispatch(invoke);
4855     return true;
4856   }
4857   return false;
4858 }
4859 
4860 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
4861     const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
4862     [[maybe_unused]] ArtMethod* method) {
4863   // On ARM64 we support all dispatch types.
4864   return desired_dispatch_info;
4865 }
4866 
4867 void CodeGeneratorARM64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
4868   switch (load_kind) {
4869     case MethodLoadKind::kBootImageLinkTimePcRelative: {
4870       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
4871       // Add ADRP with its PC-relative method patch.
4872       vixl::aarch64::Label* adrp_label =
4873           NewBootImageMethodPatch(invoke->GetResolvedMethodReference());
4874       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4875       // Add ADD with its PC-relative method patch.
4876       vixl::aarch64::Label* add_label =
4877           NewBootImageMethodPatch(invoke->GetResolvedMethodReference(), adrp_label);
4878       EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
4879       break;
4880     }
4881     case MethodLoadKind::kBootImageRelRo: {
4882       // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
4883       uint32_t boot_image_offset = GetBootImageOffset(invoke);
4884       LoadBootImageRelRoEntry(WRegisterFrom(temp), boot_image_offset);
4885       break;
4886     }
4887     case MethodLoadKind::kBssEntry: {
4888       // Add ADRP with its PC-relative .bss entry patch.
4889       vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(invoke->GetMethodReference());
4890       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4891       // Add LDR with its PC-relative .bss entry patch.
4892       vixl::aarch64::Label* ldr_label =
4893           NewMethodBssEntryPatch(invoke->GetMethodReference(), adrp_label);
4894       // All aligned loads are implicitly atomic consume operations on ARM64.
4895       EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
4896       break;
4897     }
4898     case MethodLoadKind::kJitDirectAddress: {
4899       // Load method address from literal pool.
4900       __ Ldr(XRegisterFrom(temp),
4901              jit_patches_.DeduplicateUint64Literal(
4902                  reinterpret_cast<uint64_t>(invoke->GetResolvedMethod())));
4903       break;
4904     }
4905     case MethodLoadKind::kRuntimeCall: {
4906       // Test situation, don't do anything.
4907       break;
4908     }
4909     default: {
4910       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
4911       UNREACHABLE();
4912     }
4913   }
4914 }
4915 
4916 void CodeGeneratorARM64::GenerateStaticOrDirectCall(
4917     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
4918   // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
4919   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
4920   switch (invoke->GetMethodLoadKind()) {
4921     case MethodLoadKind::kStringInit: {
4922       uint32_t offset =
4923           GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
4924       // temp = thread->string_init_entrypoint
4925       __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
4926       break;
4927     }
4928     case MethodLoadKind::kRecursive:
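      // The callee is the method being compiled, so its ArtMethod* is already available as
      // the current-method input and no extra load is needed.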
4929       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
4930       break;
4931     case MethodLoadKind::kRuntimeCall:
4932       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
4933       return;  // No code pointer retrieval; the runtime performs the call directly.
4934     case MethodLoadKind::kBootImageLinkTimePcRelative:
4935       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
4936       if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
4937         // Do not materialize the method pointer; load the entrypoint directly.
4938         // Add ADRP with its PC-relative JNI entrypoint patch.
4939         vixl::aarch64::Label* adrp_label =
4940             NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference());
4941         EmitAdrpPlaceholder(adrp_label, lr);
4942         // Add LDR with its PC-relative JNI entrypoint patch.
4943         vixl::aarch64::Label* ldr_label =
4944             NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference(), adrp_label);
4945         EmitLdrOffsetPlaceholder(ldr_label, lr, lr);
4946         break;
4947       }
4948       FALLTHROUGH_INTENDED;
4949     default:
4950       LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
4951       break;
4952   }
4953 
4954   auto call_lr = [&]() {
4955     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4956     ExactAssemblyScope eas(GetVIXLAssembler(),
4957                            kInstructionSize,
4958                            CodeBufferCheckScope::kExactSize);
4959     // lr()
4960     __ blr(lr);
4961     RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4962   };
4963   switch (invoke->GetCodePtrLocation()) {
4964     case CodePtrLocation::kCallSelf:
4965       {
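        // kCallSelf: the callee is this very method, so branch straight to its frame entry.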
4966         DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
4967         // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4968         ExactAssemblyScope eas(GetVIXLAssembler(),
4969                                kInstructionSize,
4970                                CodeBufferCheckScope::kExactSize);
4971         __ bl(&frame_entry_label_);
4972         RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4973       }
4974       break;
4975     case CodePtrLocation::kCallCriticalNative: {
4976       size_t out_frame_size =
4977           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARM64,
4978                                     kAapcs64StackAlignment,
4979                                     GetCriticalNativeDirectCallFrameSize>(invoke);
4980       if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
4981         call_lr();
4982       } else {
4983         // LR = callee_method->ptr_sized_fields_.data_;  // EntryPointFromJni
4984         MemberOffset offset = ArtMethod::EntryPointFromJniOffset(kArm64PointerSize);
4985         __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
4986         // lr()
4987         call_lr();
4988       }
4989       // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
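      // (AAPCS64 leaves the upper bits of return values narrower than 32 bits unspecified,
      // whereas managed code expects them zero-/sign-extended to at least 32 bits.)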
4990       switch (invoke->GetType()) {
4991         case DataType::Type::kBool:
4992           __ Ubfx(w0, w0, 0, 8);
4993           break;
4994         case DataType::Type::kInt8:
4995           __ Sbfx(w0, w0, 0, 8);
4996           break;
4997         case DataType::Type::kUint16:
4998           __ Ubfx(w0, w0, 0, 16);
4999           break;
5000         case DataType::Type::kInt16:
5001           __ Sbfx(w0, w0, 0, 16);
5002           break;
5003         case DataType::Type::kInt32:
5004         case DataType::Type::kInt64:
5005         case DataType::Type::kFloat32:
5006         case DataType::Type::kFloat64:
5007         case DataType::Type::kVoid:
5008           break;
5009         default:
5010           DCHECK(false) << invoke->GetType();
5011           break;
5012       }
5013       if (out_frame_size != 0u) {
5014         DecreaseFrame(out_frame_size);
5015       }
5016       break;
5017     }
5018     case CodePtrLocation::kCallArtMethod: {
5019       // LR = callee_method->ptr_sized_fields_.entry_point_from_quick_compiled_code_;
5020       MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
5021       __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
5022       // lr()
5023       call_lr();
5024       break;
5025     }
5026   }
5027 
5028   DCHECK(!IsLeafMethod());
5029 }
5030 
5031 void CodeGeneratorARM64::GenerateVirtualCall(
5032     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
5033   // Use the calling convention instead of the location of the receiver, as
5034   // intrinsics may have put the receiver in a different register. In the intrinsics
5035   // slow path, the arguments have been moved to the right place, so here we are
5036   // guaranteed that the receiver is the first register of the calling convention.
5037   InvokeDexCallingConvention calling_convention;
5038   Register receiver = calling_convention.GetRegisterAt(0);
5039   Register temp = XRegisterFrom(temp_in);
5040   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5041       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
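  // The vtable is embedded directly in the Class object, so the ArtMethod* is loaded from
  // the class at a fixed offset without an extra indirection.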
5042   Offset class_offset = mirror::Object::ClassOffset();
5043   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
5044 
5045   DCHECK(receiver.IsRegister());
5046 
5047   {
5048     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
5049     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5050     // /* HeapReference<Class> */ temp = receiver->klass_
5051     __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
5052     MaybeRecordImplicitNullCheck(invoke);
5053   }
5054   // Instead of simply (possibly) unpoisoning `temp` here, we should
5055   // emit a read barrier for the previous class reference load.
5056   // However this is not required in practice, as this is an
5057   // intermediate/temporary reference and because the current
5058   // concurrent copying collector keeps the from-space memory
5059   // intact/accessible until the end of the marking phase (though the
5060   // concurrent copying collector may not do so in the future).
5061   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
5062 
5063   // If we're compiling baseline, update the inline cache.
5064   MaybeGenerateInlineCacheCheck(invoke, temp);
5065 
5066   // temp = temp->GetMethodAt(method_offset);
5067   __ Ldr(temp, MemOperand(temp, method_offset));
5068   // lr = temp->GetEntryPoint();
5069   __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
5070   {
5071     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
5072     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
5073     // lr();
5074     __ blr(lr);
5075     RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5076   }
5077 }
5078 
5079 void CodeGeneratorARM64::MoveFromReturnRegister(Location trg, DataType::Type type) {
5080   if (!trg.IsValid()) {
5081     DCHECK(type == DataType::Type::kVoid);
5082     return;
5083   }
5084 
5085   DCHECK_NE(type, DataType::Type::kVoid);
5086 
5087   if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) {
5088     Register trg_reg = RegisterFrom(trg, type);
5089     Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
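    // kDiscardForSameWReg lets the macro-assembler elide the move when source and
    // destination are the same W register.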
5090     __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
5091   } else {
5092     VRegister trg_reg = FPRegisterFrom(trg, type);
5093     VRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type);
5094     __ Fmov(trg_reg, res_reg);
5095   }
5096 }
5097 
5098 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
5099   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
5100   if (intrinsic.TryDispatch(invoke)) {
5101     return;
5102   }
5103   HandleInvoke(invoke);
5104 }
5105 
5106 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
5107   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
5108     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5109     return;
5110   }
5111   codegen_->GenerateInvokePolymorphicCall(invoke);
5112   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5113 }
5114 
5115 void LocationsBuilderARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
5116   HandleInvoke(invoke);
5117 }
5118 
5119 void InstructionCodeGeneratorARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
5120   codegen_->GenerateInvokeCustomCall(invoke);
5121   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5122 }
5123 
5124 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageIntrinsicPatch(
5125     uint32_t intrinsic_data,
5126     vixl::aarch64::Label* adrp_label) {
5127   return NewPcRelativePatch(
5128       /* dex_file= */ nullptr, intrinsic_data, adrp_label, &boot_image_other_patches_);
5129 }
5130 
5131 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch(
5132     uint32_t boot_image_offset,
5133     vixl::aarch64::Label* adrp_label) {
5134   return NewPcRelativePatch(
5135       /* dex_file= */ nullptr, boot_image_offset, adrp_label, &boot_image_other_patches_);
5136 }
5137 
5138 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch(
5139     MethodReference target_method,
5140     vixl::aarch64::Label* adrp_label) {
5141   return NewPcRelativePatch(
5142       target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_);
5143 }
5144 
5145 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch(
5146     MethodReference target_method,
5147     vixl::aarch64::Label* adrp_label) {
5148   return NewPcRelativePatch(
5149       target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_);
5150 }
5151 
5152 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch(
5153     const DexFile& dex_file,
5154     dex::TypeIndex type_index,
5155     vixl::aarch64::Label* adrp_label) {
5156   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_);
5157 }
5158 
5159 vixl::aarch64::Label* CodeGeneratorARM64::NewAppImageTypePatch(
5160     const DexFile& dex_file,
5161     dex::TypeIndex type_index,
5162     vixl::aarch64::Label* adrp_label) {
5163   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &app_image_type_patches_);
5164 }
5165 
5166 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
5167     HLoadClass* load_class,
5168     vixl::aarch64::Label* adrp_label) {
5169   const DexFile& dex_file = load_class->GetDexFile();
5170   dex::TypeIndex type_index = load_class->GetTypeIndex();
5171   ArenaDeque<PcRelativePatchInfo>* patches = nullptr;
5172   switch (load_class->GetLoadKind()) {
5173     case HLoadClass::LoadKind::kBssEntry:
5174       patches = &type_bss_entry_patches_;
5175       break;
5176     case HLoadClass::LoadKind::kBssEntryPublic:
5177       patches = &public_type_bss_entry_patches_;
5178       break;
5179     case HLoadClass::LoadKind::kBssEntryPackage:
5180       patches = &package_type_bss_entry_patches_;
5181       break;
5182     default:
5183       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
5184       UNREACHABLE();
5185   }
5186   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, patches);
5187 }
5188 
5189 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch(
5190     const DexFile& dex_file,
5191     dex::StringIndex string_index,
5192     vixl::aarch64::Label* adrp_label) {
5193   return NewPcRelativePatch(
5194       &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_);
5195 }
5196 
5197 vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
5198     const DexFile& dex_file,
5199     dex::StringIndex string_index,
5200     vixl::aarch64::Label* adrp_label) {
5201   return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
5202 }
5203 
5204 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageJniEntrypointPatch(
5205     MethodReference target_method,
5206     vixl::aarch64::Label* adrp_label) {
5207   return NewPcRelativePatch(
5208       target_method.dex_file, target_method.index, adrp_label, &boot_image_jni_entrypoint_patches_);
5209 }
5210 
5211 void CodeGeneratorARM64::EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset) {
5212   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
5213   DCHECK(!GetCompilerOptions().IsJitCompiler());
5214   call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
5215   vixl::aarch64::Label* bl_label = &call_entrypoint_patches_.back().label;
5216   __ bind(bl_label);
5217   __ bl(static_cast<int64_t>(0));  // Placeholder, patched at link-time.
5218 }
5219 
5220 void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) {
5221   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
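  // For JIT, branch directly to a slow path emitted alongside the method; for AOT, emit a
  // placeholder CBNZ that is later patched to branch to a shared thunk.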
5222   if (GetCompilerOptions().IsJitCompiler()) {
5223     auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
5224     vixl::aarch64::Label* slow_path_entry = &it->second.label;
5225     __ cbnz(mr, slow_path_entry);
5226   } else {
5227     baker_read_barrier_patches_.emplace_back(custom_data);
5228     vixl::aarch64::Label* cbnz_label = &baker_read_barrier_patches_.back().label;
5229     __ bind(cbnz_label);
5230     __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
5231   }
5232 }
5233 
5234 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
5235     const DexFile* dex_file,
5236     uint32_t offset_or_index,
5237     vixl::aarch64::Label* adrp_label,
5238     ArenaDeque<PcRelativePatchInfo>* patches) {
5239   // Add a patch entry and return the label.
5240   patches->emplace_back(dex_file, offset_or_index);
5241   PcRelativePatchInfo* info = &patches->back();
5242   vixl::aarch64::Label* label = &info->label;
5243   // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
5244   info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
5245   return label;
5246 }
5247 
5248 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
5249   jit_patches_.EmitJitRootPatches(code, roots_data, *GetCodeGenerationData());
5250 }
5251 
5252 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
5253                                              vixl::aarch64::Register reg) {
5254   DCHECK(reg.IsX());
5255   SingleEmissionCheckScope guard(GetVIXLAssembler());
5256   __ Bind(fixup_label);
5257   __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
5258 }
5259 
5260 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
5261                                             vixl::aarch64::Register out,
5262                                             vixl::aarch64::Register base) {
5263   DCHECK(out.IsX());
5264   DCHECK(base.IsX());
5265   SingleEmissionCheckScope guard(GetVIXLAssembler());
5266   __ Bind(fixup_label);
5267   __ add(out, base, Operand(/* offset placeholder */ 0));
5268 }
5269 
5270 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
5271                                                   vixl::aarch64::Register out,
5272                                                   vixl::aarch64::Register base) {
5273   DCHECK(base.IsX());
5274   SingleEmissionCheckScope guard(GetVIXLAssembler());
5275   __ Bind(fixup_label);
5276   __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
5277 }
5278 
5279 void CodeGeneratorARM64::LoadBootImageRelRoEntry(vixl::aarch64::Register reg,
5280                                                  uint32_t boot_image_offset) {
5281   DCHECK(reg.IsW());
5282   // Add ADRP with its PC-relative boot image .data.img.rel.ro patch.
5283   vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset);
5284   EmitAdrpPlaceholder(adrp_label, reg.X());
5285   // Add LDR with its PC-relative boot image .data.img.rel.ro patch.
5286   vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label);
5287   EmitLdrOffsetPlaceholder(ldr_label, reg.W(), reg.X());
5288 }
5289 
5290 void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg,
5291                                               uint32_t boot_image_reference) {
5292   if (GetCompilerOptions().IsBootImage()) {
5293     // Add ADRP with its PC-relative intrinsic reference patch.
5294     vixl::aarch64::Label* adrp_label = NewBootImageIntrinsicPatch(boot_image_reference);
5295     EmitAdrpPlaceholder(adrp_label, reg.X());
5296     // Add ADD with its PC-relative intrinsic reference patch.
5297     vixl::aarch64::Label* add_label = NewBootImageIntrinsicPatch(boot_image_reference, adrp_label);
5298     EmitAddPlaceholder(add_label, reg.X(), reg.X());
5299   } else if (GetCompilerOptions().GetCompilePic()) {
5300     LoadBootImageRelRoEntry(reg, boot_image_reference);
5301   } else {
5302     DCHECK(GetCompilerOptions().IsJitCompiler());
5303     gc::Heap* heap = Runtime::Current()->GetHeap();
5304     DCHECK(!heap->GetBootImageSpaces().empty());
5305     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
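    // The boot image is already mapped when JIT compiling, so embed its absolute address
    // via the literal pool (a 32-bit load, as the boot image is mapped in the low 4GiB).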
5306     __ Ldr(reg.W(), DeduplicateBootImageAddressLiteral(reinterpret_cast<uintptr_t>(address)));
5307   }
5308 }
5309 
5310 void CodeGeneratorARM64::LoadTypeForBootImageIntrinsic(vixl::aarch64::Register reg,
5311                                                        TypeReference target_type) {
5312   // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5313   DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5314   // Add ADRP with its PC-relative type patch.
5315   vixl::aarch64::Label* adrp_label =
5316       NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
5317   EmitAdrpPlaceholder(adrp_label, reg.X());
5318   // Add ADD with its PC-relative type patch.
5319   vixl::aarch64::Label* add_label =
5320       NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex(), adrp_label);
5321   EmitAddPlaceholder(add_label, reg.X(), reg.X());
5322 }
5323 
5324 void CodeGeneratorARM64::LoadIntrinsicDeclaringClass(vixl::aarch64::Register reg, HInvoke* invoke) {
5325   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
5326   if (GetCompilerOptions().IsBootImage()) {
5327     MethodReference target_method = invoke->GetResolvedMethodReference();
5328     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5329     LoadTypeForBootImageIntrinsic(reg, TypeReference(target_method.dex_file, type_idx));
5330   } else {
5331     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
5332     LoadBootImageAddress(reg, boot_image_offset);
5333   }
5334 }
5335 
5336 void CodeGeneratorARM64::LoadClassRootForIntrinsic(vixl::aarch64::Register reg,
5337                                                    ClassRoot class_root) {
5338   if (GetCompilerOptions().IsBootImage()) {
5339     ScopedObjectAccess soa(Thread::Current());
5340     ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
5341     TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex());
5342     LoadTypeForBootImageIntrinsic(reg, target_type);
5343   } else {
5344     uint32_t boot_image_offset = GetBootImageOffset(class_root);
5345     LoadBootImageAddress(reg, boot_image_offset);
5346   }
5347 }
5348 
5349 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
5350 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
5351     const ArenaDeque<PcRelativePatchInfo>& infos,
5352     ArenaVector<linker::LinkerPatch>* linker_patches) {
5353   for (const PcRelativePatchInfo& info : infos) {
5354     linker_patches->push_back(Factory(info.label.GetLocation(),
5355                                       info.target_dex_file,
5356                                       info.pc_insn_label->GetLocation(),
5357                                       info.offset_or_index));
5358   }
5359 }
5360 
5361 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
5362 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5363                                      const DexFile* target_dex_file,
5364                                      uint32_t pc_insn_offset,
5365                                      uint32_t boot_image_offset) {
5366   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
5367   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5368 }
5369 
5370 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5371   DCHECK(linker_patches->empty());
5372   size_t size =
5373       boot_image_method_patches_.size() +
5374       method_bss_entry_patches_.size() +
5375       boot_image_type_patches_.size() +
5376       app_image_type_patches_.size() +
5377       type_bss_entry_patches_.size() +
5378       public_type_bss_entry_patches_.size() +
5379       package_type_bss_entry_patches_.size() +
5380       boot_image_string_patches_.size() +
5381       string_bss_entry_patches_.size() +
5382       boot_image_jni_entrypoint_patches_.size() +
5383       boot_image_other_patches_.size() +
5384       call_entrypoint_patches_.size() +
5385       baker_read_barrier_patches_.size();
5386   linker_patches->reserve(size);
5387   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5388     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5389         boot_image_method_patches_, linker_patches);
5390     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5391         boot_image_type_patches_, linker_patches);
5392     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5393         boot_image_string_patches_, linker_patches);
5394   } else {
5395     DCHECK(boot_image_method_patches_.empty());
5396     DCHECK(boot_image_type_patches_.empty());
5397     DCHECK(boot_image_string_patches_.empty());
5398   }
5399   DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
5400   if (GetCompilerOptions().IsBootImage()) {
5401     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5402         boot_image_other_patches_, linker_patches);
5403   } else {
5404     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
5405         boot_image_other_patches_, linker_patches);
5406     EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
5407         app_image_type_patches_, linker_patches);
5408   }
5409   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5410       method_bss_entry_patches_, linker_patches);
5411   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5412       type_bss_entry_patches_, linker_patches);
5413   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
5414       public_type_bss_entry_patches_, linker_patches);
5415   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
5416       package_type_bss_entry_patches_, linker_patches);
5417   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5418       string_bss_entry_patches_, linker_patches);
5419   EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
5420       boot_image_jni_entrypoint_patches_, linker_patches);
5421   for (const PatchInfo<vixl::aarch64::Label>& info : call_entrypoint_patches_) {
5422     DCHECK(info.target_dex_file == nullptr);
5423     linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
5424         info.label.GetLocation(), info.offset_or_index));
5425   }
5426   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
5427     linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
5428         info.label.GetLocation(), info.custom_data));
5429   }
5430   DCHECK_EQ(size, linker_patches->size());
5431 }
5432 
5433 bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const {
5434   return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
5435          patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
5436          patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
5437 }
5438 
5439 void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch,
5440                                        /*out*/ ArenaVector<uint8_t>* code,
5441                                        /*out*/ std::string* debug_name) {
5442   Arm64Assembler assembler(GetGraph()->GetAllocator());
5443   switch (patch.GetType()) {
5444     case linker::LinkerPatch::Type::kCallRelative: {
5445       // The thunk just uses the entry point in the ArtMethod. This works even for calls
5446       // to the generic JNI and interpreter trampolines.
5447       Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5448           kArm64PointerSize).Int32Value());
5449       assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
5450       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
5451         *debug_name = "MethodCallThunk";
5452       }
5453       break;
5454     }
5455     case linker::LinkerPatch::Type::kCallEntrypoint: {
5456       Offset offset(patch.EntrypointOffset());
5457       assembler.JumpTo(ManagedRegister(arm64::TR), offset, ManagedRegister(arm64::IP0));
5458       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
5459         *debug_name = "EntrypointCallThunk_" + std::to_string(offset.Uint32Value());
5460       }
5461       break;
5462     }
5463     case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
5464       DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
5465       CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
5466       break;
5467     }
5468     default:
5469       LOG(FATAL) << "Unexpected patch type " << patch.GetType();
5470       UNREACHABLE();
5471   }
5472 
5473   // Ensure we emit the literal pool if any.
5474   assembler.FinalizeCode();
5475   code->resize(assembler.CodeSize());
5476   MemoryRegion code_region(code->data(), code->size());
5477   assembler.CopyInstructions(code_region);
5478 }
5479 
5480 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
5481   // Explicit clinit checks triggered by static invokes must have been pruned by
5482   // art::PrepareForRegisterAllocation.
5483   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
5484 
5485   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
5486     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5487     return;
5488   }
5489 
5490   LocationSummary* locations = invoke->GetLocations();
5491   codegen_->GenerateStaticOrDirectCall(
5492       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
5493 
5494   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5495 }
5496 
5497 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
5498   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
5499     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5500     return;
5501   }
5502 
5503   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
5504   DCHECK(!codegen_->IsLeafMethod());
5505 
5506   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5507 }
5508 
5509 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
5510     HLoadClass::LoadKind desired_class_load_kind) {
5511   switch (desired_class_load_kind) {
5512     case HLoadClass::LoadKind::kInvalid:
5513       LOG(FATAL) << "UNREACHABLE";
5514       UNREACHABLE();
5515     case HLoadClass::LoadKind::kReferrersClass:
5516       break;
5517     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5518     case HLoadClass::LoadKind::kBootImageRelRo:
5519     case HLoadClass::LoadKind::kAppImageRelRo:
5520     case HLoadClass::LoadKind::kBssEntry:
5521     case HLoadClass::LoadKind::kBssEntryPublic:
5522     case HLoadClass::LoadKind::kBssEntryPackage:
5523       DCHECK(!GetCompilerOptions().IsJitCompiler());
5524       break;
5525     case HLoadClass::LoadKind::kJitBootImageAddress:
5526     case HLoadClass::LoadKind::kJitTableAddress:
5527       DCHECK(GetCompilerOptions().IsJitCompiler());
5528       break;
5529     case HLoadClass::LoadKind::kRuntimeCall:
5530       break;
5531   }
5532   return desired_class_load_kind;
5533 }
5534 
5535 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
5536   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5537   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5538     InvokeRuntimeCallingConvention calling_convention;
5539     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
5540         cls,
5541         LocationFrom(calling_convention.GetRegisterAt(0)),
5542         LocationFrom(vixl::aarch64::x0));
5543     DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
5544     return;
5545   }
5546   DCHECK_EQ(cls->NeedsAccessCheck(),
5547             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
5548                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
5549 
5550   const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
5551   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
5552       ? LocationSummary::kCallOnSlowPath
5553       : LocationSummary::kNoCall;
5554   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
5555   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
5556     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5557   }
5558 
5559   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
5560     locations->SetInAt(0, Location::RequiresRegister());
5561   }
5562   locations->SetOut(Location::RequiresRegister());
5563   if (load_kind == HLoadClass::LoadKind::kBssEntry ||
5564       load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
5565       load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
5566     if (codegen_->EmitNonBakerReadBarrier()) {
5567       // For non-Baker read barrier we have a temp-clobbering call.
5568     } else {
5569       // Rely on the type resolution or initialization and marking to save everything we need.
5570       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
5571     }
5572   }
5573 }
5574 
5575 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5576 // move.
5577 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
5578   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5579   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5580     codegen_->GenerateLoadClassRuntimeCall(cls);
5581     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5582     return;
5583   }
5584   DCHECK_EQ(cls->NeedsAccessCheck(),
5585             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
5586                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
5587 
5588   Location out_loc = cls->GetLocations()->Out();
5589   Register out = OutputRegister(cls);
5590 
5591   const ReadBarrierOption read_barrier_option =
5592       cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
5593   bool generate_null_check = false;
5594   switch (load_kind) {
5595     case HLoadClass::LoadKind::kReferrersClass: {
5596       DCHECK(!cls->CanCallRuntime());
5597       DCHECK(!cls->MustGenerateClinitCheck());
5598       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5599       Register current_method = InputRegisterAt(cls, 0);
5600       codegen_->GenerateGcRootFieldLoad(cls,
5601                                         out_loc,
5602                                         current_method,
5603                                         ArtMethod::DeclaringClassOffset().Int32Value(),
5604                                         /* fixup_label= */ nullptr,
5605                                         read_barrier_option);
5606       break;
5607     }
5608     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
5609       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
5610              codegen_->GetCompilerOptions().IsBootImageExtension());
5611       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5612       // Add ADRP with its PC-relative type patch.
5613       const DexFile& dex_file = cls->GetDexFile();
5614       dex::TypeIndex type_index = cls->GetTypeIndex();
5615       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index);
5616       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5617       // Add ADD with its PC-relative type patch.
5618       vixl::aarch64::Label* add_label =
5619           codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label);
5620       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5621       break;
5622     }
5623     case HLoadClass::LoadKind::kBootImageRelRo: {
5624       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5625       uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(cls);
5626       codegen_->LoadBootImageRelRoEntry(out.W(), boot_image_offset);
5627       break;
5628     }
5629     case HLoadClass::LoadKind::kAppImageRelRo: {
5630       DCHECK(codegen_->GetCompilerOptions().IsAppImage());
5631       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5632       // Add ADRP with its PC-relative type patch.
5633       const DexFile& dex_file = cls->GetDexFile();
5634       dex::TypeIndex type_index = cls->GetTypeIndex();
5635       vixl::aarch64::Label* adrp_label = codegen_->NewAppImageTypePatch(dex_file, type_index);
5636       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5637       // Add LDR with its PC-relative type patch.
5638       vixl::aarch64::Label* ldr_label =
5639           codegen_->NewAppImageTypePatch(dex_file, type_index, adrp_label);
5640       codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
5641       break;
5642     }
5643     case HLoadClass::LoadKind::kBssEntry:
5644     case HLoadClass::LoadKind::kBssEntryPublic:
5645     case HLoadClass::LoadKind::kBssEntryPackage: {
5646       // Add ADRP with its PC-relative Class .bss entry patch.
5647       vixl::aarch64::Register temp = XRegisterFrom(out_loc);
5648       vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(cls);
5649       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5650       // Add LDR with its PC-relative Class .bss entry patch.
5651       vixl::aarch64::Label* ldr_label = codegen_->NewBssEntryTypePatch(cls, adrp_label);
5652       // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
5653       // All aligned loads are implicitly atomic consume operations on ARM64.
5654       codegen_->GenerateGcRootFieldLoad(cls,
5655                                         out_loc,
5656                                         temp,
5657                                         /* offset placeholder */ 0u,
5658                                         ldr_label,
5659                                         read_barrier_option);
5660       generate_null_check = true;
5661       break;
5662     }
5663     case HLoadClass::LoadKind::kJitBootImageAddress: {
5664       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5665       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
5666       DCHECK_NE(address, 0u);
5667       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5668       break;
5669     }
5670     case HLoadClass::LoadKind::kJitTableAddress: {
5671       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
5672                                                        cls->GetTypeIndex(),
5673                                                        cls->GetClass()));
5674       codegen_->GenerateGcRootFieldLoad(cls,
5675                                         out_loc,
5676                                         out.X(),
5677                                         /* offset= */ 0,
5678                                         /* fixup_label= */ nullptr,
5679                                         read_barrier_option);
5680       break;
5681     }
5682     case HLoadClass::LoadKind::kRuntimeCall:
5683     case HLoadClass::LoadKind::kInvalid:
5684       LOG(FATAL) << "UNREACHABLE";
5685       UNREACHABLE();
5686   }
5687 
5688   bool do_clinit = cls->MustGenerateClinitCheck();
5689   if (generate_null_check || do_clinit) {
5690     DCHECK(cls->CanCallRuntime());
5691     SlowPathCodeARM64* slow_path =
5692         new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(cls, cls);
5693     codegen_->AddSlowPath(slow_path);
5694     if (generate_null_check) {
5695       __ Cbz(out, slow_path->GetEntryLabel());
5696     }
5697     if (cls->MustGenerateClinitCheck()) {
5698       GenerateClassInitializationCheck(slow_path, out);
5699     } else {
5700       __ Bind(slow_path->GetExitLabel());
5701     }
5702     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5703   }
5704 }
5705 
5706 void LocationsBuilderARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
5707   InvokeRuntimeCallingConvention calling_convention;
5708   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
5709   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
5710 }
5711 
5712 void InstructionCodeGeneratorARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
5713   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
5714 }
5715 
5716 void LocationsBuilderARM64::VisitLoadMethodType(HLoadMethodType* load) {
5717   InvokeRuntimeCallingConvention calling_convention;
5718   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
5719   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
5720 }
5721 
5722 void InstructionCodeGeneratorARM64::VisitLoadMethodType(HLoadMethodType* load) {
5723   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
5724 }
5725 
5726 static MemOperand GetExceptionTlsAddress() {
5727   return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
5728 }
5729 
5730 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
5731   LocationSummary* locations =
5732       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
5733   locations->SetOut(Location::RequiresRegister());
5734 }
5735 
5736 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
5737   __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress());
5738 }
5739 
5740 void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
5741   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
5742 }
5743 
5744 void InstructionCodeGeneratorARM64::VisitClearException([[maybe_unused]] HClearException* clear) {
5745   __ Str(wzr, GetExceptionTlsAddress());
5746 }
5747 
5748 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
5749     HLoadString::LoadKind desired_string_load_kind) {
5750   switch (desired_string_load_kind) {
5751     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5752     case HLoadString::LoadKind::kBootImageRelRo:
5753     case HLoadString::LoadKind::kBssEntry:
5754       DCHECK(!GetCompilerOptions().IsJitCompiler());
5755       break;
5756     case HLoadString::LoadKind::kJitBootImageAddress:
5757     case HLoadString::LoadKind::kJitTableAddress:
5758       DCHECK(GetCompilerOptions().IsJitCompiler());
5759       break;
5760     case HLoadString::LoadKind::kRuntimeCall:
5761       break;
5762   }
5763   return desired_string_load_kind;
5764 }
5765 
5766 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
5767   LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
5768   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
5769   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
5770     InvokeRuntimeCallingConvention calling_convention;
5771     locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
5772   } else {
5773     locations->SetOut(Location::RequiresRegister());
5774     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
5775       if (codegen_->EmitNonBakerReadBarrier()) {
5776         // For non-Baker read barrier we have a temp-clobbering call.
5777       } else {
5778         // Rely on the pResolveString and marking to save everything we need.
5779         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
5780       }
5781     }
5782   }
5783 }
5784 
5785 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5786 // move.
5787 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
5788   Register out = OutputRegister(load);
5789   Location out_loc = load->GetLocations()->Out();
5790 
5791   switch (load->GetLoadKind()) {
5792     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5793       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
5794              codegen_->GetCompilerOptions().IsBootImageExtension());
5795       // Add ADRP with its PC-relative String patch.
5796       const DexFile& dex_file = load->GetDexFile();
5797       const dex::StringIndex string_index = load->GetStringIndex();
5798       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index);
5799       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5800       // Add ADD with its PC-relative String patch.
5801       vixl::aarch64::Label* add_label =
5802           codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label);
5803       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5804       return;
5805     }
5806     case HLoadString::LoadKind::kBootImageRelRo: {
5807       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5808       uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(load);
5809       codegen_->LoadBootImageRelRoEntry(out.W(), boot_image_offset);
5810       return;
5811     }
5812     case HLoadString::LoadKind::kBssEntry: {
5813       // Add ADRP with its PC-relative String .bss entry patch.
5814       const DexFile& dex_file = load->GetDexFile();
5815       const dex::StringIndex string_index = load->GetStringIndex();
5816       Register temp = XRegisterFrom(out_loc);
5817       vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index);
5818       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5819       // Add LDR with its PC-relative String .bss entry patch.
5820       vixl::aarch64::Label* ldr_label =
5821           codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
5822       // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
5823       // All aligned loads are implicitly atomic consume operations on ARM64.
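      // The emitted code is, roughly, an ADRP/LDR pair whose target .bss slot
      // is filled in at link time:
      //   adrp temp, <string .bss entry page>
      //   ldr  wN, [temp, #<page offset of the .bss entry>]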
5824       codegen_->GenerateGcRootFieldLoad(load,
5825                                         out_loc,
5826                                         temp,
5827                                         /* offset placeholder */ 0u,
5828                                         ldr_label,
5829                                         codegen_->GetCompilerReadBarrierOption());
5830       SlowPathCodeARM64* slow_path =
5831           new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
5832       codegen_->AddSlowPath(slow_path);
5833       __ Cbz(out.X(), slow_path->GetEntryLabel());
5834       __ Bind(slow_path->GetExitLabel());
5835       codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5836       return;
5837     }
5838     case HLoadString::LoadKind::kJitBootImageAddress: {
5839       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
5840       DCHECK_NE(address, 0u);
5841       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5842       return;
5843     }
5844     case HLoadString::LoadKind::kJitTableAddress: {
5845       __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
5846                                                         load->GetStringIndex(),
5847                                                         load->GetString()));
5848       codegen_->GenerateGcRootFieldLoad(load,
5849                                         out_loc,
5850                                         out.X(),
5851                                         /* offset= */ 0,
5852                                         /* fixup_label= */ nullptr,
5853                                         codegen_->GetCompilerReadBarrierOption());
5854       return;
5855     }
5856     default:
5857       break;
5858   }
5859 
5860   InvokeRuntimeCallingConvention calling_convention;
5861   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
5862   __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
5863   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
5864   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
5865   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5866 }
5867 
5868 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
5869   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
5870   locations->SetOut(Location::ConstantLocation(constant));
5871 }
5872 
5873 void InstructionCodeGeneratorARM64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
5874   // Will be generated at use site.
5875 }
5876 
5877 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5878   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5879       instruction, LocationSummary::kCallOnMainOnly);
5880   InvokeRuntimeCallingConvention calling_convention;
5881   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5882 }
5883 
5884 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5885   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
5886                           instruction,
5887                           instruction->GetDexPc());
5888   if (instruction->IsEnter()) {
5889     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
5890   } else {
5891     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
5892   }
5893   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5894 }
5895 
5896 void LocationsBuilderARM64::VisitMul(HMul* mul) {
5897   LocationSummary* locations =
5898       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
5899   switch (mul->GetResultType()) {
5900     case DataType::Type::kInt32:
5901     case DataType::Type::kInt64:
5902       locations->SetInAt(0, Location::RequiresRegister());
5903       locations->SetInAt(1, Location::RequiresRegister());
5904       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5905       break;
5906 
5907     case DataType::Type::kFloat32:
5908     case DataType::Type::kFloat64:
5909       locations->SetInAt(0, Location::RequiresFpuRegister());
5910       locations->SetInAt(1, Location::RequiresFpuRegister());
5911       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5912       break;
5913 
5914     default:
5915       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5916   }
5917 }
5918 
5919 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) {
5920   switch (mul->GetResultType()) {
5921     case DataType::Type::kInt32:
5922     case DataType::Type::kInt64:
5923       __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
5924       break;
5925 
5926     case DataType::Type::kFloat32:
5927     case DataType::Type::kFloat64:
5928       __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1));
5929       break;
5930 
5931     default:
5932       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5933   }
5934 }
5935 
5936 void LocationsBuilderARM64::VisitNeg(HNeg* neg) {
5937   LocationSummary* locations =
5938       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
5939   switch (neg->GetResultType()) {
5940     case DataType::Type::kInt32:
5941     case DataType::Type::kInt64:
5942       locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
5943       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5944       break;
5945 
5946     case DataType::Type::kFloat32:
5947     case DataType::Type::kFloat64:
5948       locations->SetInAt(0, Location::RequiresFpuRegister());
5949       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5950       break;
5951 
5952     default:
5953       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5954   }
5955 }
5956 
5957 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
5958   switch (neg->GetResultType()) {
5959     case DataType::Type::kInt32:
5960     case DataType::Type::kInt64:
5961       __ Neg(OutputRegister(neg), InputOperandAt(neg, 0));
5962       break;
5963 
5964     case DataType::Type::kFloat32:
5965     case DataType::Type::kFloat64:
5966       __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
5967       break;
5968 
5969     default:
5970       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5971   }
5972 }
5973 
5974 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
5975   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5976       instruction, LocationSummary::kCallOnMainOnly);
5977   InvokeRuntimeCallingConvention calling_convention;
5978   locations->SetOut(LocationFrom(x0));
5979   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5980   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5981 }
5982 
5983 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
5984   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5985   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5986   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5987   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5988   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5989 }
5990 
5991 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
5992   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5993       instruction, LocationSummary::kCallOnMainOnly);
5994   InvokeRuntimeCallingConvention calling_convention;
5995   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5996   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
5997 }
5998 
5999 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
6000   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
6001   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
6002   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6003 }
6004 
6005 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
6006   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6007   locations->SetInAt(0, Location::RequiresRegister());
6008   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6009 }
6010 
6011 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
6012   switch (instruction->GetResultType()) {
6013     case DataType::Type::kInt32:
6014     case DataType::Type::kInt64:
6015       __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
6016       break;
6017 
6018     default:
6019       LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
6020   }
6021 }
6022 
6023 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
6024   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6025   locations->SetInAt(0, Location::RequiresRegister());
6026   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6027 }
6028 
6029 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
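  // The input is a boolean materialized as 0 or 1, so XOR with 1 flips it.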
6030   __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
6031 }
6032 
6033 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
6034   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6035   locations->SetInAt(0, Location::RequiresRegister());
6036 }
6037 
6038 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
6039   if (CanMoveNullCheckToUser(instruction)) {
6040     return;
6041   }
6042   {
6043     // Ensure that between load and RecordPcInfo there are no pools emitted.
6044     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6045     Location obj = instruction->GetLocations()->InAt(0);
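    // A load from offset 0 faults for a null `obj`; the fault handler uses the
    // PC info recorded below to throw the NullPointerException.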
6046     __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
6047     RecordPcInfo(instruction, instruction->GetDexPc());
6048   }
6049 }
6050 
6051 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
6052   SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathARM64(instruction);
6053   AddSlowPath(slow_path);
6054 
6055   LocationSummary* locations = instruction->GetLocations();
6056   Location obj = locations->InAt(0);
6057 
6058   __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
6059 }
6060 
6061 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
6062   codegen_->GenerateNullCheck(instruction);
6063 }
6064 
6065 void LocationsBuilderARM64::VisitOr(HOr* instruction) {
6066   HandleBinaryOp(instruction);
6067 }
6068 
6069 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
6070   HandleBinaryOp(instruction);
6071 }
6072 
6073 void LocationsBuilderARM64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
6074   LOG(FATAL) << "Unreachable";
6075 }
6076 
6077 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
6078   if (instruction->GetNext()->IsSuspendCheck() &&
6079       instruction->GetBlock()->GetLoopInformation() != nullptr) {
6080     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6081     // The back edge will generate the suspend check.
6082     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6083   }
6084 
6085   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6086 }
6087 
6088 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
6089   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6090   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
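  // Stack-passed arguments live in the caller's frame, so their slots are
  // rebased by this method's own frame size below.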
6091   if (location.IsStackSlot()) {
6092     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
6093   } else if (location.IsDoubleStackSlot()) {
6094     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
6095   }
6096   locations->SetOut(location);
6097 }
6098 
6099 void InstructionCodeGeneratorARM64::VisitParameterValue(
6100     [[maybe_unused]] HParameterValue* instruction) {
6101   // Nothing to do, the parameter is already at its location.
6102 }
6103 
6104 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
6105   LocationSummary* locations =
6106       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6107   locations->SetOut(LocationFrom(kArtMethodRegister));
6108 }
6109 
6110 void InstructionCodeGeneratorARM64::VisitCurrentMethod(
6111     [[maybe_unused]] HCurrentMethod* instruction) {
6112   // Nothing to do, the method is already at its location.
6113 }
6114 
6115 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
6116   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6117   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
6118     locations->SetInAt(i, Location::Any());
6119   }
6120   locations->SetOut(Location::Any());
6121 }
6122 
6123 void InstructionCodeGeneratorARM64::VisitPhi([[maybe_unused]] HPhi* instruction) {
6124   LOG(FATAL) << "Unreachable";
6125 }
6126 
6127 void LocationsBuilderARM64::VisitRem(HRem* rem) {
6128   DataType::Type type = rem->GetResultType();
6129   LocationSummary::CallKind call_kind =
6130       DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
6131                                            : LocationSummary::kNoCall;
6132   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
6133 
6134   switch (type) {
6135     case DataType::Type::kInt32:
6136     case DataType::Type::kInt64:
6137       locations->SetInAt(0, Location::RequiresRegister());
6138       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
6139       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6140       break;
6141 
6142     case DataType::Type::kFloat32:
6143     case DataType::Type::kFloat64: {
6144       InvokeRuntimeCallingConvention calling_convention;
6145       locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
6146       locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
6147       locations->SetOut(calling_convention.GetReturnLocation(type));
6148 
6149       break;
6150     }
6151 
6152     default:
6153       LOG(FATAL) << "Unexpected rem type " << type;
6154   }
6155 }
6156 
6157 void InstructionCodeGeneratorARM64::GenerateIntRemForPower2Denom(HRem *instruction) {
6158   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
6159   uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
6160   DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
6161 
6162   Register out = OutputRegister(instruction);
6163   Register dividend = InputRegisterAt(instruction, 0);
6164 
6165   if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
6166     // No need to adjust the result for non-negative dividends or the INT32_MIN/INT64_MIN dividends.
6167     // NOTE: The generated code for HRem correctly works for the INT32_MIN/INT64_MIN dividends.
6168     // INT*_MIN % imm must be 0 for any imm of power 2. 'and' works only with bits
6169     // 0..30 (Int32 case)/0..62 (Int64 case) of a dividend. For INT32_MIN/INT64_MIN they are zeros.
6170     // So 'and' always produces zero.
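    // Example: for imm == 8 and dividend == 13, 13 & 7 == 5 == 13 % 8.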
6171     __ And(out, dividend, abs_imm - 1);
6172   } else {
6173     if (abs_imm == 2) {
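      // dividend & 1 is |dividend| % 2; Csneg negates it for a negative
      // dividend (ge false), e.g. -3 % 2: (-3 & 1) == 1, negated to -1.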
6174       __ Cmp(dividend, 0);
6175       __ And(out, dividend, 1);
6176       __ Csneg(out, out, out, ge);
6177     } else {
6178       UseScratchRegisterScope temps(GetVIXLAssembler());
6179       Register temp = temps.AcquireSameSizeAs(out);
6180 
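      // temp = -dividend, setting flags. Csneg then yields dividend & mask for
      // a positive dividend and -((-dividend) & mask) for a negative one,
      // e.g. -13 % 8: temp = 13, temp & 7 = 5, result = -5.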
6181       __ Negs(temp, dividend);
6182       __ And(out, dividend, abs_imm - 1);
6183       __ And(temp, temp, abs_imm - 1);
6184       __ Csneg(out, out, temp, mi);
6185     }
6186   }
6187 }
6188 
6189 void InstructionCodeGeneratorARM64::GenerateIntRemForConstDenom(HRem *instruction) {
6190   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
6191 
6192   if (imm == 0) {
6193     // Do not generate anything.
6194     // DivZeroCheck would prevent any code to be executed.
6195     return;
6196   }
6197 
6198   if (IsPowerOfTwo(AbsOrMin(imm))) {
6199     // Cases imm == -1 or imm == 1 are handled in constant folding by
6200     // InstructionWithAbsorbingInputSimplifier.
6201     // If these cases have survived until code generation, they are handled in
6202     // GenerateIntRemForPower2Denom because -1 and 1 are powers of 2 (2^0).
6203     // The correct code is generated for them, just with more instructions.
6204     GenerateIntRemForPower2Denom(instruction);
6205   } else {
6206     DCHECK(imm < -2 || imm > 2) << imm;
6207     GenerateDivRemWithAnyConstant(instruction, imm);
6208   }
6209 }
6210 
6211 void InstructionCodeGeneratorARM64::GenerateIntRem(HRem* instruction) {
6212   DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
6213          << instruction->GetResultType();
6214 
6215   if (instruction->GetLocations()->InAt(1).IsConstant()) {
6216     GenerateIntRemForConstDenom(instruction);
6217   } else {
6218     Register out = OutputRegister(instruction);
6219     Register dividend = InputRegisterAt(instruction, 0);
6220     Register divisor = InputRegisterAt(instruction, 1);
6221     UseScratchRegisterScope temps(GetVIXLAssembler());
6222     Register temp = temps.AcquireSameSizeAs(out);
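    // temp = dividend / divisor (truncated); out = dividend - temp * divisor,
    // i.e. the Java remainder, whose sign follows the dividend.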
6223     __ Sdiv(temp, dividend, divisor);
6224     __ Msub(out, temp, divisor, dividend);
6225   }
6226 }
6227 
6228 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
6229   DataType::Type type = rem->GetResultType();
6230 
6231   switch (type) {
6232     case DataType::Type::kInt32:
6233     case DataType::Type::kInt64: {
6234       GenerateIntRem(rem);
6235       break;
6236     }
6237 
6238     case DataType::Type::kFloat32:
6239     case DataType::Type::kFloat64: {
6240       QuickEntrypointEnum entrypoint =
6241           (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod;
6242       codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
6243       if (type == DataType::Type::kFloat32) {
6244         CheckEntrypointTypes<kQuickFmodf, float, float, float>();
6245       } else {
6246         CheckEntrypointTypes<kQuickFmod, double, double, double>();
6247       }
6248       break;
6249     }
6250 
6251     default:
6252       LOG(FATAL) << "Unexpected rem type " << type;
6253       UNREACHABLE();
6254   }
6255 }
6256 
6257 void LocationsBuilderARM64::VisitMin(HMin* min) {
6258   HandleBinaryOp(min);
6259 }
6260 
6261 void InstructionCodeGeneratorARM64::VisitMin(HMin* min) {
6262   HandleBinaryOp(min);
6263 }
6264 
6265 void LocationsBuilderARM64::VisitMax(HMax* max) {
6266   HandleBinaryOp(max);
6267 }
6268 
6269 void InstructionCodeGeneratorARM64::VisitMax(HMax* max) {
6270   HandleBinaryOp(max);
6271 }
6272 
6273 void LocationsBuilderARM64::VisitAbs(HAbs* abs) {
6274   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
6275   switch (abs->GetResultType()) {
6276     case DataType::Type::kInt32:
6277     case DataType::Type::kInt64:
6278       locations->SetInAt(0, Location::RequiresRegister());
6279       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6280       break;
6281     case DataType::Type::kFloat32:
6282     case DataType::Type::kFloat64:
6283       locations->SetInAt(0, Location::RequiresFpuRegister());
6284       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6285       break;
6286     default:
6287       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
6288   }
6289 }
6290 
6291 void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) {
6292   switch (abs->GetResultType()) {
6293     case DataType::Type::kInt32:
6294     case DataType::Type::kInt64: {
6295       Register in_reg = InputRegisterAt(abs, 0);
6296       Register out_reg = OutputRegister(abs);
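      // out = (in < 0) ? -in : in; as with Math.abs(), INT_MIN maps to itself.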
6297       __ Cmp(in_reg, Operand(0));
6298       __ Cneg(out_reg, in_reg, lt);
6299       break;
6300     }
6301     case DataType::Type::kFloat32:
6302     case DataType::Type::kFloat64: {
6303       VRegister in_reg = InputFPRegisterAt(abs, 0);
6304       VRegister out_reg = OutputFPRegister(abs);
6305       __ Fabs(out_reg, in_reg);
6306       break;
6307     }
6308     default:
6309       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
6310   }
6311 }
6312 
6313 void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
6314   constructor_fence->SetLocations(nullptr);
6315 }
6316 
6317 void InstructionCodeGeneratorARM64::VisitConstructorFence(
6318     [[maybe_unused]] HConstructorFence* constructor_fence) {
6319   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
6320 }
6321 
6322 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
6323   memory_barrier->SetLocations(nullptr);
6324 }
6325 
6326 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
6327   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
6328 }
6329 
6330 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
6331   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6332   DataType::Type return_type = instruction->InputAt(0)->GetType();
6333   locations->SetInAt(0, ARM64ReturnLocation(return_type));
6334 }
6335 
6336 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* ret) {
6337   if (GetGraph()->IsCompilingOsr()) {
6338     // To simplify callers of an OSR method, we put the return value in both
6339     // floating point and core register.
6340     switch (ret->InputAt(0)->GetType()) {
6341       case DataType::Type::kFloat32:
6342         __ Fmov(w0, s0);
6343         break;
6344       case DataType::Type::kFloat64:
6345         __ Fmov(x0, d0);
6346         break;
6347       default:
6348         break;
6349     }
6350   }
6351   codegen_->GenerateFrameExit();
6352 }
6353 
6354 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
6355   instruction->SetLocations(nullptr);
6356 }
6357 
6358 void InstructionCodeGeneratorARM64::VisitReturnVoid([[maybe_unused]] HReturnVoid* instruction) {
6359   codegen_->GenerateFrameExit();
6360 }
6361 
6362 void LocationsBuilderARM64::VisitRor(HRor* ror) {
6363   HandleBinaryOp(ror);
6364 }
6365 
6366 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
6367   HandleBinaryOp(ror);
6368 }
6369 
6370 void LocationsBuilderARM64::VisitShl(HShl* shl) {
6371   HandleShift(shl);
6372 }
6373 
6374 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
6375   HandleShift(shl);
6376 }
6377 
6378 void LocationsBuilderARM64::VisitShr(HShr* shr) {
6379   HandleShift(shr);
6380 }
6381 
6382 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
6383   HandleShift(shr);
6384 }
6385 
6386 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
6387   HandleBinaryOp(instruction);
6388 }
6389 
6390 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
6391   HandleBinaryOp(instruction);
6392 }
6393 
6394 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6395   HandleFieldGet(instruction, instruction->GetFieldInfo());
6396 }
6397 
6398 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6399   HandleFieldGet(instruction, instruction->GetFieldInfo());
6400 }
6401 
6402 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6403   HandleFieldSet(instruction);
6404 }
6405 
6406 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6407   HandleFieldSet(instruction,
6408                  instruction->GetFieldInfo(),
6409                  instruction->GetValueCanBeNull(),
6410                  instruction->GetWriteBarrierKind());
6411 }
6412 
6413 void LocationsBuilderARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6414   codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(x0));
6415 }
6416 
6417 void InstructionCodeGeneratorARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
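  // w0 carries the append format value (an encoding of the argument kinds);
  // the remaining inputs were set up by CreateStringBuilderAppendLocations().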
6418   __ Mov(w0, instruction->GetFormat()->GetValue());
6419   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6420 }
6421 
6422 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
6423     HUnresolvedInstanceFieldGet* instruction) {
6424   FieldAccessCallingConventionARM64 calling_convention;
6425   codegen_->CreateUnresolvedFieldLocationSummary(
6426       instruction, instruction->GetFieldType(), calling_convention);
6427 }
6428 
6429 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
6430     HUnresolvedInstanceFieldGet* instruction) {
6431   FieldAccessCallingConventionARM64 calling_convention;
6432   codegen_->GenerateUnresolvedFieldAccess(instruction,
6433                                           instruction->GetFieldType(),
6434                                           instruction->GetFieldIndex(),
6435                                           instruction->GetDexPc(),
6436                                           calling_convention);
6437 }
6438 
6439 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
6440     HUnresolvedInstanceFieldSet* instruction) {
6441   FieldAccessCallingConventionARM64 calling_convention;
6442   codegen_->CreateUnresolvedFieldLocationSummary(
6443       instruction, instruction->GetFieldType(), calling_convention);
6444 }
6445 
6446 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
6447     HUnresolvedInstanceFieldSet* instruction) {
6448   FieldAccessCallingConventionARM64 calling_convention;
6449   codegen_->GenerateUnresolvedFieldAccess(instruction,
6450                                           instruction->GetFieldType(),
6451                                           instruction->GetFieldIndex(),
6452                                           instruction->GetDexPc(),
6453                                           calling_convention);
6454 }
6455 
6456 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
6457     HUnresolvedStaticFieldGet* instruction) {
6458   FieldAccessCallingConventionARM64 calling_convention;
6459   codegen_->CreateUnresolvedFieldLocationSummary(
6460       instruction, instruction->GetFieldType(), calling_convention);
6461 }
6462 
6463 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
6464     HUnresolvedStaticFieldGet* instruction) {
6465   FieldAccessCallingConventionARM64 calling_convention;
6466   codegen_->GenerateUnresolvedFieldAccess(instruction,
6467                                           instruction->GetFieldType(),
6468                                           instruction->GetFieldIndex(),
6469                                           instruction->GetDexPc(),
6470                                           calling_convention);
6471 }
6472 
6473 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
6474     HUnresolvedStaticFieldSet* instruction) {
6475   FieldAccessCallingConventionARM64 calling_convention;
6476   codegen_->CreateUnresolvedFieldLocationSummary(
6477       instruction, instruction->GetFieldType(), calling_convention);
6478 }
6479 
6480 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
6481     HUnresolvedStaticFieldSet* instruction) {
6482   FieldAccessCallingConventionARM64 calling_convention;
6483   codegen_->GenerateUnresolvedFieldAccess(instruction,
6484                                           instruction->GetFieldType(),
6485                                           instruction->GetFieldIndex(),
6486                                           instruction->GetDexPc(),
6487                                           calling_convention);
6488 }
6489 
6490 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
6491   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6492       instruction, LocationSummary::kCallOnSlowPath);
6493   // In suspend check slow path, usually there are no caller-save registers at all.
6494   // If SIMD instructions are present, however, we force spilling all live SIMD
6495   // registers in full width (since the runtime only saves/restores lower part).
6496   // Note that only a suspend check can see live SIMD registers. In the
6497   // loop optimization, we make sure this does not happen for any other slow
6498   // path.
6499   locations->SetCustomSlowPathCallerSaves(
6500       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6501 }
6502 
6503 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
6504   HBasicBlock* block = instruction->GetBlock();
6505   if (block->GetLoopInformation() != nullptr) {
6506     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6507     // The back edge will generate the suspend check.
6508     return;
6509   }
6510   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6511     // The goto will generate the suspend check.
6512     return;
6513   }
6514   GenerateSuspendCheck(instruction, nullptr);
6515   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6516 }
6517 
6518 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
6519   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6520       instruction, LocationSummary::kCallOnMainOnly);
6521   InvokeRuntimeCallingConvention calling_convention;
6522   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
6523 }
6524 
6525 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
6526   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6527   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6528 }
6529 
6530 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
6531   LocationSummary* locations =
6532       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
6533   DataType::Type input_type = conversion->GetInputType();
6534   DataType::Type result_type = conversion->GetResultType();
6535   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
6536       << input_type << " -> " << result_type;
6537   if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) ||
6538       (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) {
6539     LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
6540   }
6541 
6542   if (DataType::IsFloatingPointType(input_type)) {
6543     locations->SetInAt(0, Location::RequiresFpuRegister());
6544   } else {
6545     locations->SetInAt(0, Location::RequiresRegister());
6546   }
6547 
6548   if (DataType::IsFloatingPointType(result_type)) {
6549     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6550   } else {
6551     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6552   }
6553 }
6554 
6555 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
6556   DataType::Type result_type = conversion->GetResultType();
6557   DataType::Type input_type = conversion->GetInputType();
6558 
6559   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
6560       << input_type << " -> " << result_type;
6561 
6562   if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) {
6563     int result_size = DataType::Size(result_type);
6564     int input_size = DataType::Size(input_type);
6565     int min_size = std::min(result_size, input_size);
6566     Register output = OutputRegister(conversion);
6567     Register source = InputRegisterAt(conversion, 0);
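    // Integral-to-integral conversions: a kInt64 -> kInt32 truncation is a
    // plain W-register move; unsigned results (and widening of unsigned
    // inputs) are zero-extended with Ubfx; everything else is sign-extended
    // with Sbfx, e.g. kInt8 -> kInt32 becomes Sbfx(out, src, 0, 8).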
6568     if (result_type == DataType::Type::kInt32 && input_type == DataType::Type::kInt64) {
6569       // 'int' values are used directly as W registers, discarding the top
6570       // bits, so we don't need to sign-extend and can just perform a move.
6571       // We do not pass the `kDiscardForSameWReg` argument to force clearing the
6572       // top 32 bits of the target register. We theoretically could leave those
6573       // bits unchanged, but we would have to make sure that no code uses a
6574       // 32bit input value as a 64bit value assuming that the top 32 bits are
6575       // zero.
6576       __ Mov(output.W(), source.W());
6577     } else if (DataType::IsUnsignedType(result_type) ||
6578                (DataType::IsUnsignedType(input_type) && input_size < result_size)) {
6579       __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, result_size * kBitsPerByte);
6580     } else {
6581       __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
6582     }
6583   } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) {
6584     __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
6585   } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) {
6586     CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64);
6587     __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
6588   } else if (DataType::IsFloatingPointType(result_type) &&
6589              DataType::IsFloatingPointType(input_type)) {
6590     __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
6591   } else {
6592     LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
6593                 << " to " << result_type;
6594   }
6595 }
6596 
6597 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
6598   HandleShift(ushr);
6599 }
6600 
6601 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
6602   HandleShift(ushr);
6603 }
6604 
6605 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
6606   HandleBinaryOp(instruction);
6607 }
6608 
6609 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
6610   HandleBinaryOp(instruction);
6611 }
6612 
6613 void LocationsBuilderARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
6614   // Nothing to do, this should be removed during prepare for register allocator.
6615   LOG(FATAL) << "Unreachable";
6616 }
6617 
6618 void InstructionCodeGeneratorARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
6619   // Nothing to do, this should be removed during prepare for register allocator.
6620   LOG(FATAL) << "Unreachable";
6621 }
6622 
6623 // Simple implementation of packed switch - generate cascaded compare/jumps.
6624 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6625   LocationSummary* locations =
6626       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6627   locations->SetInAt(0, Location::RequiresRegister());
6628 }
6629 
6630 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6631   int32_t lower_bound = switch_instr->GetStartValue();
6632   uint32_t num_entries = switch_instr->GetNumEntries();
6633   Register value_reg = InputRegisterAt(switch_instr, 0);
6634   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
6635 
6636   // Roughly set 16 as the maximum average number of assembly instructions generated per HIR.
6637   static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
6638   // ADR has a limited range (+/- 1 MB), so we set a threshold for the number of HIRs in the graph to
6639   // make sure we don't emit it if the target may run out of range.
6640   // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
6641   // ranges and emit the tables only as required.
6642   static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;
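  // With kInstructionSize == 4, this is 1 MB / 64 B == 16384 HIR instructions.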
6643 
6644   if (num_entries <= kPackedSwitchCompareJumpThreshold ||
6645       // Current instruction id is an upper bound of the number of HIRs in the graph.
6646       GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
6647     // Create a series of compare/jumps.
6648     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
6649     Register temp = temps.AcquireW();
6650     __ Subs(temp, value_reg, Operand(lower_bound));
6651 
6652     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
6653     // Jump to successors[0] if value == lower_bound.
6654     __ B(eq, codegen_->GetLabelOf(successors[0]));
6655     int32_t last_index = 0;
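    // Each loop iteration dispatches two consecutive cases: after subtracting
    // 2, `lo` means the value matched case last_index + 1 and `eq` means it
    // matched case last_index + 2 (earlier cases were already handled).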
6656     for (; num_entries - last_index > 2; last_index += 2) {
6657       __ Subs(temp, temp, Operand(2));
6658       // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
6659       __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
6660       // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
6661       __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
6662     }
6663     if (num_entries - last_index == 2) {
6664       // The last missing case_value.
6665       __ Cmp(temp, Operand(1));
6666       __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
6667     }
6668 
6669     // And the default for any other value.
6670     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
6671       __ B(codegen_->GetLabelOf(default_block));
6672     }
6673   } else {
6674     JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);
6675 
6676     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
6677 
6678     // The instructions below should use at most one blocked register. Since there are two blocked
6679     // registers, we are free to block one.
6680     Register temp_w = temps.AcquireW();
6681     Register index;
6682     // Remove the bias.
6683     if (lower_bound != 0) {
6684       index = temp_w;
6685       __ Sub(index, value_reg, Operand(lower_bound));
6686     } else {
6687       index = value_reg;
6688     }
6689 
6690     // Jump to default block if index is out of the range.
6691     __ Cmp(index, Operand(num_entries));
6692     __ B(hs, codegen_->GetLabelOf(default_block));
6693 
6694     // In the current VIXL implementation, encoding the immediate value for Adr does not require
6695     // any blocked registers, so we are free to use both VIXL blocked registers to reduce
6696     // register pressure.
6697     Register table_base = temps.AcquireX();
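    // The table stores one 32-bit signed offset per case, relative to the
    // table start; the target is table_base plus the sign-extended offset.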
6698     // Load jump offset from the table.
6699     __ Adr(table_base, jump_table->GetTableStartLabel());
6700     Register jump_offset = temp_w;
6701     __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
6702 
6703     // Jump to the target block by branching to table_base (PC-relative) + offset.
6704     Register target_address = table_base;
6705     __ Add(target_address, table_base, Operand(jump_offset, SXTW));
6706     __ Br(target_address);
6707   }
6708 }
6709 
6710 void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
6711     HInstruction* instruction,
6712     Location out,
6713     uint32_t offset,
6714     Location maybe_temp,
6715     ReadBarrierOption read_barrier_option) {
6716   DataType::Type type = DataType::Type::kReference;
6717   Register out_reg = RegisterFrom(out, type);
6718   if (read_barrier_option == kWithReadBarrier) {
6719     DCHECK(codegen_->EmitReadBarrier());
6720     if (kUseBakerReadBarrier) {
6721       // Load with fast path based Baker's read barrier.
6722       // /* HeapReference<Object> */ out = *(out + offset)
6723       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6724                                                       out,
6725                                                       out_reg,
6726                                                       offset,
6727                                                       maybe_temp,
6728                                                       /* needs_null_check= */ false,
6729                                                       /* use_load_acquire= */ false);
6730     } else {
6731       // Load with slow path based read barrier.
6732       // Save the value of `out` into `maybe_temp` before overwriting it
6733       // in the following move operation, as we will need it for the
6734       // read barrier below.
6735       Register temp_reg = RegisterFrom(maybe_temp, type);
6736       __ Mov(temp_reg, out_reg);
6737       // /* HeapReference<Object> */ out = *(out + offset)
6738       __ Ldr(out_reg, HeapOperand(out_reg, offset));
6739       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6740     }
6741   } else {
6742     // Plain load with no read barrier.
6743     // /* HeapReference<Object> */ out = *(out + offset)
6744     __ Ldr(out_reg, HeapOperand(out_reg, offset));
6745     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
6746   }
6747 }
6748 
6749 void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
6750     HInstruction* instruction,
6751     Location out,
6752     Location obj,
6753     uint32_t offset,
6754     Location maybe_temp,
6755     ReadBarrierOption read_barrier_option) {
6756   DataType::Type type = DataType::Type::kReference;
6757   Register out_reg = RegisterFrom(out, type);
6758   Register obj_reg = RegisterFrom(obj, type);
6759   if (read_barrier_option == kWithReadBarrier) {
6760     DCHECK(codegen_->EmitReadBarrier());
6761     if (kUseBakerReadBarrier) {
6762       // Load with fast path based Baker's read barrier.
6763       // /* HeapReference<Object> */ out = *(obj + offset)
6764       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6765                                                       out,
6766                                                       obj_reg,
6767                                                       offset,
6768                                                       maybe_temp,
6769                                                       /* needs_null_check= */ false,
6770                                                       /* use_load_acquire= */ false);
6771     } else {
6772       // Load with slow path based read barrier.
6773       // /* HeapReference<Object> */ out = *(obj + offset)
6774       __ Ldr(out_reg, HeapOperand(obj_reg, offset));
6775       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
6776     }
6777   } else {
6778     // Plain load with no read barrier.
6779     // /* HeapReference<Object> */ out = *(obj + offset)
6780     __ Ldr(out_reg, HeapOperand(obj_reg, offset));
6781     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
6782   }
6783 }
6784 
6785 void CodeGeneratorARM64::GenerateGcRootFieldLoad(
6786     HInstruction* instruction,
6787     Location root,
6788     Register obj,
6789     uint32_t offset,
6790     vixl::aarch64::Label* fixup_label,
6791     ReadBarrierOption read_barrier_option) {
6792   DCHECK(fixup_label == nullptr || offset == 0u);
6793   Register root_reg = RegisterFrom(root, DataType::Type::kReference);
6794   if (read_barrier_option == kWithReadBarrier) {
6795     DCHECK(EmitReadBarrier());
6796     if (kUseBakerReadBarrier) {
6797       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
6798       // Baker's read barrier are used.
6799 
6800       // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
6801       // the Marking Register) to decide whether we need to enter
6802       // the slow path to mark the GC root.
6803       //
6804       // We use shared thunks for the slow path; shared within the method
6805       // for JIT, across methods for AOT. That thunk checks the reference
6806       // and jumps to the entrypoint if needed.
6807       //
6808       //     lr = &return_address;
6809       //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
6810       //     if (mr) {  // Thread::Current()->GetIsGcMarking()
6811       //       goto gc_root_thunk<root_reg>(lr)
6812       //     }
6813       //   return_address:
6814 
6815       UseScratchRegisterScope temps(GetVIXLAssembler());
6816       DCHECK(temps.IsAvailable(ip0));
6817       DCHECK(temps.IsAvailable(ip1));
6818       temps.Exclude(ip0, ip1);
6819       uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
6820 
6821       ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
6822       vixl::aarch64::Label return_address;
6823       __ adr(lr, &return_address);
6824       if (fixup_label != nullptr) {
6825         __ bind(fixup_label);
6826       }
6827       static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
6828                     "GC root LDR must be 2 instructions (8B) before the return address label.");
6829       __ ldr(root_reg, MemOperand(obj.X(), offset));
6830       EmitBakerReadBarrierCbnz(custom_data);
6831       __ bind(&return_address);
6832     } else {
6833       // GC root loaded through a slow path for read barriers other
6834       // than Baker's.
6835       // /* GcRoot<mirror::Object>* */ root = obj + offset
6836       if (fixup_label == nullptr) {
6837         __ Add(root_reg.X(), obj.X(), offset);
6838       } else {
6839         EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
6840       }
6841       // /* mirror::Object* */ root = root->Read()
6842       GenerateReadBarrierForRootSlow(instruction, root, root);
6843     }
6844   } else {
6845     // Plain GC root load with no read barrier.
6846     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
6847     if (fixup_label == nullptr) {
6848       __ Ldr(root_reg, MemOperand(obj, offset));
6849     } else {
6850       EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
6851     }
6852     // Note that GC roots are not affected by heap poisoning, thus we
6853     // do not have to unpoison `root_reg` here.
6854   }
6855   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6856 }
6857 
6858 void CodeGeneratorARM64::GenerateIntrinsicMoveWithBakerReadBarrier(
6859     vixl::aarch64::Register marked_old_value,
6860     vixl::aarch64::Register old_value) {
6861   DCHECK(EmitBakerReadBarrier());
6862 
6863   // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
6864   uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked_old_value.GetCode());
6865 
6866   ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
6867   vixl::aarch64::Label return_address;
6868   __ adr(lr, &return_address);
6869   static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
6870                 "GC root LDR must be 2 instructions (8B) before the return address label.");
6871   __ mov(marked_old_value, old_value);
6872   EmitBakerReadBarrierCbnz(custom_data);
6873   __ bind(&return_address);
6874 }
6875 
6876 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6877                                                                Location ref,
6878                                                                vixl::aarch64::Register obj,
6879                                                                const vixl::aarch64::MemOperand& src,
6880                                                                bool needs_null_check,
6881                                                                bool use_load_acquire) {
6882   DCHECK(EmitBakerReadBarrier());
6883 
6884   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
6885   // Marking Register) to decide whether we need to enter the slow
6886   // path to mark the reference. Then, in the slow path, check the
6887   // gray bit in the lock word of the reference's holder (`obj`) to
6888   // decide whether to mark `ref` or not.
6889   //
6890   // We use shared thunks for the slow path; shared within the method
6891   // for JIT, across methods for AOT. That thunk checks the holder
6892   // and jumps to the entrypoint if needed. If the holder is not gray,
6893   // it creates a fake dependency and returns to the LDR instruction.
6894   //
6895   //     lr = &gray_return_address;
6896   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
6897   //       goto field_thunk<holder_reg, base_reg, use_load_acquire>(lr)
6898   //     }
6899   //   not_gray_return_address:
6900   //     // Original reference load. If the offset is too large to fit
6901   //     // into LDR, we use an adjusted base register here.
6902   //     HeapReference<mirror::Object> reference = *(obj+offset);
6903   //   gray_return_address:
6904 
6905   DCHECK(src.GetAddrMode() == vixl::aarch64::Offset);
6906   DCHECK_ALIGNED(src.GetOffset(), sizeof(mirror::HeapReference<mirror::Object>));
6907 
6908   UseScratchRegisterScope temps(GetVIXLAssembler());
6909   DCHECK(temps.IsAvailable(ip0));
6910   DCHECK(temps.IsAvailable(ip1));
6911   temps.Exclude(ip0, ip1);
6912   uint32_t custom_data = use_load_acquire
6913       ? EncodeBakerReadBarrierAcquireData(src.GetBaseRegister().GetCode(), obj.GetCode())
6914       : EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode());
6915 
6916   {
6917     ExactAssemblyScope guard(GetVIXLAssembler(),
6918                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
6919     vixl::aarch64::Label return_address;
6920     __ adr(lr, &return_address);
6921     EmitBakerReadBarrierCbnz(custom_data);
6922     static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6923                   "Field LDR must be 1 instruction (4B) before the return address label; "
6924                   " 2 instructions (8B) for heap poisoning.");
6925     Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
6926     if (use_load_acquire) {
6927       DCHECK_EQ(src.GetOffset(), 0);
6928       __ ldar(ref_reg, src);
6929     } else {
6930       __ ldr(ref_reg, src);
6931     }
6932     if (needs_null_check) {
6933       MaybeRecordImplicitNullCheck(instruction);
6934     }
6935     // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
6936     // macro instructions disallowed in ExactAssemblyScope.
6937     if (kPoisonHeapReferences) {
6938       __ neg(ref_reg, Operand(ref_reg));
6939     }
6940     __ bind(&return_address);
6941   }
6942   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
6943 }
6944 
6945 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6946                                                                Location ref,
6947                                                                Register obj,
6948                                                                uint32_t offset,
6949                                                                Location maybe_temp,
6950                                                                bool needs_null_check,
6951                                                                bool use_load_acquire) {
6952   DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
6953   Register base = obj;
6954   if (use_load_acquire) {
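    // LDAR accepts only a base register with no immediate offset, so materialize the full
    // address (obj + offset) in the temporary and load from it with a zero offset.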
6955     DCHECK(maybe_temp.IsRegister());
6956     base = WRegisterFrom(maybe_temp);
6957     __ Add(base, obj, offset);
6958     offset = 0u;
6959   } else if (offset >= kReferenceLoadMinFarOffset) {
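    // Split a far offset: fold the high bits into the temporary base so that the low bits
    // left in `offset` fit in the LDR unsigned immediate field decoded by the thunk.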
6960     DCHECK(maybe_temp.IsRegister());
6961     base = WRegisterFrom(maybe_temp);
6962     static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
6963     __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
6964     offset &= (kReferenceLoadMinFarOffset - 1u);
6965   }
6966   MemOperand src(base.X(), offset);
6967   GenerateFieldLoadWithBakerReadBarrier(
6968       instruction, ref, obj, src, needs_null_check, use_load_acquire);
6969 }
6970 
6971 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction,
6972                                                                Location ref,
6973                                                                Register obj,
6974                                                                uint32_t data_offset,
6975                                                                Location index,
6976                                                                bool needs_null_check) {
6977   DCHECK(EmitBakerReadBarrier());
6978 
6979   static_assert(
6980       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6981       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6982   size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);
6983 
6984   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
6985   // Marking Register) to decide whether we need to enter the slow
6986   // path to mark the reference. Then, in the slow path, check the
6987   // gray bit in the lock word of the reference's holder (`obj`) to
6988   // decide whether to mark `ref` or not.
6989   //
6990   // We use shared thunks for the slow path; shared within the method
6991   // for JIT, across methods for AOT. That thunk checks the holder
6992   // and jumps to the entrypoint if needed. If the holder is not gray,
6993   // it creates a fake dependency and returns to the LDR instruction.
6994   //
6995   //     lr = &gray_return_address;
6996   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
6997   //       goto array_thunk<base_reg>(lr)
6998   //     }
6999   //   not_gray_return_address:
7000   //     // Original reference load. If the offset is too large to fit
7001   //     // into LDR, we use an adjusted base register here.
7002   //     HeapReference<mirror::Object> reference = data[index];
7003   //   gray_return_address:
7004 
7005   DCHECK(index.IsValid());
7006   Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
7007   Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
7008 
7009   UseScratchRegisterScope temps(GetVIXLAssembler());
7010   DCHECK(temps.IsAvailable(ip0));
7011   DCHECK(temps.IsAvailable(ip1));
7012   temps.Exclude(ip0, ip1);
7013 
7014   Register temp;
7015   if (instruction->GetArray()->IsIntermediateAddress()) {
7016     // We do not need to compute the intermediate address from the array: the
7017     // input instruction has done it already. See the comment in
7018     // `TryExtractArrayAccessAddress()`.
7019     if (kIsDebugBuild) {
7020       HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
7021       DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
7022     }
7023     temp = obj;
7024   } else {
7025     temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
7026     __ Add(temp.X(), obj.X(), Operand(data_offset));
7027   }
7028 
7029   uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
7030 
7031   {
7032     ExactAssemblyScope guard(GetVIXLAssembler(),
7033                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
7034     vixl::aarch64::Label return_address;
7035     __ adr(lr, &return_address);
7036     EmitBakerReadBarrierCbnz(custom_data);
7037     static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
7038                   "Array LDR must be 1 instruction (4B) before the return address label; "
7039                   " 2 instructions (8B) for heap poisoning.");
7040     __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
7041     DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
7042     // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
7043     // macro instructions disallowed in ExactAssemblyScope.
7044     if (kPoisonHeapReferences) {
7045       __ neg(ref_reg, Operand(ref_reg));
7046     }
7047     __ bind(&return_address);
7048   }
7049   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
7050 }
7051 
7052 void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
7053   // The following condition is a compile-time one, so it does not have a run-time cost.
7054   if (kIsDebugBuild && EmitBakerReadBarrier()) {
7055     // The following condition is a run-time one; it is executed after the
7056     // previous compile-time test, to avoid penalizing non-debug builds.
7057     if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
7058       UseScratchRegisterScope temps(GetVIXLAssembler());
7059       Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW();
7060       GetAssembler()->GenerateMarkingRegisterCheck(temp, code);
7061     }
7062   }
7063 }
7064 
7065 SlowPathCodeARM64* CodeGeneratorARM64::AddReadBarrierSlowPath(HInstruction* instruction,
7066                                                               Location out,
7067                                                               Location ref,
7068                                                               Location obj,
7069                                                               uint32_t offset,
7070                                                               Location index) {
7071   SlowPathCodeARM64* slow_path = new (GetScopedAllocator())
7072       ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
7073   AddSlowPath(slow_path);
7074   return slow_path;
7075 }
7076 
7077 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
7078                                                  Location out,
7079                                                  Location ref,
7080                                                  Location obj,
7081                                                  uint32_t offset,
7082                                                  Location index) {
7083   DCHECK(EmitReadBarrier());
7084 
7085   // Insert a slow path based read barrier *after* the reference load.
7086   //
7087   // If heap poisoning is enabled, the unpoisoning of the loaded
7088   // reference will be carried out by the runtime within the slow
7089   // path.
7090   //
7091   // Note that `ref` currently does not get unpoisoned (when heap
7092   // poisoning is enabled), which is alright as the `ref` argument is
7093   // not used by the artReadBarrierSlow entry point.
7094   //
7095   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
7096   SlowPathCodeARM64* slow_path = AddReadBarrierSlowPath(instruction, out, ref, obj, offset, index);
7097 
7098   __ B(slow_path->GetEntryLabel());
7099   __ Bind(slow_path->GetExitLabel());
7100 }
7101 
7102 void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
7103                                                       Location out,
7104                                                       Location ref,
7105                                                       Location obj,
7106                                                       uint32_t offset,
7107                                                       Location index) {
7108   if (EmitReadBarrier()) {
7109     // Baker's read barriers shall be handled by the fast path
7110     // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
7111     DCHECK(!kUseBakerReadBarrier);
7112     // If heap poisoning is enabled, unpoisoning will be taken care of
7113     // by the runtime within the slow path.
7114     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
7115   } else if (kPoisonHeapReferences) {
7116     GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
7117   }
7118 }
7119 
7120 void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
7121                                                         Location out,
7122                                                         Location root) {
7123   DCHECK(EmitReadBarrier());
7124 
7125   // Insert a slow path based read barrier *after* the GC root load.
7126   //
7127   // Note that GC roots are not affected by heap poisoning, so we do
7128   // not need to do anything special for this here.
7129   SlowPathCodeARM64* slow_path =
7130       new (GetScopedAllocator()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
7131   AddSlowPath(slow_path);
7132 
7133   __ B(slow_path->GetEntryLabel());
7134   __ Bind(slow_path->GetExitLabel());
7135 }
7136 
7137 void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
7138   LocationSummary* locations =
7139       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7140   locations->SetInAt(0, Location::RequiresRegister());
7141   locations->SetOut(Location::RequiresRegister());
7142 }
7143 
7144 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
7145   LocationSummary* locations = instruction->GetLocations();
7146   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
7147     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
7148         instruction->GetIndex(), kArm64PointerSize).SizeValue();
7149     __ Ldr(XRegisterFrom(locations->Out()),
7150            MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
7151   } else {
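    // IMT case: load the ImTable pointer from the class, then the method entry at the
    // computed element offset.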
7152     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
7153         instruction->GetIndex(), kArm64PointerSize));
7154     __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
7155         mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
7156     __ Ldr(XRegisterFrom(locations->Out()),
7157            MemOperand(XRegisterFrom(locations->Out()), method_offset));
7158   }
7159 }
7160 
7161 MemOperand InstructionCodeGeneratorARM64::VecNEONAddress(
7162     HVecMemoryOperation* instruction,
7163     UseScratchRegisterScope* temps_scope,
7164     size_t size,
7165     bool is_string_char_at,
7166     /*out*/ Register* scratch) {
7167   LocationSummary* locations = instruction->GetLocations();
7168   Register base = InputRegisterAt(instruction, 0);
7169 
7170   if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
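    // The index input already folds in the data offset and the element size shift
    // (see HIntermediateAddressIndex), so use it directly as the register offset.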
7171     DCHECK(!is_string_char_at);
7172     return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
7173   }
7174 
7175   Location index = locations->InAt(1);
7176   uint32_t offset = is_string_char_at
7177       ? mirror::String::ValueOffset().Uint32Value()
7178       : mirror::Array::DataOffset(size).Uint32Value();
7179   size_t shift = ComponentSizeShiftWidth(size);
7180 
7181   // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
7182   DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
7183 
7184   if (index.IsConstant()) {
7185     offset += Int64FromLocation(index) << shift;
7186     return HeapOperand(base, offset);
7187   } else {
7188     *scratch = temps_scope->AcquireSameSizeAs(base);
7189     __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
7190     return HeapOperand(*scratch, offset);
7191   }
7192 }
7193 
7194 SVEMemOperand InstructionCodeGeneratorARM64::VecSVEAddress(
7195     HVecMemoryOperation* instruction,
7196     UseScratchRegisterScope* temps_scope,
7197     size_t size,
7198     bool is_string_char_at,
7199     /*out*/ Register* scratch) {
7200   LocationSummary* locations = instruction->GetLocations();
7201   Register base = InputRegisterAt(instruction, 0);
7202   Location index = locations->InAt(1);
7203 
7204   DCHECK(!instruction->InputAt(1)->IsIntermediateAddressIndex());
7205   DCHECK(!index.IsConstant());
7206 
7207   uint32_t offset = is_string_char_at
7208       ? mirror::String::ValueOffset().Uint32Value()
7209       : mirror::Array::DataOffset(size).Uint32Value();
7210   size_t shift = ComponentSizeShiftWidth(size);
7211 
7212   if (instruction->InputAt(0)->IsIntermediateAddress()) {
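    // The base already includes the data offset (HIntermediateAddress), so address the
    // element directly off it.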
7213     return SVEMemOperand(base.X(), XRegisterFrom(index), LSL, shift);
7214   }
7215 
7216   *scratch = temps_scope->AcquireSameSizeAs(base);
7217   __ Add(*scratch, base, offset);
7218   return SVEMemOperand(scratch->X(), XRegisterFrom(index), LSL, shift);
7219 }
7220 
7221 #undef __
7222 #undef QUICK_ENTRY_POINT
7223 
7224 #define __ assembler.GetVIXLAssembler()->
7225 
7226 static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
7227                                      vixl::aarch64::Register base_reg,
7228                                      vixl::aarch64::MemOperand& lock_word,
7229                                      vixl::aarch64::Label* slow_path,
7230                                      vixl::aarch64::Label* throw_npe = nullptr) {
7231   vixl::aarch64::Label throw_npe_cont;
7232   // Load the lock word containing the rb_state.
7233   __ Ldr(ip0.W(), lock_word);
7234   // Given the numeric representation, it's enough to check the low bit of the rb_state.
7235   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
7236   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7237   __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path);
7238   static_assert(
7239       BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
7240       "Field and array LDR offsets must be the same to reuse the same code.");
7241   // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
7242   if (throw_npe != nullptr) {
7243     __ Bind(&throw_npe_cont);
7244   }
7245   // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
7246   static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
7247                 "Field LDR must be 1 instruction (4B) before the return address label; "
7248                 " 2 instructions (8B) for heap poisoning.");
7249   __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
7250   // Introduce a dependency on the lock_word including rb_state,
7251   // to prevent load-load reordering, and without using
7252   // a memory barrier (which would be more expensive).
7253   __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
7254   __ Br(lr);          // And return back to the function.
7255   if (throw_npe != nullptr) {
7256     // Clear IP0 before returning to the fast path.
7257     __ Bind(throw_npe);
7258     __ Mov(ip0.X(), xzr);
7259     __ B(&throw_npe_cont);
7260   }
7261   // Note: The fake dependency is unnecessary for the slow path.
7262 }
7263 
7264 // Load the read barrier introspection entrypoint in register `entrypoint`.
7265 static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler,
7266                                                        vixl::aarch64::Register entrypoint) {
7267   // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
7268   DCHECK_EQ(ip0.GetCode(), 16u);
7269   const int32_t entry_point_offset =
7270       Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
7271   __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
7272 }
7273 
7274 void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
7275                                                       uint32_t encoded_data,
7276                                                       /*out*/ std::string* debug_name) {
7277   BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
7278   switch (kind) {
7279     case BakerReadBarrierKind::kField:
7280     case BakerReadBarrierKind::kAcquire: {
7281       Register base_reg =
7282           vixl::aarch64::XRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
7283       CheckValidReg(base_reg.GetCode());
7284       Register holder_reg =
7285           vixl::aarch64::XRegister(BakerReadBarrierSecondRegField::Decode(encoded_data));
7286       CheckValidReg(holder_reg.GetCode());
7287       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
7288       temps.Exclude(ip0, ip1);
7289       // In the case of a field load (with relaxed semantic), if `base_reg` differs from
7290       // `holder_reg`, the offset was too large and we must have emitted (during the construction
7291       // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved
7292       // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before
7293       // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do
7294       // not necessarily do that check before going to the thunk.
7295       //
7296       // In the case of a field load with load-acquire semantics (where `base_reg` always differs
7297       // from `holder_reg`), we also need an explicit null check when implicit null checks are
7298       // allowed, as we do not emit one before going to the thunk.
7299       vixl::aarch64::Label throw_npe_label;
7300       vixl::aarch64::Label* throw_npe = nullptr;
7301       if (GetCompilerOptions().GetImplicitNullChecks() &&
7302           (holder_reg.Is(base_reg) || (kind == BakerReadBarrierKind::kAcquire))) {
7303         throw_npe = &throw_npe_label;
7304         __ Cbz(holder_reg.W(), throw_npe);
7305       }
7306       // Check if the holder is gray and, if not, add fake dependency to the base register
7307       // and return to the LDR instruction to load the reference. Otherwise, use introspection
7308       // to load the reference and call the entrypoint that performs further checks on the
7309       // reference and marks it if needed.
7310       vixl::aarch64::Label slow_path;
7311       MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
7312       EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe);
7313       __ Bind(&slow_path);
7314       if (kind == BakerReadBarrierKind::kField) {
7315         MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
7316         __ Ldr(ip0.W(), ldr_address);         // Load the LDR (immediate) unsigned offset.
7317         LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7318         __ Ubfx(ip0.W(), ip0.W(), 10, 12);    // Extract the offset.
7319         __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2));   // Load the reference.
7320       } else {
7321         DCHECK(kind == BakerReadBarrierKind::kAcquire);
7322         DCHECK(!base_reg.Is(holder_reg));
7323         LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7324         __ Ldar(ip0.W(), MemOperand(base_reg));
7325       }
7326       // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
7327       __ Br(ip1);                           // Jump to the entrypoint.
7328       break;
7329     }
7330     case BakerReadBarrierKind::kArray: {
7331       Register base_reg =
7332           vixl::aarch64::XRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
7333       CheckValidReg(base_reg.GetCode());
7334       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7335                 BakerReadBarrierSecondRegField::Decode(encoded_data));
7336       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
7337       temps.Exclude(ip0, ip1);
7338       vixl::aarch64::Label slow_path;
7339       int32_t data_offset =
7340           mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
7341       MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
7342       DCHECK_LT(lock_word.GetOffset(), 0);
7343       EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
7344       __ Bind(&slow_path);
7345       MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
7346       __ Ldr(ip0.W(), ldr_address);         // Load the LDR (register) unsigned offset.
7347       LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7348       __ Ubfx(ip0, ip0, 16, 6);             // Extract the index register, plus 32 (bit 21 is set).
7349       __ Bfi(ip1, ip0, 3, 6);               // Insert ip0 to the entrypoint address to create
7350                                             // a switch case target based on the index register.
7351       __ Mov(ip0, base_reg);                // Move the base register to ip0.
7352       __ Br(ip1);                           // Jump to the entrypoint's array switch case.
7353       break;
7354     }
7355     case BakerReadBarrierKind::kGcRoot: {
7356       // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
7357       // and it does not have a forwarding address), call the correct introspection entrypoint;
7358       // otherwise return the reference (or the extracted forwarding address).
7359       // There is no gray bit check for GC roots.
7360       Register root_reg =
7361           vixl::aarch64::WRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
7362       CheckValidReg(root_reg.GetCode());
7363       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7364                 BakerReadBarrierSecondRegField::Decode(encoded_data));
7365       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
7366       temps.Exclude(ip0, ip1);
7367       vixl::aarch64::Label return_label, not_marked, forwarding_address;
7368       __ Cbz(root_reg, &return_label);
7369       MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value());
7370       __ Ldr(ip0.W(), lock_word);
7371       __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked);
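      // The mark bit is set: the reference is already marked, so fall through and return it.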
7372       __ Bind(&return_label);
7373       __ Br(lr);
7374       __ Bind(&not_marked);
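      // Detect a forwarding address: ANDing the lock word with itself shifted left by one
      // sets the N flag only if both state bits (31 and 30) are set, so the MI branch below
      // is taken exactly when the lock word holds a forwarding address.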
7375       __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
7376       __ B(&forwarding_address, mi);
7377       LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7378       // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
7379       // art_quick_read_barrier_mark_introspection_gc_roots.
7380       __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
7381       __ Mov(ip0.W(), root_reg);
7382       __ Br(ip1);
7383       __ Bind(&forwarding_address);
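      // Recover the forwarding address: the 32-bit left shift drops the state bits and
      // restores the object-aligned address.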
7384       __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift);
7385       __ Br(lr);
7386       break;
7387     }
7388     default:
7389       LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
7390       UNREACHABLE();
7391   }
7392 
7393   // For JIT, the slow path is considered part of the compiled method,
7394   // so JIT should pass null as `debug_name`.
7395   DCHECK_IMPLIES(GetCompilerOptions().IsJitCompiler(), debug_name == nullptr);
7396   if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
7397     std::ostringstream oss;
7398     oss << "BakerReadBarrierThunk";
7399     switch (kind) {
7400       case BakerReadBarrierKind::kField:
7401         oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
7402             << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
7403         break;
7404       case BakerReadBarrierKind::kAcquire:
7405         oss << "Acquire_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
7406             << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
7407         break;
7408       case BakerReadBarrierKind::kArray:
7409         oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
7410         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7411                   BakerReadBarrierSecondRegField::Decode(encoded_data));
7412         break;
7413       case BakerReadBarrierKind::kGcRoot:
7414         oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
7415         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7416                   BakerReadBarrierSecondRegField::Decode(encoded_data));
7417         break;
7418     }
7419     *debug_name = oss.str();
7420   }
7421 }
7422 
7423 #undef __
7424 
7425 }  // namespace arm64
7426 }  // namespace art
7427