1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_arm64.h"
18 
19 #include "aarch64/assembler-aarch64.h"
20 #include "aarch64/registers-aarch64.h"
21 #include "arch/arm64/asm_support_arm64.h"
22 #include "arch/arm64/instruction_set_features_arm64.h"
23 #include "arch/arm64/jni_frame_arm64.h"
24 #include "art_method-inl.h"
25 #include "base/bit_utils.h"
26 #include "base/bit_utils_iterator.h"
27 #include "class_root-inl.h"
28 #include "class_table.h"
29 #include "code_generator_utils.h"
30 #include "compiled_method.h"
31 #include "entrypoints/quick/quick_entrypoints.h"
32 #include "entrypoints/quick/quick_entrypoints_enum.h"
33 #include "gc/accounting/card_table.h"
34 #include "gc/space/image_space.h"
35 #include "heap_poisoning.h"
36 #include "interpreter/mterp/nterp.h"
37 #include "intrinsics.h"
38 #include "intrinsics_arm64.h"
39 #include "linker/linker_patch.h"
40 #include "lock_word.h"
41 #include "mirror/array-inl.h"
42 #include "mirror/class-inl.h"
43 #include "mirror/var_handle.h"
44 #include "offsets.h"
45 #include "optimizing/common_arm64.h"
46 #include "thread.h"
47 #include "utils/arm64/assembler_arm64.h"
48 #include "utils/assembler.h"
49 #include "utils/stack_checks.h"
50 
51 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
52 using vixl::ExactAssemblyScope;
53 using vixl::CodeBufferCheckScope;
54 using vixl::EmissionCheckScope;
55 
56 #ifdef __
57 #error "ARM64 Codegen VIXL macro-assembler macro already defined."
58 #endif
59 
60 namespace art {
61 
62 template<class MirrorType>
63 class GcRoot;
64 
65 namespace arm64 {
66 
67 using helpers::ARM64EncodableConstantOrRegister;
68 using helpers::ArtVixlRegCodeCoherentForRegSet;
69 using helpers::CPURegisterFrom;
70 using helpers::DRegisterFrom;
71 using helpers::FPRegisterFrom;
72 using helpers::HeapOperand;
73 using helpers::HeapOperandFrom;
74 using helpers::InputCPURegisterOrZeroRegAt;
75 using helpers::InputFPRegisterAt;
76 using helpers::InputOperandAt;
77 using helpers::InputRegisterAt;
78 using helpers::Int64FromLocation;
79 using helpers::IsConstantZeroBitPattern;
80 using helpers::LocationFrom;
81 using helpers::OperandFromMemOperand;
82 using helpers::OutputCPURegister;
83 using helpers::OutputFPRegister;
84 using helpers::OutputRegister;
85 using helpers::RegisterFrom;
86 using helpers::StackOperandFrom;
87 using helpers::VIXLRegCodeFromART;
88 using helpers::WRegisterFrom;
89 using helpers::XRegisterFrom;
90 
91 // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
92 // jump table version generates 7 instructions and num_entries literals. The compare/jump
93 // sequence therefore generates less code/data when num_entries is small.
94 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
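// Rough cost comparison at the chosen threshold of 7 (illustrative figures only): the compare/jump
// sequence needs about 1.5 * 7 + 3 ~= 14 instructions, while the jump table needs 7 instructions
// plus 7 32-bit literals, i.e. roughly the same amount of code+data; for larger switches the
// jump table wins.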
95 
96 // A reference load (except object array loads) uses LDR Wt, [Xn, #offset], which can handle
97 // offsets < 16KiB. For offsets >= 16KiB, the load must be emitted as two or more instructions.
98 // For the Baker read barrier implementation using link-time generated thunks we need to split
99 // the offset explicitly.
100 constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
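// Illustrative sketch of the split (not the exact emitted code): for a field at offset 0x4008 the
// load would be emitted roughly as
//   add xTmp, xN, #0x4000    // coarse part, a multiple of 16KiB
//   ldr wT, [xTmp, #0x8]     // remainder fits the LDR immediate
// so that the Baker read barrier thunk only has to inspect/patch the final LDR.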
101 
102 inline Condition ARM64Condition(IfCondition cond) {
103   switch (cond) {
104     case kCondEQ: return eq;
105     case kCondNE: return ne;
106     case kCondLT: return lt;
107     case kCondLE: return le;
108     case kCondGT: return gt;
109     case kCondGE: return ge;
110     case kCondB:  return lo;
111     case kCondBE: return ls;
112     case kCondA:  return hi;
113     case kCondAE: return hs;
114   }
115   LOG(FATAL) << "Unreachable";
116   UNREACHABLE();
117 }
118 
119 inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
120   // The ARM64 condition codes can express all the necessary branches, see the
121   // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
122   // There is no dex instruction or HIR that would need the missing conditions
123   // "equal or unordered" or "not equal".
124   switch (cond) {
125     case kCondEQ: return eq;
126     case kCondNE: return ne /* unordered */;
127     case kCondLT: return gt_bias ? cc : lt /* unordered */;
128     case kCondLE: return gt_bias ? ls : le /* unordered */;
129     case kCondGT: return gt_bias ? hi /* unordered */ : gt;
130     case kCondGE: return gt_bias ? cs /* unordered */ : ge;
131     default:
132       LOG(FATAL) << "UNREACHABLE";
133       UNREACHABLE();
134   }
135 }
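// Worked example for the mapping above: for a float kCondLT with gt_bias, a NaN operand must not
// satisfy the condition. FCMP sets NZCV to 0011 for unordered operands, so "lt" (N != V) would be
// taken but "cc"/"lo" (C == 0) is not, hence the "gt_bias ? cc : lt" choice.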
136 
137 Location ARM64ReturnLocation(DataType::Type return_type) {
138   // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
139   // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
140   // but we use the exact registers for clarity.
141   if (return_type == DataType::Type::kFloat32) {
142     return LocationFrom(s0);
143   } else if (return_type == DataType::Type::kFloat64) {
144     return LocationFrom(d0);
145   } else if (return_type == DataType::Type::kInt64) {
146     return LocationFrom(x0);
147   } else if (return_type == DataType::Type::kVoid) {
148     return Location::NoLocation();
149   } else {
150     return LocationFrom(w0);
151   }
152 }
153 
154 Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) {
155   return ARM64ReturnLocation(return_type);
156 }
157 
158 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
159   InvokeRuntimeCallingConvention calling_convention;
160   RegisterSet caller_saves = RegisterSet::Empty();
161   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
162   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
163             RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
164                          DataType::Type::kReference).GetCode());
165   return caller_saves;
166 }
167 
168 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
169 #define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
170 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
171 
172 void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
173   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
174   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
175   for (uint32_t i : LowToHighBits(core_spills)) {
176     // If the register holds an object, update the stack mask.
177     if (locations->RegisterContainsObject(i)) {
178       locations->SetStackBit(stack_offset / kVRegSize);
179     }
180     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
181     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
182     saved_core_stack_offsets_[i] = stack_offset;
183     stack_offset += kXRegSizeInBytes;
184   }
185 
186   const size_t fp_reg_size = codegen->GetSlowPathFPWidth();
187   const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
188   for (uint32_t i : LowToHighBits(fp_spills)) {
189     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
190     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
191     saved_fpu_stack_offsets_[i] = stack_offset;
192     stack_offset += fp_reg_size;
193   }
194 
195   InstructionCodeGeneratorARM64* visitor =
196       down_cast<CodeGeneratorARM64*>(codegen)->GetInstructionCodeGeneratorArm64();
197   visitor->SaveLiveRegistersHelper(locations, codegen->GetFirstRegisterSlotInSlowPath());
198 }
199 
200 void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
201   InstructionCodeGeneratorARM64* visitor =
202       down_cast<CodeGeneratorARM64*>(codegen)->GetInstructionCodeGeneratorArm64();
203   visitor->RestoreLiveRegistersHelper(locations, codegen->GetFirstRegisterSlotInSlowPath());
204 }
205 
206 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
207  public:
208   explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}
209 
210   void EmitNativeCode(CodeGenerator* codegen) override {
211     LocationSummary* locations = instruction_->GetLocations();
212     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
213 
214     __ Bind(GetEntryLabel());
215     if (instruction_->CanThrowIntoCatchBlock()) {
216       // Live registers will be restored in the catch block if caught.
217       SaveLiveRegisters(codegen, instruction_->GetLocations());
218     }
219     // We're moving two locations to locations that could overlap, so we need a parallel
220     // move resolver.
221     InvokeRuntimeCallingConvention calling_convention;
222     codegen->EmitParallelMoves(locations->InAt(0),
223                                LocationFrom(calling_convention.GetRegisterAt(0)),
224                                DataType::Type::kInt32,
225                                locations->InAt(1),
226                                LocationFrom(calling_convention.GetRegisterAt(1)),
227                                DataType::Type::kInt32);
228     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
229         ? kQuickThrowStringBounds
230         : kQuickThrowArrayBounds;
231     arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
232     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
233     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
234   }
235 
236   bool IsFatal() const override { return true; }
237 
238   const char* GetDescription() const override { return "BoundsCheckSlowPathARM64"; }
239 
240  private:
241   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
242 };
243 
244 class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
245  public:
246   explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}
247 
248   void EmitNativeCode(CodeGenerator* codegen) override {
249     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
250     __ Bind(GetEntryLabel());
251     arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
252     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
253   }
254 
255   bool IsFatal() const override { return true; }
256 
257   const char* GetDescription() const override { return "DivZeroCheckSlowPathARM64"; }
258 
259  private:
260   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
261 };
262 
263 class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
264  public:
265   LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at)
266       : SlowPathCodeARM64(at), cls_(cls) {
267     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
268     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
269   }
270 
271   void EmitNativeCode(CodeGenerator* codegen) override {
272     LocationSummary* locations = instruction_->GetLocations();
273     Location out = locations->Out();
274     const uint32_t dex_pc = instruction_->GetDexPc();
275     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
276     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
277 
278     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
279     __ Bind(GetEntryLabel());
280     SaveLiveRegisters(codegen, locations);
281 
282     InvokeRuntimeCallingConvention calling_convention;
283     if (must_resolve_type) {
284       DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile()));
285       dex::TypeIndex type_index = cls_->GetTypeIndex();
286       __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
287       if (cls_->NeedsAccessCheck()) {
288         CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
289         arm64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
290       } else {
291         CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
292         arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
293       }
294       // If we also must_do_clinit, the resolved type is now in the correct register.
295     } else {
296       DCHECK(must_do_clinit);
297       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
298       arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)),
299                                   source,
300                                   cls_->GetType());
301     }
302     if (must_do_clinit) {
303       arm64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
304       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
305     }
306 
307     // Move the class to the desired location.
308     if (out.IsValid()) {
309       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
310       DataType::Type type = instruction_->GetType();
311       arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
312     }
313     RestoreLiveRegisters(codegen, locations);
314     __ B(GetExitLabel());
315   }
316 
317   const char* GetDescription() const override { return "LoadClassSlowPathARM64"; }
318 
319  private:
320   // The class this slow path will load.
321   HLoadClass* const cls_;
322 
323   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
324 };
325 
326 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
327  public:
328   explicit LoadStringSlowPathARM64(HLoadString* instruction)
329       : SlowPathCodeARM64(instruction) {}
330 
331   void EmitNativeCode(CodeGenerator* codegen) override {
332     LocationSummary* locations = instruction_->GetLocations();
333     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
334     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
335 
336     __ Bind(GetEntryLabel());
337     SaveLiveRegisters(codegen, locations);
338 
339     InvokeRuntimeCallingConvention calling_convention;
340     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
341     __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
342     arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
343     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
344     DataType::Type type = instruction_->GetType();
345     arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
346 
347     RestoreLiveRegisters(codegen, locations);
348 
349     __ B(GetExitLabel());
350   }
351 
352   const char* GetDescription() const override { return "LoadStringSlowPathARM64"; }
353 
354  private:
355   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
356 };
357 
358 class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
359  public:
360   explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}
361 
362   void EmitNativeCode(CodeGenerator* codegen) override {
363     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
364     __ Bind(GetEntryLabel());
365     if (instruction_->CanThrowIntoCatchBlock()) {
366       // Live registers will be restored in the catch block if caught.
367       SaveLiveRegisters(codegen, instruction_->GetLocations());
368     }
369     arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
370                                  instruction_,
371                                  instruction_->GetDexPc(),
372                                  this);
373     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
374   }
375 
376   bool IsFatal() const override { return true; }
377 
378   const char* GetDescription() const override { return "NullCheckSlowPathARM64"; }
379 
380  private:
381   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
382 };
383 
384 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
385  public:
386   SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
387       : SlowPathCodeARM64(instruction), successor_(successor) {}
388 
389   void EmitNativeCode(CodeGenerator* codegen) override {
390     LocationSummary* locations = instruction_->GetLocations();
391     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
392     __ Bind(GetEntryLabel());
393     SaveLiveRegisters(codegen, locations);  // Only saves live vector regs for SIMD.
394     arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
395     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
396     RestoreLiveRegisters(codegen, locations);  // Only restores live vector regs for SIMD.
397     if (successor_ == nullptr) {
398       __ B(GetReturnLabel());
399     } else {
400       __ B(arm64_codegen->GetLabelOf(successor_));
401     }
402   }
403 
404   vixl::aarch64::Label* GetReturnLabel() {
405     DCHECK(successor_ == nullptr);
406     return &return_label_;
407   }
408 
409   HBasicBlock* GetSuccessor() const {
410     return successor_;
411   }
412 
413   const char* GetDescription() const override { return "SuspendCheckSlowPathARM64"; }
414 
415  private:
416   // If not null, the block to branch to after the suspend check.
417   HBasicBlock* const successor_;
418 
419   // If `successor_` is null, the label to branch to after the suspend check.
420   vixl::aarch64::Label return_label_;
421 
422   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
423 };
424 
425 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
426  public:
427   TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
428       : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}
429 
430   void EmitNativeCode(CodeGenerator* codegen) override {
431     LocationSummary* locations = instruction_->GetLocations();
432 
433     DCHECK(instruction_->IsCheckCast()
434            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
435     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
436     uint32_t dex_pc = instruction_->GetDexPc();
437 
438     __ Bind(GetEntryLabel());
439 
440     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
441       SaveLiveRegisters(codegen, locations);
442     }
443 
444     // We're moving two locations to locations that could overlap, so we need a parallel
445     // move resolver.
446     InvokeRuntimeCallingConvention calling_convention;
447     codegen->EmitParallelMoves(locations->InAt(0),
448                                LocationFrom(calling_convention.GetRegisterAt(0)),
449                                DataType::Type::kReference,
450                                locations->InAt(1),
451                                LocationFrom(calling_convention.GetRegisterAt(1)),
452                                DataType::Type::kReference);
453     if (instruction_->IsInstanceOf()) {
454       arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
455       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
456       DataType::Type ret_type = instruction_->GetType();
457       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
458       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
459     } else {
460       DCHECK(instruction_->IsCheckCast());
461       arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
462       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
463     }
464 
465     if (!is_fatal_) {
466       RestoreLiveRegisters(codegen, locations);
467       __ B(GetExitLabel());
468     }
469   }
470 
471   const char* GetDescription() const override { return "TypeCheckSlowPathARM64"; }
472   bool IsFatal() const override { return is_fatal_; }
473 
474  private:
475   const bool is_fatal_;
476 
477   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
478 };
479 
480 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
481  public:
482   explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
483       : SlowPathCodeARM64(instruction) {}
484 
485   void EmitNativeCode(CodeGenerator* codegen) override {
486     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
487     __ Bind(GetEntryLabel());
488     LocationSummary* locations = instruction_->GetLocations();
489     SaveLiveRegisters(codegen, locations);
490     InvokeRuntimeCallingConvention calling_convention;
491     __ Mov(calling_convention.GetRegisterAt(0),
492            static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
493     arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
494     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
495   }
496 
497   const char* GetDescription() const override { return "DeoptimizationSlowPathARM64"; }
498 
499  private:
500   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
501 };
502 
503 class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
504  public:
505   explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}
506 
507   void EmitNativeCode(CodeGenerator* codegen) override {
508     LocationSummary* locations = instruction_->GetLocations();
509     __ Bind(GetEntryLabel());
510     SaveLiveRegisters(codegen, locations);
511 
512     InvokeRuntimeCallingConvention calling_convention;
513     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
514     parallel_move.AddMove(
515         locations->InAt(0),
516         LocationFrom(calling_convention.GetRegisterAt(0)),
517         DataType::Type::kReference,
518         nullptr);
519     parallel_move.AddMove(
520         locations->InAt(1),
521         LocationFrom(calling_convention.GetRegisterAt(1)),
522         DataType::Type::kInt32,
523         nullptr);
524     parallel_move.AddMove(
525         locations->InAt(2),
526         LocationFrom(calling_convention.GetRegisterAt(2)),
527         DataType::Type::kReference,
528         nullptr);
529     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
530 
531     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
532     arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
533     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
534     RestoreLiveRegisters(codegen, locations);
535     __ B(GetExitLabel());
536   }
537 
538   const char* GetDescription() const override { return "ArraySetSlowPathARM64"; }
539 
540  private:
541   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
542 };
543 
544 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
545   uint32_t num_entries = switch_instr_->GetNumEntries();
546   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
547 
548   // We are about to use the assembler to place literals directly. Make sure we have enough
549   // space in the underlying code buffer and that the jump table is generated with the right size.
550   EmissionCheckScope scope(codegen->GetVIXLAssembler(),
551                            num_entries * sizeof(int32_t),
552                            CodeBufferCheckScope::kExactSize);
553 
554   __ Bind(&table_start_);
555   const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
556   for (uint32_t i = 0; i < num_entries; i++) {
557     vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
558     DCHECK(target_label->IsBound());
559     ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
560     DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
561     DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
562     Literal<int32_t> literal(jump_offset);
563     __ place(&literal);
564   }
565 }
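// For reference, the dispatch sequence emitted at the switch site (in VisitPackedSwitch, not shown
// here) looks roughly like:
//   adr  xTbl, table_start
//   ldr  wOff, [xTbl, wIdx, uxtw #2]   // fetch the 32-bit entry placed above
//   add  xTbl, xTbl, wOff, sxtw        // entry is a signed offset from table_start
//   br   xTbl
// which is why each entry is the offset of the target label relative to table_start_.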
566 
567 // Slow path generating a read barrier for a heap reference.
568 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
569  public:
570   ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
571                                            Location out,
572                                            Location ref,
573                                            Location obj,
574                                            uint32_t offset,
575                                            Location index)
576       : SlowPathCodeARM64(instruction),
577         out_(out),
578         ref_(ref),
579         obj_(obj),
580         offset_(offset),
581         index_(index) {
582     DCHECK(kEmitCompilerReadBarrier);
583     // If `obj` is equal to `out` or `ref`, it means the initial object
584     // has been overwritten by (or after) the heap object reference load
585     // to be instrumented, e.g.:
586     //
587     //   __ Ldr(out, HeapOperand(out, class_offset);
588     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
589     //
590     // In that case, we have lost the information about the original
591     // object, and the emitted read barrier cannot work properly.
592     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
593     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
594   }
595 
596   void EmitNativeCode(CodeGenerator* codegen) override {
597     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
598     LocationSummary* locations = instruction_->GetLocations();
599     DataType::Type type = DataType::Type::kReference;
600     DCHECK(locations->CanCall());
601     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
602     DCHECK(instruction_->IsInstanceFieldGet() ||
603            instruction_->IsPredicatedInstanceFieldGet() ||
604            instruction_->IsStaticFieldGet() ||
605            instruction_->IsArrayGet() ||
606            instruction_->IsInstanceOf() ||
607            instruction_->IsCheckCast() ||
608            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
609         << "Unexpected instruction in read barrier for heap reference slow path: "
610         << instruction_->DebugName();
611     // The read barrier instrumentation of object ArrayGet
612     // instructions does not support the HIntermediateAddress
613     // instruction.
614     DCHECK(!(instruction_->IsArrayGet() &&
615              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
616 
617     __ Bind(GetEntryLabel());
618 
619     SaveLiveRegisters(codegen, locations);
620 
621     // We may have to change the index's value, but as `index_` is a
622     // constant member (like other "inputs" of this slow path),
623     // introduce a copy of it, `index`.
624     Location index = index_;
625     if (index_.IsValid()) {
626       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
627       if (instruction_->IsArrayGet()) {
628         // Compute the actual memory offset and store it in `index`.
629         Register index_reg = RegisterFrom(index_, DataType::Type::kInt32);
630         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
631         if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
632           // We are about to change the value of `index_reg` (see the
633           // calls to vixl::MacroAssembler::Lsl and
634           // vixl::MacroAssembler::Mov below), but it has
635           // not been saved by the previous call to
636           // art::SlowPathCode::SaveLiveRegisters, as it is a
637           // callee-save register --
638           // art::SlowPathCode::SaveLiveRegisters does not consider
639           // callee-save registers, as it has been designed with the
640           // assumption that callee-save registers are supposed to be
641           // handled by the called function.  So, as a callee-save
642           // register, `index_reg` _would_ eventually be saved onto
643           // the stack, but it would be too late: we would have
644           // changed its value earlier.  Therefore, we manually save
645           // it here into another freely available register,
646           // `free_reg`, chosen of course among the caller-save
647           // registers (as a callee-save `free_reg` register would
648           // exhibit the same problem).
649           //
650           // Note we could have requested a temporary register from
651           // the register allocator instead; but we prefer not to, as
652           // this is a slow path, and we know we can find a
653           // caller-save register that is available.
654           Register free_reg = FindAvailableCallerSaveRegister(codegen);
655           __ Mov(free_reg.W(), index_reg);
656           index_reg = free_reg;
657           index = LocationFrom(index_reg);
658         } else {
659           // The initial register stored in `index_` has already been
660           // saved in the call to art::SlowPathCode::SaveLiveRegisters
661           // (as it is not a callee-save register), so we can freely
662           // use it.
663         }
664         // Shifting the index value contained in `index_reg` by the scale
665         // factor (2) cannot overflow in practice, as the runtime is
666         // unable to allocate object arrays with a size larger than
667         // 2^26 - 1 (that is, 2^28 - 4 bytes).
668         __ Lsl(index_reg, index_reg, DataType::SizeShift(type));
669         static_assert(
670             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
671             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
672         __ Add(index_reg, index_reg, Operand(offset_));
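        // Illustration (assuming a 12-byte data offset for an object array): for index 3 the two
        // instructions above compute (3 << 2) + 12 = 24, i.e. the byte offset of the element,
        // which is what the read barrier entrypoint expects in its "offset" argument.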
673       } else {
674         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile/VarHandleGet
675         // intrinsics, `index_` is not shifted by a scale factor of 2
676         // (as in the case of ArrayGet), as it is actually an offset
677         // to an object field within an object.
678         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
679         DCHECK(instruction_->GetLocations()->Intrinsified());
680         Intrinsics intrinsic = instruction_->AsInvoke()->GetIntrinsic();
681         DCHECK(intrinsic == Intrinsics::kUnsafeGetObject ||
682                intrinsic == Intrinsics::kUnsafeGetObjectVolatile ||
683                intrinsic == Intrinsics::kUnsafeCASObject ||
684                mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
685                    mirror::VarHandle::AccessModeTemplate::kGet ||
686                mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
687                    mirror::VarHandle::AccessModeTemplate::kCompareAndSet ||
688                mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
689                    mirror::VarHandle::AccessModeTemplate::kCompareAndExchange ||
690                mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
691                    mirror::VarHandle::AccessModeTemplate::kGetAndUpdate)
692             << instruction_->AsInvoke()->GetIntrinsic();
693         DCHECK_EQ(offset_, 0u);
694         DCHECK(index_.IsRegister());
695       }
696     }
697 
698     // We're moving two or three locations to locations that could
699     // overlap, so we need a parallel move resolver.
700     InvokeRuntimeCallingConvention calling_convention;
701     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
702     parallel_move.AddMove(ref_,
703                           LocationFrom(calling_convention.GetRegisterAt(0)),
704                           type,
705                           nullptr);
706     parallel_move.AddMove(obj_,
707                           LocationFrom(calling_convention.GetRegisterAt(1)),
708                           type,
709                           nullptr);
710     if (index.IsValid()) {
711       parallel_move.AddMove(index,
712                             LocationFrom(calling_convention.GetRegisterAt(2)),
713                             DataType::Type::kInt32,
714                             nullptr);
715       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
716     } else {
717       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
718       arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
719     }
720     arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
721                                  instruction_,
722                                  instruction_->GetDexPc(),
723                                  this);
724     CheckEntrypointTypes<
725         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
726     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
727 
728     RestoreLiveRegisters(codegen, locations);
729 
730     __ B(GetExitLabel());
731   }
732 
733   const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
734 
735  private:
736   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
737     size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
738     size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
739     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
740       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
741         return Register(VIXLRegCodeFromART(i), kXRegSize);
742       }
743     }
744     // We shall never fail to find a free caller-save register, as
745     // there are more than two core caller-save registers on ARM64
746     // (meaning it is possible to find one which is different from
747     // `ref` and `obj`).
748     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
749     LOG(FATAL) << "Could not find a free register";
750     UNREACHABLE();
751   }
752 
753   const Location out_;
754   const Location ref_;
755   const Location obj_;
756   const uint32_t offset_;
757   // An additional location containing an index to an array.
758   // Only used for HArrayGet and the UnsafeGetObject &
759   // UnsafeGetObjectVolatile intrinsics.
760   const Location index_;
761 
762   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
763 };
764 
765 // Slow path generating a read barrier for a GC root.
766 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
767  public:
768   ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
769       : SlowPathCodeARM64(instruction), out_(out), root_(root) {
770     DCHECK(kEmitCompilerReadBarrier);
771   }
772 
773   void EmitNativeCode(CodeGenerator* codegen) override {
774     LocationSummary* locations = instruction_->GetLocations();
775     DataType::Type type = DataType::Type::kReference;
776     DCHECK(locations->CanCall());
777     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
778     DCHECK(instruction_->IsLoadClass() ||
779            instruction_->IsLoadString() ||
780            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
781         << "Unexpected instruction in read barrier for GC root slow path: "
782         << instruction_->DebugName();
783 
784     __ Bind(GetEntryLabel());
785     SaveLiveRegisters(codegen, locations);
786 
787     InvokeRuntimeCallingConvention calling_convention;
788     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
789     // The argument of the ReadBarrierForRootSlow is not a managed
790     // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
791     // thus we need a 64-bit move here, and we cannot use
792     //
793     //   arm64_codegen->MoveLocation(
794     //       LocationFrom(calling_convention.GetRegisterAt(0)),
795     //       root_,
796     //       type);
797     //
798     // which would emit a 32-bit move, as `type` is a (32-bit wide)
799     // reference type (`DataType::Type::kReference`).
800     __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
801     arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
802                                  instruction_,
803                                  instruction_->GetDexPc(),
804                                  this);
805     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
806     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
807 
808     RestoreLiveRegisters(codegen, locations);
809     __ B(GetExitLabel());
810   }
811 
812   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARM64"; }
813 
814  private:
815   const Location out_;
816   const Location root_;
817 
818   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
819 };
820 
821 #undef __
822 
823 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
824   Location next_location;
825   if (type == DataType::Type::kVoid) {
826     LOG(FATAL) << "Unreachable type " << type;
827   }
828 
829   if (DataType::IsFloatingPointType(type) &&
830       (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
831     next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
832   } else if (!DataType::IsFloatingPointType(type) &&
833              (gp_index_ < calling_convention.GetNumberOfRegisters())) {
834     next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
835   } else {
836     size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
837     next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
838                                                 : Location::StackSlot(stack_offset);
839   }
840 
841   // Space on the stack is reserved for all arguments.
842   stack_index_ += DataType::Is64BitType(type) ? 2 : 1;
843   return next_location;
844 }
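// Example of the managed-ABI assignment above (assuming kParameterCoreRegisters = x1..x7 and
// kParameterFPRegisters = d0..d7): for a signature (int, float, long) the arguments land in
// w1, s0 and x2, with x0 reserved for the ArtMethod* (see GetMethodLocation() below).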
845 
846 Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
847   return LocationFrom(kArtMethodRegister);
848 }
849 
850 Location CriticalNativeCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
851   DCHECK_NE(type, DataType::Type::kReference);
852 
853   Location location = Location::NoLocation();
854   if (DataType::IsFloatingPointType(type)) {
855     if (fpr_index_ < kParameterFPRegistersLength) {
856       location = LocationFrom(kParameterFPRegisters[fpr_index_]);
857       ++fpr_index_;
858     }
859   } else {
860     // Native ABI uses the same registers as managed, except that the method register x0
861     // is a normal argument.
862     if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
863       location = LocationFrom(gpr_index_ == 0u ? x0 : kParameterCoreRegisters[gpr_index_ - 1u]);
864       ++gpr_index_;
865     }
866   }
867   if (location.IsInvalid()) {
868     if (DataType::Is64BitType(type)) {
869       location = Location::DoubleStackSlot(stack_offset_);
870     } else {
871       location = Location::StackSlot(stack_offset_);
872     }
873     stack_offset_ += kFramePointerSize;
874 
875     if (for_register_allocation_) {
876       location = Location::Any();
877     }
878   }
879   return location;
880 }
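// Example for the @CriticalNative ABI above: since there is no ArtMethod*/JNIEnv* argument, x0 is
// a regular argument register, so a native signature (long, double, int) would use x0, d0 and w1,
// and anything beyond x0-x7/d0-d7 spills to 8-byte stack slots as computed above.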
881 
882 Location CriticalNativeCallingConventionVisitorARM64::GetReturnLocation(DataType::Type type) const {
883   // We perform conversion to the managed ABI return register after the call if needed.
884   InvokeDexCallingConventionVisitorARM64 dex_calling_convention;
885   return dex_calling_convention.GetReturnLocation(type);
886 }
887 
888 Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const {
889   // Pass the method in the hidden argument x15.
890   return Location::RegisterLocation(x15.GetCode());
891 }
892 
893 CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
894                                        const CompilerOptions& compiler_options,
895                                        OptimizingCompilerStats* stats)
896     : CodeGenerator(graph,
897                     kNumberOfAllocatableRegisters,
898                     kNumberOfAllocatableFPRegisters,
899                     kNumberOfAllocatableRegisterPairs,
900                     callee_saved_core_registers.GetList(),
901                     callee_saved_fp_registers.GetList(),
902                     compiler_options,
903                     stats),
904       block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
905       jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
906       location_builder_neon_(graph, this),
907       instruction_visitor_neon_(graph, this),
908       location_builder_sve_(graph, this),
909       instruction_visitor_sve_(graph, this),
910       move_resolver_(graph->GetAllocator(), this),
911       assembler_(graph->GetAllocator(),
912                  compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()),
913       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
914       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
915       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
916       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
917       public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
918       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
919       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
920       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
921       boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
922       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
923       call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
924       baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
925       uint32_literals_(std::less<uint32_t>(),
926                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
927       uint64_literals_(std::less<uint64_t>(),
928                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
929       jit_string_patches_(StringReferenceValueComparator(),
930                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
931       jit_class_patches_(TypeReferenceValueComparator(),
932                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
933       jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
934                                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
935   // Save the link register (containing the return address) to mimic Quick.
936   AddAllocatedRegister(LocationFrom(lr));
937 
938   bool use_sve = ShouldUseSVE();
939   if (use_sve) {
940     location_builder_ = &location_builder_sve_;
941     instruction_visitor_ = &instruction_visitor_sve_;
942   } else {
943     location_builder_ = &location_builder_neon_;
944     instruction_visitor_ = &instruction_visitor_neon_;
945   }
946 }
947 
948 bool CodeGeneratorARM64::ShouldUseSVE() const {
949   return GetInstructionSetFeatures().HasSVE();
950 }
951 
952 size_t CodeGeneratorARM64::GetSIMDRegisterWidth() const {
953   return SupportsPredicatedSIMD()
954       ? GetInstructionSetFeatures().GetSVEVectorLength() / kBitsPerByte
955       : vixl::aarch64::kQRegSizeInBytes;
956 }
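// E.g. without SVE this is the 16-byte NEON Q register width; with SVE it is the hardware vector
// length, such as 32 bytes on a 256-bit SVE implementation.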
957 
958 #define __ GetVIXLAssembler()->
959 
960 void CodeGeneratorARM64::EmitJumpTables() {
961   for (auto&& jump_table : jump_tables_) {
962     jump_table->EmitTable(this);
963   }
964 }
965 
966 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
967   EmitJumpTables();
968 
969   // Emit JIT baker read barrier slow paths.
970   DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
971   for (auto& entry : jit_baker_read_barrier_slow_paths_) {
972     uint32_t encoded_data = entry.first;
973     vixl::aarch64::Label* slow_path_entry = &entry.second.label;
974     __ Bind(slow_path_entry);
975     CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
976   }
977 
978   // Ensure we emit the literal pool.
979   __ FinalizeCode();
980 
981   CodeGenerator::Finalize(allocator);
982 
983   // Verify Baker read barrier linker patches.
984   if (kIsDebugBuild) {
985     ArrayRef<const uint8_t> code = allocator->GetMemory();
986     for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
987       DCHECK(info.label.IsBound());
988       uint32_t literal_offset = info.label.GetLocation();
989       DCHECK_ALIGNED(literal_offset, 4u);
990 
991       auto GetInsn = [&code](uint32_t offset) {
992         DCHECK_ALIGNED(offset, 4u);
993         return
994             (static_cast<uint32_t>(code[offset + 0]) << 0) +
995             (static_cast<uint32_t>(code[offset + 1]) << 8) +
996             (static_cast<uint32_t>(code[offset + 2]) << 16)+
997             (static_cast<uint32_t>(code[offset + 3]) << 24);
998       };
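      // GetInsn reassembles one A64 instruction from the little-endian code buffer; e.g. the bytes
      // 00 00 40 b9 decode to 0xb9400000, the LDR (immediate, 32-bit) template checked below.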
999 
1000       const uint32_t encoded_data = info.custom_data;
1001       BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
1002       // Check that the next instruction matches the expected LDR.
1003       switch (kind) {
1004         case BakerReadBarrierKind::kField:
1005         case BakerReadBarrierKind::kAcquire: {
1006           DCHECK_GE(code.size() - literal_offset, 8u);
1007           uint32_t next_insn = GetInsn(literal_offset + 4u);
1008           CheckValidReg(next_insn & 0x1fu);  // Check destination register.
1009           const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1010           if (kind == BakerReadBarrierKind::kField) {
1011             // LDR (immediate) with correct base_reg.
1012             CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
1013           } else {
1014             DCHECK(kind == BakerReadBarrierKind::kAcquire);
1015             // LDAR with correct base_reg.
1016             CHECK_EQ(next_insn & 0xffffffe0u, 0x88dffc00u | (base_reg << 5));
1017           }
1018           break;
1019         }
1020         case BakerReadBarrierKind::kArray: {
1021           DCHECK_GE(code.size() - literal_offset, 8u);
1022           uint32_t next_insn = GetInsn(literal_offset + 4u);
1023           // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
1024           // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
1025           CheckValidReg(next_insn & 0x1fu);  // Check destination register.
1026           const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1027           CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
1028           CheckValidReg((next_insn >> 16) & 0x1f);  // Check index register
1029           break;
1030         }
1031         case BakerReadBarrierKind::kGcRoot: {
1032           DCHECK_GE(literal_offset, 4u);
1033           uint32_t prev_insn = GetInsn(literal_offset - 4u);
1034           const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1035           // Usually LDR (immediate) with correct root_reg but
1036           // we may have a "MOV marked, old_value" for intrinsic CAS.
1037           if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) {    // MOV?
1038             CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg);  // LDR?
1039           }
1040           break;
1041         }
1042         default:
1043           LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
1044           UNREACHABLE();
1045       }
1046     }
1047   }
1048 }
1049 
1050 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
1051   // Note: There are 6 kinds of moves:
1052   // 1. constant -> GPR/FPR (non-cycle)
1053   // 2. constant -> stack (non-cycle)
1054   // 3. GPR/FPR -> GPR/FPR
1055   // 4. GPR/FPR -> stack
1056   // 5. stack -> GPR/FPR
1057   // 6. stack -> stack (non-cycle)
1058   // Case 1, 2 and 6 should never be included in a dependency cycle on ARM64. For case 3, 4, and 5
1059   // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting
1060   // cycles on ARM64, so there is always 1 GPR and 1 FPR VIXL temp available to resolve the
1061   // dependency.
1062   vixl_temps_.Open(GetVIXLAssembler());
1063 }
1064 
1065 void ParallelMoveResolverARM64::FinishEmitNativeCode() {
1066   vixl_temps_.Close();
1067 }
1068 
1069 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
1070   DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
1071          || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
1072          || kind == Location::kSIMDStackSlot);
1073   kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
1074       ? Location::kFpuRegister
1075       : Location::kRegister;
1076   Location scratch = GetScratchLocation(kind);
1077   if (!scratch.Equals(Location::NoLocation())) {
1078     return scratch;
1079   }
1080   // Allocate from VIXL temp registers.
1081   if (kind == Location::kRegister) {
1082     scratch = LocationFrom(vixl_temps_.AcquireX());
1083   } else {
1084     DCHECK_EQ(kind, Location::kFpuRegister);
1085     scratch = codegen_->GetGraph()->HasSIMD()
1086         ? codegen_->GetInstructionCodeGeneratorArm64()->AllocateSIMDScratchLocation(&vixl_temps_)
1087         : LocationFrom(vixl_temps_.AcquireD());
1088   }
1089   AddScratchLocation(scratch);
1090   return scratch;
1091 }
1092 
1093 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
1094   if (loc.IsRegister()) {
1095     vixl_temps_.Release(XRegisterFrom(loc));
1096   } else {
1097     DCHECK(loc.IsFpuRegister());
1098     if (codegen_->GetGraph()->HasSIMD()) {
1099       codegen_->GetInstructionCodeGeneratorArm64()->FreeSIMDScratchLocation(loc, &vixl_temps_);
1100     } else {
1101       vixl_temps_.Release(DRegisterFrom(loc));
1102     }
1103   }
1104   RemoveScratchLocation(loc);
1105 }
1106 
1107 void ParallelMoveResolverARM64::EmitMove(size_t index) {
1108   MoveOperands* move = moves_[index];
1109   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid);
1110 }
1111 
1112 void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
1113   MacroAssembler* masm = GetVIXLAssembler();
1114   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1115     UseScratchRegisterScope temps(masm);
1116     Register counter = temps.AcquireX();
1117     Register method = is_frame_entry ? kArtMethodRegister : temps.AcquireX();
1118     if (!is_frame_entry) {
1119       __ Ldr(method, MemOperand(sp, 0));
1120     }
1121     __ Ldrh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
1122     __ Add(counter, counter, 1);
1123     // Subtract one if the counter would overflow.
1124     __ Sub(counter, counter, Operand(counter, LSR, 16));
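    // Illustration: a counter of 0xffff becomes 0x10000 after the increment; 0x10000 >> 16 == 1,
    // so the subtraction brings it back to 0xffff, i.e. the 16-bit counter saturates instead of
    // wrapping around to zero.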
1125     __ Strh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
1126   }
1127 
1128   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1129     ScopedProfilingInfoUse spiu(
1130         Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
1131     ProfilingInfo* info = spiu.GetProfilingInfo();
1132     if (info != nullptr) {
1133       uint64_t address = reinterpret_cast64<uint64_t>(info);
1134       vixl::aarch64::Label done;
1135       UseScratchRegisterScope temps(masm);
1136       Register temp = temps.AcquireX();
1137       Register counter = temps.AcquireW();
1138       __ Mov(temp, address);
1139       __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
1140       __ Add(counter, counter, 1);
1141       __ And(counter, counter, interpreter::kTieredHotnessMask);
1142       __ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
1143       __ Cbnz(counter, &done);
1144       if (is_frame_entry) {
1145         if (HasEmptyFrame()) {
1146           // The entrypoint expects the method at the bottom of the stack. We
1147           // claim stack space necessary for alignment.
1148           IncreaseFrame(kStackAlignment);
1149           __ Stp(kArtMethodRegister, lr, MemOperand(sp, 0));
1150         } else if (!RequiresCurrentMethod()) {
1151           __ Str(kArtMethodRegister, MemOperand(sp, 0));
1152         }
1153       } else {
1154         CHECK(RequiresCurrentMethod());
1155       }
1156       uint32_t entrypoint_offset =
1157           GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value();
1158       __ Ldr(lr, MemOperand(tr, entrypoint_offset));
1159       // Note: we don't record the call here (and therefore don't generate a stack
1160       // map), as the entrypoint should never be suspended.
1161       __ Blr(lr);
1162       if (HasEmptyFrame()) {
1163         CHECK(is_frame_entry);
1164         __ Ldr(lr, MemOperand(sp, 8));
1165         DecreaseFrame(kStackAlignment);
1166       }
1167       __ Bind(&done);
1168     }
1169   }
1170 }
1171 
1172 void CodeGeneratorARM64::GenerateFrameEntry() {
1173   MacroAssembler* masm = GetVIXLAssembler();
1174   __ Bind(&frame_entry_label_);
1175 
1176   bool do_overflow_check =
1177       FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
1178   if (do_overflow_check) {
1179     UseScratchRegisterScope temps(masm);
1180     Register temp = temps.AcquireX();
1181     DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1182     __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kArm64)));
1183     {
1184       // Ensure that between load and RecordPcInfo there are no pools emitted.
1185       ExactAssemblyScope eas(GetVIXLAssembler(),
1186                              kInstructionSize,
1187                              CodeBufferCheckScope::kExactSize);
1188       __ ldr(wzr, MemOperand(temp, 0));
1189       RecordPcInfo(nullptr, 0);
1190     }
1191   }
1192 
1193   if (!HasEmptyFrame()) {
1194     // Stack layout:
1195     //      sp[frame_size - 8]        : lr.
1196     //      ...                       : other preserved core registers.
1197     //      ...                       : other preserved fp registers.
1198     //      ...                       : reserved frame space.
1199     //      sp[0]                     : current method.
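    // Illustrative example (hypothetical numbers): with frame_size = 64, core spills
    // {x20, x21, lr} and no FP spills, core_spills_offset is 64 - 24 = 40, so x20/x21/lr are
    // stored at sp[40]/sp[48]/sp[56], the ArtMethod* at sp[0], and sp[8..39] is reserved
    // frame space.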
1200     int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
1201     uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
1202     CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
1203     DCHECK(!preserved_core_registers.IsEmpty());
1204     uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
1205     CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();
1206 
1207     // Save the current method if we need it, or if using STP reduces code
1208     // size. Note that we do not do this in HCurrentMethod, as the
1209     // instruction might have been removed in the SSA graph.
1210     CPURegister lowest_spill;
1211     if (core_spills_offset == kXRegSizeInBytes) {
1212       // If there is no gap between the method and the lowest core spill, use
1213       // aligned STP pre-index to store both. Max difference is 512. We do
1214       // that to reduce code size even if we do not have to save the method.
1215       DCHECK_LE(frame_size, 512);  // 32 core registers are only 256 bytes.
1216       lowest_spill = preserved_core_registers.PopLowestIndex();
1217       __ Stp(kArtMethodRegister, lowest_spill, MemOperand(sp, -frame_size, PreIndex));
1218     } else if (RequiresCurrentMethod()) {
1219       __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
1220     } else {
1221       __ Claim(frame_size);
1222     }
1223     GetAssembler()->cfi().AdjustCFAOffset(frame_size);
1224     if (lowest_spill.IsValid()) {
1225       GetAssembler()->cfi().RelOffset(DWARFReg(lowest_spill), core_spills_offset);
1226       core_spills_offset += kXRegSizeInBytes;
1227     }
1228     GetAssembler()->SpillRegisters(preserved_core_registers, core_spills_offset);
1229     GetAssembler()->SpillRegisters(preserved_fp_registers, fp_spills_offset);
1230 
1231     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1232       // Initialize should_deoptimize flag to 0.
1233       Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
1234       __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
1235     }
1236   }
1237   MaybeIncrementHotness(/* is_frame_entry= */ true);
1238   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
1239 }
1240 
1241 void CodeGeneratorARM64::GenerateFrameExit() {
1242   GetAssembler()->cfi().RememberState();
1243   if (!HasEmptyFrame()) {
1244     int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
1245     uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
1246     CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
1247     DCHECK(!preserved_core_registers.IsEmpty());
1248     uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
1249     CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();
1250 
1251     CPURegister lowest_spill;
1252     if (core_spills_offset == kXRegSizeInBytes) {
1253       // If there is no gap between the method and the lowest core spill, use
1254       // aligned LDP pre-index to pop both. Max difference is 504. We do
1255       // that to reduce code size even though the loaded method is unused.
1256       DCHECK_LE(frame_size, 504);  // 32 core registers are only 256 bytes.
1257       lowest_spill = preserved_core_registers.PopLowestIndex();
1258       core_spills_offset += kXRegSizeInBytes;
1259     }
1260     GetAssembler()->UnspillRegisters(preserved_fp_registers, fp_spills_offset);
1261     GetAssembler()->UnspillRegisters(preserved_core_registers, core_spills_offset);
1262     if (lowest_spill.IsValid()) {
1263       __ Ldp(xzr, lowest_spill, MemOperand(sp, frame_size, PostIndex));
1264       GetAssembler()->cfi().Restore(DWARFReg(lowest_spill));
1265     } else {
1266       __ Drop(frame_size);
1267     }
1268     GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
1269   }
1270   __ Ret();
1271   GetAssembler()->cfi().RestoreState();
1272   GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
1273 }
1274 
1275 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
1276   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
1277   return CPURegList(CPURegister::kRegister, kXRegSize,
1278                     core_spill_mask_);
1279 }
1280 
1281 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
1282   DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
1283                                          GetNumberOfFloatingPointRegisters()));
1284   return CPURegList(CPURegister::kVRegister, kDRegSize,
1285                     fpu_spill_mask_);
1286 }
1287 
1288 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
1289   __ Bind(GetLabelOf(block));
1290 }
1291 
1292 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
1293   DCHECK(location.IsRegister());
1294   __ Mov(RegisterFrom(location, DataType::Type::kInt32), value);
1295 }
1296 
1297 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1298   if (location.IsRegister()) {
1299     locations->AddTemp(location);
1300   } else {
1301     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1302   }
1303 }
1304 
1305 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
1306   UseScratchRegisterScope temps(GetVIXLAssembler());
1307   Register card = temps.AcquireX();
1308   Register temp = temps.AcquireW();   // Index within the CardTable - 32bit.
1309   vixl::aarch64::Label done;
1310   if (value_can_be_null) {
1311     __ Cbz(value, &done);
1312   }
1313   // Load the address of the card table into `card`.
1314   __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1315   // Calculate the offset (in the card table) of the card corresponding to
1316   // `object`.
1317   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
1318   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
1319   // `object`'s card.
1320   //
1321   // Register `card` contains the address of the card table. Note that the card
1322   // table's base is biased during its creation so that it always starts at an
1323   // address whose least-significant byte is equal to `kCardDirty` (see
1324   // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
1325   // below writes the `kCardDirty` (byte) value into the `object`'s card
1326   // (located at `card + object >> kCardShift`).
1327   //
1328   // This dual use of the value in register `card` (1. to calculate the location
1329   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
1330   // (no need to explicitly load `kCardDirty` as an immediate value).
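  // Illustrative example (the card size is an assumption for the sake of the example): with
  // 128-byte cards, an object at address A dirties the byte at `card + (A >> 7)`; since the
  // biased table base ends in the `kCardDirty` byte, the STRB of `card`'s low byte writes
  // exactly that value.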
1331   __ Strb(card, MemOperand(card, temp.X()));
1332   if (value_can_be_null) {
1333     __ Bind(&done);
1334   }
1335 }
1336 
1337 void CodeGeneratorARM64::SetupBlockedRegisters() const {
1338   // Blocked core registers:
1339   //      lr        : Runtime reserved.
1340   //      tr        : Runtime reserved.
1341   //      mr        : Runtime reserved.
1342   //      ip1       : VIXL core temp.
1343   //      ip0       : VIXL core temp.
1344   //      x18       : Platform register.
1345   //
1346   // Blocked fp registers:
1347   //      d31       : VIXL fp temp.
1348   CPURegList reserved_core_registers = vixl_reserved_core_registers;
1349   reserved_core_registers.Combine(runtime_reserved_core_registers);
1350   while (!reserved_core_registers.IsEmpty()) {
1351     blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
1352   }
1353   blocked_core_registers_[X18] = true;
1354 
1355   CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
1356   while (!reserved_fp_registers.IsEmpty()) {
1357     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
1358   }
1359 
1360   if (GetGraph()->IsDebuggable()) {
1361     // Stubs do not save callee-save floating point registers. If the graph
1362     // is debuggable, we need to deal with these registers differently. For
1363     // now, just block them.
1364     CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
1365     while (!reserved_fp_registers_debuggable.IsEmpty()) {
1366       blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
1367     }
1368   }
1369 }
1370 
1371 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1372   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1373   __ Str(reg, MemOperand(sp, stack_index));
1374   return kArm64WordSize;
1375 }
1376 
1377 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1378   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1379   __ Ldr(reg, MemOperand(sp, stack_index));
1380   return kArm64WordSize;
1381 }
1382 
1383 size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
1384                                                      uint32_t reg_id ATTRIBUTE_UNUSED) {
1385   LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
1386              << "use SaveRestoreLiveRegistersHelper";
1387   UNREACHABLE();
1388 }
1389 
1390 size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
1391                                                         uint32_t reg_id ATTRIBUTE_UNUSED) {
1392   LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
1393              << "use SaveRestoreLiveRegistersHelper";
1394   UNREACHABLE();
1395 }
1396 
1397 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
1398   stream << XRegister(reg);
1399 }
1400 
1401 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1402   stream << DRegister(reg);
1403 }
1404 
1405 const Arm64InstructionSetFeatures& CodeGeneratorARM64::GetInstructionSetFeatures() const {
1406   return *GetCompilerOptions().GetInstructionSetFeatures()->AsArm64InstructionSetFeatures();
1407 }
1408 
1409 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
1410   if (constant->IsIntConstant()) {
1411     __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
1412   } else if (constant->IsLongConstant()) {
1413     __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
1414   } else if (constant->IsNullConstant()) {
1415     __ Mov(Register(destination), 0);
1416   } else if (constant->IsFloatConstant()) {
1417     __ Fmov(VRegister(destination), constant->AsFloatConstant()->GetValue());
1418   } else {
1419     DCHECK(constant->IsDoubleConstant());
1420     __ Fmov(VRegister(destination), constant->AsDoubleConstant()->GetValue());
1421   }
1422 }
1423 
1424 
1425 static bool CoherentConstantAndType(Location constant, DataType::Type type) {
1426   DCHECK(constant.IsConstant());
1427   HConstant* cst = constant.GetConstant();
1428   return (cst->IsIntConstant() && type == DataType::Type::kInt32) ||
1429          // Null is mapped to a core W register, which we associate with kPrimInt.
1430          (cst->IsNullConstant() && type == DataType::Type::kInt32) ||
1431          (cst->IsLongConstant() && type == DataType::Type::kInt64) ||
1432          (cst->IsFloatConstant() && type == DataType::Type::kFloat32) ||
1433          (cst->IsDoubleConstant() && type == DataType::Type::kFloat64);
1434 }
1435 
1436 // Allocate a scratch register from the VIXL pool, querying first
1437 // the floating-point register pool, and then the core register
1438 // pool. This is essentially a reimplementation of
1439 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
1440 // using a different allocation strategy.
1441 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
1442                                                     vixl::aarch64::UseScratchRegisterScope* temps,
1443                                                     int size_in_bits) {
1444   return masm->GetScratchVRegisterList()->IsEmpty()
1445       ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
1446       : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
1447 }
1448 
1449 void CodeGeneratorARM64::MoveLocation(Location destination,
1450                                       Location source,
1451                                       DataType::Type dst_type) {
1452   if (source.Equals(destination)) {
1453     return;
1454   }
1455 
1456   // A valid move can always be inferred from the destination and source
1457   // locations. When moving from and to a register, the argument type can be
1458   // used to generate 32bit instead of 64bit moves. In debug mode we also
1459   // check the coherency of the locations and the type.
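  // For example, a 32-bit stack-slot-to-core-register move below becomes a single `Ldr` of a
  // W register, a core-to-FP register move becomes an `Fmov`, and a stack-slot-to-stack-slot
  // move is routed through a scratch register (`Ldr` followed by `Str`).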
1460   bool unspecified_type = (dst_type == DataType::Type::kVoid);
1461 
1462   if (destination.IsRegister() || destination.IsFpuRegister()) {
1463     if (unspecified_type) {
1464       HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
1465       if (source.IsStackSlot() ||
1466           (src_cst != nullptr && (src_cst->IsIntConstant()
1467                                   || src_cst->IsFloatConstant()
1468                                   || src_cst->IsNullConstant()))) {
1469         // For stack slots and 32bit constants, a 32bit type is appropriate.
1470         dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32;
1471       } else {
1472         // If the source is a double stack slot or a 64bit constant, a 64bit
1473         // type is appropriate. Else the source is a register, and since the
1474         // type has not been specified, we choose a 64bit type to force a 64bit
1475         // move.
1476         dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64;
1477       }
1478     }
1479     DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) ||
1480            (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type)));
1481     CPURegister dst = CPURegisterFrom(destination, dst_type);
1482     if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
1483       DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
1484       __ Ldr(dst, StackOperandFrom(source));
1485     } else if (source.IsSIMDStackSlot()) {
1486       GetInstructionCodeGeneratorArm64()->LoadSIMDRegFromStack(destination, source);
1487     } else if (source.IsConstant()) {
1488       DCHECK(CoherentConstantAndType(source, dst_type));
1489       MoveConstant(dst, source.GetConstant());
1490     } else if (source.IsRegister()) {
1491       if (destination.IsRegister()) {
1492         __ Mov(Register(dst), RegisterFrom(source, dst_type));
1493       } else {
1494         DCHECK(destination.IsFpuRegister());
1495         DataType::Type source_type = DataType::Is64BitType(dst_type)
1496             ? DataType::Type::kInt64
1497             : DataType::Type::kInt32;
1498         __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
1499       }
1500     } else {
1501       DCHECK(source.IsFpuRegister());
1502       if (destination.IsRegister()) {
1503         DataType::Type source_type = DataType::Is64BitType(dst_type)
1504             ? DataType::Type::kFloat64
1505             : DataType::Type::kFloat32;
1506         __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
1507       } else {
1508         DCHECK(destination.IsFpuRegister());
1509         if (GetGraph()->HasSIMD()) {
1510           GetInstructionCodeGeneratorArm64()->MoveSIMDRegToSIMDReg(destination, source);
1511         } else {
1512           __ Fmov(VRegister(dst), FPRegisterFrom(source, dst_type));
1513         }
1514       }
1515     }
1516   } else if (destination.IsSIMDStackSlot()) {
1517     GetInstructionCodeGeneratorArm64()->MoveToSIMDStackSlot(destination, source);
1518   } else {  // The destination is not a register. It must be a stack slot.
1519     DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
1520     if (source.IsRegister() || source.IsFpuRegister()) {
1521       if (unspecified_type) {
1522         if (source.IsRegister()) {
1523           dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64;
1524         } else {
1525           dst_type =
1526               destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64;
1527         }
1528       }
1529       DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) &&
1530              (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type)));
1531       __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
1532     } else if (source.IsConstant()) {
1533       DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
1534           << source << " " << dst_type;
1535       UseScratchRegisterScope temps(GetVIXLAssembler());
1536       HConstant* src_cst = source.GetConstant();
1537       CPURegister temp;
1538       if (src_cst->IsZeroBitPattern()) {
1539         temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
1540             ? Register(xzr)
1541             : Register(wzr);
1542       } else {
1543         if (src_cst->IsIntConstant()) {
1544           temp = temps.AcquireW();
1545         } else if (src_cst->IsLongConstant()) {
1546           temp = temps.AcquireX();
1547         } else if (src_cst->IsFloatConstant()) {
1548           temp = temps.AcquireS();
1549         } else {
1550           DCHECK(src_cst->IsDoubleConstant());
1551           temp = temps.AcquireD();
1552         }
1553         MoveConstant(temp, src_cst);
1554       }
1555       __ Str(temp, StackOperandFrom(destination));
1556     } else {
1557       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
1558       DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
1559       UseScratchRegisterScope temps(GetVIXLAssembler());
1560       // Use any scratch register (a core or a floating-point one)
1561       // from VIXL scratch register pools as a temporary.
1562       //
1563       // We used to only use the FP scratch register pool, but in some
1564       // rare cases the only register from this pool (D31) would
1565       // already be used (e.g. within a ParallelMove instruction, when
1566       // a move is blocked by another move requiring a scratch FP
1567       // register, which would reserve D31). To prevent this issue, we
1568       // ask for a scratch register of any type (core or FP).
1569       //
1570       // Also, we start by asking for a FP scratch register first, as the
1571       // demand of scratch core registers is higher. This is why we
1572       // use AcquireFPOrCoreCPURegisterOfSize instead of
1573       // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
1574       // allocates core scratch registers first.
1575       CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
1576           GetVIXLAssembler(),
1577           &temps,
1578           (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
1579       __ Ldr(temp, StackOperandFrom(source));
1580       __ Str(temp, StackOperandFrom(destination));
1581     }
1582   }
1583 }
1584 
1585 void CodeGeneratorARM64::Load(DataType::Type type,
1586                               CPURegister dst,
1587                               const MemOperand& src) {
1588   switch (type) {
1589     case DataType::Type::kBool:
1590     case DataType::Type::kUint8:
1591       __ Ldrb(Register(dst), src);
1592       break;
1593     case DataType::Type::kInt8:
1594       __ Ldrsb(Register(dst), src);
1595       break;
1596     case DataType::Type::kUint16:
1597       __ Ldrh(Register(dst), src);
1598       break;
1599     case DataType::Type::kInt16:
1600       __ Ldrsh(Register(dst), src);
1601       break;
1602     case DataType::Type::kInt32:
1603     case DataType::Type::kReference:
1604     case DataType::Type::kInt64:
1605     case DataType::Type::kFloat32:
1606     case DataType::Type::kFloat64:
1607       DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1608       __ Ldr(dst, src);
1609       break;
1610     case DataType::Type::kUint32:
1611     case DataType::Type::kUint64:
1612     case DataType::Type::kVoid:
1613       LOG(FATAL) << "Unreachable type " << type;
1614   }
1615 }
1616 
1617 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
1618                                      DataType::Type type,
1619                                      CPURegister dst,
1620                                      const MemOperand& src,
1621                                      bool needs_null_check) {
1622   MacroAssembler* masm = GetVIXLAssembler();
1623   UseScratchRegisterScope temps(masm);
1624   Register temp_base = temps.AcquireX();
1625 
1626   DCHECK(!src.IsPreIndex());
1627   DCHECK(!src.IsPostIndex());
1628 
1629   // TODO(vixl): Let the MacroAssembler handle MemOperand.
1630   __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
1631   {
1632     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
1633     MemOperand base = MemOperand(temp_base);
1634     switch (type) {
1635       case DataType::Type::kBool:
1636       case DataType::Type::kUint8:
1637       case DataType::Type::kInt8:
1638         {
1639           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1640           __ ldarb(Register(dst), base);
1641           if (needs_null_check) {
1642             MaybeRecordImplicitNullCheck(instruction);
1643           }
1644         }
1645         if (type == DataType::Type::kInt8) {
1646           __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1647         }
1648         break;
1649       case DataType::Type::kUint16:
1650       case DataType::Type::kInt16:
1651         {
1652           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1653           __ ldarh(Register(dst), base);
1654           if (needs_null_check) {
1655             MaybeRecordImplicitNullCheck(instruction);
1656           }
1657         }
1658         if (type == DataType::Type::kInt16) {
1659           __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1660         }
1661         break;
1662       case DataType::Type::kInt32:
1663       case DataType::Type::kReference:
1664       case DataType::Type::kInt64:
1665         DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1666         {
1667           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1668           __ ldar(Register(dst), base);
1669           if (needs_null_check) {
1670             MaybeRecordImplicitNullCheck(instruction);
1671           }
1672         }
1673         break;
1674       case DataType::Type::kFloat32:
1675       case DataType::Type::kFloat64: {
1676         DCHECK(dst.IsFPRegister());
1677         DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1678 
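        // There is no load-acquire instruction targeting an FP/SIMD register, so load into a
        // core register with LDAR and move the bits across with FMOV.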
1679         Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
1680         {
1681           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1682           __ ldar(temp, base);
1683           if (needs_null_check) {
1684             MaybeRecordImplicitNullCheck(instruction);
1685           }
1686         }
1687         __ Fmov(VRegister(dst), temp);
1688         break;
1689       }
1690       case DataType::Type::kUint32:
1691       case DataType::Type::kUint64:
1692       case DataType::Type::kVoid:
1693         LOG(FATAL) << "Unreachable type " << type;
1694     }
1695   }
1696 }
1697 
1698 void CodeGeneratorARM64::Store(DataType::Type type,
1699                                CPURegister src,
1700                                const MemOperand& dst) {
1701   switch (type) {
1702     case DataType::Type::kBool:
1703     case DataType::Type::kUint8:
1704     case DataType::Type::kInt8:
1705       __ Strb(Register(src), dst);
1706       break;
1707     case DataType::Type::kUint16:
1708     case DataType::Type::kInt16:
1709       __ Strh(Register(src), dst);
1710       break;
1711     case DataType::Type::kInt32:
1712     case DataType::Type::kReference:
1713     case DataType::Type::kInt64:
1714     case DataType::Type::kFloat32:
1715     case DataType::Type::kFloat64:
1716       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1717       __ Str(src, dst);
1718       break;
1719     case DataType::Type::kUint32:
1720     case DataType::Type::kUint64:
1721     case DataType::Type::kVoid:
1722       LOG(FATAL) << "Unreachable type " << type;
1723   }
1724 }
1725 
1726 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
1727                                       DataType::Type type,
1728                                       CPURegister src,
1729                                       const MemOperand& dst,
1730                                       bool needs_null_check) {
1731   MacroAssembler* masm = GetVIXLAssembler();
1732   UseScratchRegisterScope temps(GetVIXLAssembler());
1733   Register temp_base = temps.AcquireX();
1734 
1735   DCHECK(!dst.IsPreIndex());
1736   DCHECK(!dst.IsPostIndex());
1737 
1738   // TODO(vixl): Let the MacroAssembler handle this.
1739   Operand op = OperandFromMemOperand(dst);
1740   __ Add(temp_base, dst.GetBaseRegister(), op);
1741   MemOperand base = MemOperand(temp_base);
1742   // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
1743   switch (type) {
1744     case DataType::Type::kBool:
1745     case DataType::Type::kUint8:
1746     case DataType::Type::kInt8:
1747       {
1748         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1749         __ stlrb(Register(src), base);
1750         if (needs_null_check) {
1751           MaybeRecordImplicitNullCheck(instruction);
1752         }
1753       }
1754       break;
1755     case DataType::Type::kUint16:
1756     case DataType::Type::kInt16:
1757       {
1758         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1759         __ stlrh(Register(src), base);
1760         if (needs_null_check) {
1761           MaybeRecordImplicitNullCheck(instruction);
1762         }
1763       }
1764       break;
1765     case DataType::Type::kInt32:
1766     case DataType::Type::kReference:
1767     case DataType::Type::kInt64:
1768       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1769       {
1770         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1771         __ stlr(Register(src), base);
1772         if (needs_null_check) {
1773           MaybeRecordImplicitNullCheck(instruction);
1774         }
1775       }
1776       break;
1777     case DataType::Type::kFloat32:
1778     case DataType::Type::kFloat64: {
1779       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1780       Register temp_src;
1781       if (src.IsZero()) {
1782         // The zero register is used to avoid synthesizing zero constants.
1783         temp_src = Register(src);
1784       } else {
1785         DCHECK(src.IsFPRegister());
1786         temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
1787         __ Fmov(temp_src, VRegister(src));
1788       }
1789       {
1790         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1791         __ stlr(temp_src, base);
1792         if (needs_null_check) {
1793           MaybeRecordImplicitNullCheck(instruction);
1794         }
1795       }
1796       break;
1797     }
1798     case DataType::Type::kUint32:
1799     case DataType::Type::kUint64:
1800     case DataType::Type::kVoid:
1801       LOG(FATAL) << "Unreachable type " << type;
1802   }
1803 }
1804 
1805 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1806                                        HInstruction* instruction,
1807                                        uint32_t dex_pc,
1808                                        SlowPathCode* slow_path) {
1809   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1810 
1811   ThreadOffset64 entrypoint_offset = GetThreadOffset<kArm64PointerSize>(entrypoint);
1812   // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
1813   // entire oat file. This adds an extra branch and we do not want to slow down the main path.
1814   // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
1815   if (slow_path == nullptr || GetCompilerOptions().IsJitCompiler()) {
1816     __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
1817     // Ensure the pc position is recorded immediately after the `blr` instruction.
1818     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
1819     __ blr(lr);
1820     if (EntrypointRequiresStackMap(entrypoint)) {
1821       RecordPcInfo(instruction, dex_pc, slow_path);
1822     }
1823   } else {
1824     // Ensure the pc position is recorded immediately after the `bl` instruction.
1825     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
1826     EmitEntrypointThunkCall(entrypoint_offset);
1827     if (EntrypointRequiresStackMap(entrypoint)) {
1828       RecordPcInfo(instruction, dex_pc, slow_path);
1829     }
1830   }
1831 }
1832 
1833 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1834                                                              HInstruction* instruction,
1835                                                              SlowPathCode* slow_path) {
1836   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1837   __ Ldr(lr, MemOperand(tr, entry_point_offset));
1838   __ Blr(lr);
1839 }
1840 
1841 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
1842                                                                      Register class_reg) {
1843   UseScratchRegisterScope temps(GetVIXLAssembler());
1844   Register temp = temps.AcquireW();
1845   constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
1846   const size_t status_byte_offset =
1847       mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
1848   constexpr uint32_t shifted_visibly_initialized_value =
1849       enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
1850 
1851   // CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize
1852   // the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code
1853   // size, load only the high byte of the field and compare with 0xf0.
1854   // Note: The same code size could be achieved with LDR+MNV(asr #24)+CBNZ but benchmarks
1855   // show that this pattern is slower (tested on little cores).
1856   __ Ldrb(temp, HeapOperand(class_reg, status_byte_offset));
1857   __ Cmp(temp, shifted_visibly_initialized_value);
1858   __ B(lo, slow_path->GetEntryLabel());
1859   __ Bind(slow_path->GetExitLabel());
1860 }
1861 
1862 void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare(
1863     HTypeCheckInstruction* check, vixl::aarch64::Register temp) {
1864   uint32_t path_to_root = check->GetBitstringPathToRoot();
1865   uint32_t mask = check->GetBitstringMask();
1866   DCHECK(IsPowerOfTwo(mask + 1));
1867   size_t mask_bits = WhichPowerOf2(mask + 1);
1868 
1869   if (mask_bits == 16u) {
1870     // Load only the bitstring part of the status word.
1871     __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
1872   } else {
1873     // /* uint32_t */ temp = temp->status_
1874     __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
1875     // Extract the bitstring bits.
1876     __ Ubfx(temp, temp, 0, mask_bits);
1877   }
1878   // Compare the bitstring bits to `path_to_root`.
1879   __ Cmp(temp, path_to_root);
1880 }
1881 
1882 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
1883   BarrierType type = BarrierAll;
1884 
1885   switch (kind) {
1886     case MemBarrierKind::kAnyAny:
1887     case MemBarrierKind::kAnyStore: {
1888       type = BarrierAll;
1889       break;
1890     }
1891     case MemBarrierKind::kLoadAny: {
1892       type = BarrierReads;
1893       break;
1894     }
1895     case MemBarrierKind::kStoreStore: {
1896       type = BarrierWrites;
1897       break;
1898     }
1899     default:
1900       LOG(FATAL) << "Unexpected memory barrier " << kind;
1901   }
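  // The barrier is emitted as `dmb ish`, `dmb ishld` or `dmb ishst` (inner-shareable domain;
  // all / load / store variants respectively).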
1902   __ Dmb(InnerShareable, type);
1903 }
1904 
1905 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
1906                                                          HBasicBlock* successor) {
1907   SuspendCheckSlowPathARM64* slow_path =
1908       down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
1909   if (slow_path == nullptr) {
1910     slow_path =
1911         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARM64(instruction, successor);
1912     instruction->SetSlowPath(slow_path);
1913     codegen_->AddSlowPath(slow_path);
1914     if (successor != nullptr) {
1915       DCHECK(successor->IsLoopHeader());
1916     }
1917   } else {
1918     DCHECK_EQ(slow_path->GetSuccessor(), successor);
1919   }
1920 
1921   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
1922   Register temp = temps.AcquireW();
1923 
1924   __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
1925   if (successor == nullptr) {
1926     __ Cbnz(temp, slow_path->GetEntryLabel());
1927     __ Bind(slow_path->GetReturnLabel());
1928   } else {
1929     __ Cbz(temp, codegen_->GetLabelOf(successor));
1930     __ B(slow_path->GetEntryLabel());
1931     // slow_path will return to GetLabelOf(successor).
1932   }
1933 }
1934 
1935 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
1936                                                              CodeGeneratorARM64* codegen)
1937       : InstructionCodeGenerator(graph, codegen),
1938         assembler_(codegen->GetAssembler()),
1939         codegen_(codegen) {}
1940 
1941 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
1942   DCHECK_EQ(instr->InputCount(), 2U);
1943   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
1944   DataType::Type type = instr->GetResultType();
1945   switch (type) {
1946     case DataType::Type::kInt32:
1947     case DataType::Type::kInt64:
1948       locations->SetInAt(0, Location::RequiresRegister());
1949       locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
1950       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1951       break;
1952 
1953     case DataType::Type::kFloat32:
1954     case DataType::Type::kFloat64:
1955       locations->SetInAt(0, Location::RequiresFpuRegister());
1956       locations->SetInAt(1, Location::RequiresFpuRegister());
1957       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
1958       break;
1959 
1960     default:
1961       LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
1962   }
1963 }
1964 
1965 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
1966                                            const FieldInfo& field_info) {
1967   DCHECK(instruction->IsInstanceFieldGet() ||
1968          instruction->IsStaticFieldGet() ||
1969          instruction->IsPredicatedInstanceFieldGet());
1970 
1971   bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
1972 
1973   bool object_field_get_with_read_barrier =
1974       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
1975   LocationSummary* locations =
1976       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
1977                                                        object_field_get_with_read_barrier
1978                                                            ? LocationSummary::kCallOnSlowPath
1979                                                            : LocationSummary::kNoCall);
1980   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
1981     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1982     // We need a temporary register for the read barrier load in
1983     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
1984     // only if the field is volatile or the offset is too big.
1985     if (field_info.IsVolatile() ||
1986         field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
1987       locations->AddTemp(FixedTempLocation());
1988     }
1989   }
1990   // Input for object receiver.
1991   locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
1992   if (DataType::IsFloatingPointType(instruction->GetType())) {
1993     if (is_predicated) {
1994       locations->SetInAt(0, Location::RequiresFpuRegister());
1995       locations->SetOut(Location::SameAsFirstInput());
1996     } else {
1997       locations->SetOut(Location::RequiresFpuRegister());
1998     }
1999   } else {
2000     if (is_predicated) {
2001       locations->SetInAt(0, Location::RequiresRegister());
2002       locations->SetOut(Location::SameAsFirstInput());
2003     } else {
2004       // The output overlaps for an object field get when read barriers
2005       // are enabled: we do not want the load to overwrite the object's
2006       // location, as we need it to emit the read barrier.
2007       locations->SetOut(Location::RequiresRegister(),
2008                         object_field_get_with_read_barrier ? Location::kOutputOverlap
2009                                                            : Location::kNoOutputOverlap);
2010     }
2011   }
2012 }
2013 
2014 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
2015                                                    const FieldInfo& field_info) {
2016   DCHECK(instruction->IsInstanceFieldGet() ||
2017          instruction->IsStaticFieldGet() ||
2018          instruction->IsPredicatedInstanceFieldGet());
2019   bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
2020   LocationSummary* locations = instruction->GetLocations();
2021   uint32_t receiver_input = is_predicated ? 1 : 0;
2022   Location base_loc = locations->InAt(receiver_input);
2023   Location out = locations->Out();
2024   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
2025   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
2026   DataType::Type load_type = instruction->GetType();
2027   MemOperand field =
2028       HeapOperand(InputRegisterAt(instruction, receiver_input), field_info.GetFieldOffset());
2029 
2030   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier &&
2031       load_type == DataType::Type::kReference) {
2032     // Object FieldGet with Baker's read barrier case.
2033     // /* HeapReference<Object> */ out = *(base + offset)
2034     Register base = RegisterFrom(base_loc, DataType::Type::kReference);
2035     Location maybe_temp =
2036         (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2037     // Note that potential implicit null checks are handled in this
2038     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
2039     codegen_->GenerateFieldLoadWithBakerReadBarrier(
2040         instruction,
2041         out,
2042         base,
2043         offset,
2044         maybe_temp,
2045         /* needs_null_check= */ true,
2046         field_info.IsVolatile());
2047   } else {
2048     // General case.
2049     if (field_info.IsVolatile()) {
2050       // Note that a potential implicit null check is handled in this
2051       // CodeGeneratorARM64::LoadAcquire call.
2052       // NB: LoadAcquire will record the pc info if needed.
2053       codegen_->LoadAcquire(instruction,
2054                             load_type,
2055                             OutputCPURegister(instruction),
2056                             field,
2057                             /* needs_null_check= */ true);
2058     } else {
2059       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2060       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2061       codegen_->Load(load_type, OutputCPURegister(instruction), field);
2062       codegen_->MaybeRecordImplicitNullCheck(instruction);
2063     }
2064     if (load_type == DataType::Type::kReference) {
2065       // If read barriers are enabled, emit read barriers other than
2066       // Baker's using a slow path (and also unpoison the loaded
2067       // reference, if heap poisoning is enabled).
2068       codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
2069     }
2070   }
2071 }
2072 
2073 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
2074   LocationSummary* locations =
2075       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2076   locations->SetInAt(0, Location::RequiresRegister());
2077   if (IsConstantZeroBitPattern(instruction->InputAt(1))) {
2078     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
2079   } else if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
2080     locations->SetInAt(1, Location::RequiresFpuRegister());
2081   } else {
2082     locations->SetInAt(1, Location::RequiresRegister());
2083   }
2084 }
2085 
2086 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
2087                                                    const FieldInfo& field_info,
2088                                                    bool value_can_be_null) {
2089   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
2090   bool is_predicated =
2091       instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
2092 
2093   Register obj = InputRegisterAt(instruction, 0);
2094   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
2095   CPURegister source = value;
2096   Offset offset = field_info.GetFieldOffset();
2097   DataType::Type field_type = field_info.GetFieldType();
2098   std::optional<vixl::aarch64::Label> pred_is_null;
2099   if (is_predicated) {
2100     pred_is_null.emplace();
2101     __ Cbz(obj, &*pred_is_null);
2102   }
2103 
2104   {
2105     // We use a block to end the scratch scope before the write barrier, thus
2106     // freeing the temporary registers so they can be used in `MarkGCCard`.
2107     UseScratchRegisterScope temps(GetVIXLAssembler());
2108 
2109     if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
2110       DCHECK(value.IsW());
2111       Register temp = temps.AcquireW();
2112       __ Mov(temp, value.W());
2113       GetAssembler()->PoisonHeapReference(temp.W());
2114       source = temp;
2115     }
2116 
2117     if (field_info.IsVolatile()) {
2118       codegen_->StoreRelease(
2119           instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check= */ true);
2120     } else {
2121       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2122       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2123       codegen_->Store(field_type, source, HeapOperand(obj, offset));
2124       codegen_->MaybeRecordImplicitNullCheck(instruction);
2125     }
2126   }
2127 
2128   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
2129     codegen_->MarkGCCard(obj, Register(value), value_can_be_null);
2130   }
2131 
2132   if (is_predicated) {
2133     __ Bind(&*pred_is_null);
2134   }
2135 }
2136 
2137 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
2138   DataType::Type type = instr->GetType();
2139 
2140   switch (type) {
2141     case DataType::Type::kInt32:
2142     case DataType::Type::kInt64: {
2143       Register dst = OutputRegister(instr);
2144       Register lhs = InputRegisterAt(instr, 0);
2145       Operand rhs = InputOperandAt(instr, 1);
2146       if (instr->IsAdd()) {
2147         __ Add(dst, lhs, rhs);
2148       } else if (instr->IsAnd()) {
2149         __ And(dst, lhs, rhs);
2150       } else if (instr->IsOr()) {
2151         __ Orr(dst, lhs, rhs);
2152       } else if (instr->IsSub()) {
2153         __ Sub(dst, lhs, rhs);
2154       } else if (instr->IsRor()) {
2155         if (rhs.IsImmediate()) {
2156           uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
2157           __ Ror(dst, lhs, shift);
2158         } else {
2159           // Ensure shift distance is in the same size register as the result. If
2160           // we are rotating a long and the shift comes in a w register originally,
2161           // we don't need to sxtw for use as an x, since the shift distance is
2162           // always masked with (reg_bits - 1).
2163           __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
2164         }
2165       } else if (instr->IsMin() || instr->IsMax()) {
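          // Csel selects `lhs` when the condition holds and `rhs` otherwise: `lt` yields the
          // minimum, `gt` the maximum.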
2166           __ Cmp(lhs, rhs);
2167           __ Csel(dst, lhs, rhs, instr->IsMin() ? lt : gt);
2168       } else {
2169         DCHECK(instr->IsXor());
2170         __ Eor(dst, lhs, rhs);
2171       }
2172       break;
2173     }
2174     case DataType::Type::kFloat32:
2175     case DataType::Type::kFloat64: {
2176       VRegister dst = OutputFPRegister(instr);
2177       VRegister lhs = InputFPRegisterAt(instr, 0);
2178       VRegister rhs = InputFPRegisterAt(instr, 1);
2179       if (instr->IsAdd()) {
2180         __ Fadd(dst, lhs, rhs);
2181       } else if (instr->IsSub()) {
2182         __ Fsub(dst, lhs, rhs);
2183       } else if (instr->IsMin()) {
2184         __ Fmin(dst, lhs, rhs);
2185       } else if (instr->IsMax()) {
2186         __ Fmax(dst, lhs, rhs);
2187       } else {
2188         LOG(FATAL) << "Unexpected floating-point binary operation";
2189       }
2190       break;
2191     }
2192     default:
2193       LOG(FATAL) << "Unexpected binary operation type " << type;
2194   }
2195 }
2196 
2197 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
2198   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2199 
2200   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2201   DataType::Type type = instr->GetResultType();
2202   switch (type) {
2203     case DataType::Type::kInt32:
2204     case DataType::Type::kInt64: {
2205       locations->SetInAt(0, Location::RequiresRegister());
2206       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
2207       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2208       break;
2209     }
2210     default:
2211       LOG(FATAL) << "Unexpected shift type " << type;
2212   }
2213 }
2214 
2215 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
2216   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2217 
2218   DataType::Type type = instr->GetType();
2219   switch (type) {
2220     case DataType::Type::kInt32:
2221     case DataType::Type::kInt64: {
2222       Register dst = OutputRegister(instr);
2223       Register lhs = InputRegisterAt(instr, 0);
2224       Operand rhs = InputOperandAt(instr, 1);
2225       if (rhs.IsImmediate()) {
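        // Mask the shift amount to the operand width (0-31 for Int32, 0-63 for Int64), matching
        // Java's shift-distance semantics.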
2226         uint32_t shift_value = rhs.GetImmediate() &
2227             (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance);
2228         if (instr->IsShl()) {
2229           __ Lsl(dst, lhs, shift_value);
2230         } else if (instr->IsShr()) {
2231           __ Asr(dst, lhs, shift_value);
2232         } else {
2233           __ Lsr(dst, lhs, shift_value);
2234         }
2235       } else {
2236         Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
2237 
2238         if (instr->IsShl()) {
2239           __ Lsl(dst, lhs, rhs_reg);
2240         } else if (instr->IsShr()) {
2241           __ Asr(dst, lhs, rhs_reg);
2242         } else {
2243           __ Lsr(dst, lhs, rhs_reg);
2244         }
2245       }
2246       break;
2247     }
2248     default:
2249       LOG(FATAL) << "Unexpected shift operation type " << type;
2250   }
2251 }
2252 
2253 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
2254   HandleBinaryOp(instruction);
2255 }
2256 
2257 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
2258   HandleBinaryOp(instruction);
2259 }
2260 
2261 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
2262   HandleBinaryOp(instruction);
2263 }
2264 
2265 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
2266   HandleBinaryOp(instruction);
2267 }
2268 
2269 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2270   DCHECK(DataType::IsIntegralType(instr->GetType())) << instr->GetType();
2271   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2272   locations->SetInAt(0, Location::RequiresRegister());
2273   // There is no immediate variant of negated bitwise instructions in AArch64.
2274   locations->SetInAt(1, Location::RequiresRegister());
2275   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2276 }
2277 
2278 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2279   Register dst = OutputRegister(instr);
2280   Register lhs = InputRegisterAt(instr, 0);
2281   Register rhs = InputRegisterAt(instr, 1);
2282 
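  // BIC, ORN and EON compute lhs & ~rhs, lhs | ~rhs and lhs ^ ~rhs respectively, so the
  // negation of the right-hand operand is folded into a single instruction.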
2283   switch (instr->GetOpKind()) {
2284     case HInstruction::kAnd:
2285       __ Bic(dst, lhs, rhs);
2286       break;
2287     case HInstruction::kOr:
2288       __ Orn(dst, lhs, rhs);
2289       break;
2290     case HInstruction::kXor:
2291       __ Eon(dst, lhs, rhs);
2292       break;
2293     default:
2294       LOG(FATAL) << "Unreachable";
2295   }
2296 }
2297 
2298 void LocationsBuilderARM64::VisitDataProcWithShifterOp(
2299     HDataProcWithShifterOp* instruction) {
2300   DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
2301          instruction->GetType() == DataType::Type::kInt64);
2302   LocationSummary* locations =
2303       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2304   if (instruction->GetInstrKind() == HInstruction::kNeg) {
2305     locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
2306   } else {
2307     locations->SetInAt(0, Location::RequiresRegister());
2308   }
2309   locations->SetInAt(1, Location::RequiresRegister());
2310   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2311 }
2312 
2313 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
2314     HDataProcWithShifterOp* instruction) {
2315   DataType::Type type = instruction->GetType();
2316   HInstruction::InstructionKind kind = instruction->GetInstrKind();
2317   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
2318   Register out = OutputRegister(instruction);
2319   Register left;
2320   if (kind != HInstruction::kNeg) {
2321     left = InputRegisterAt(instruction, 0);
2322   }
2323   // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
2324   // shifter operand operation, the IR generating `right_reg` (input to the type
2325   // conversion) can have a different type from the current instruction's type,
2326   // so we manually indicate the type.
2327   Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
2328   Operand right_operand(0);
2329 
2330   HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
2331   if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
2332     right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
2333   } else {
2334     right_operand = Operand(right_reg,
2335                             helpers::ShiftFromOpKind(op_kind),
2336                             instruction->GetShiftAmount());
2337   }
2338 
2339   // Logical binary operations do not support extension operations in the
2340   // operand. Note that VIXL would still handle such an operand by generating
2341   // the extension as a separate instruction.
2342   // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
2343   DCHECK(!right_operand.IsExtendedRegister() ||
2344          (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
2345           kind != HInstruction::kNeg));
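  // For example, an HAdd whose right-hand input was a `Shl(b, 5)` merged by the
  // instruction simplifier arrives here with op kind LSL and shift amount 5 and is
  // emitted as a single 'add out, left, b, lsl #5'.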
2346   switch (kind) {
2347     case HInstruction::kAdd:
2348       __ Add(out, left, right_operand);
2349       break;
2350     case HInstruction::kAnd:
2351       __ And(out, left, right_operand);
2352       break;
2353     case HInstruction::kNeg:
2354       DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
2355       __ Neg(out, right_operand);
2356       break;
2357     case HInstruction::kOr:
2358       __ Orr(out, left, right_operand);
2359       break;
2360     case HInstruction::kSub:
2361       __ Sub(out, left, right_operand);
2362       break;
2363     case HInstruction::kXor:
2364       __ Eor(out, left, right_operand);
2365       break;
2366     default:
2367       LOG(FATAL) << "Unexpected operation kind: " << kind;
2368       UNREACHABLE();
2369   }
2370 }
2371 
2372 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2373   LocationSummary* locations =
2374       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2375   locations->SetInAt(0, Location::RequiresRegister());
2376   locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
2377   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2378 }
2379 
2380 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2381   __ Add(OutputRegister(instruction),
2382          InputRegisterAt(instruction, 0),
2383          Operand(InputOperandAt(instruction, 1)));
2384 }
2385 
2386 void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
2387   LocationSummary* locations =
2388       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2389 
2390   HIntConstant* shift = instruction->GetShift()->AsIntConstant();
2391 
2392   locations->SetInAt(0, Location::RequiresRegister());
2393   // For the byte case we don't need to shift the index variable, so we can encode the data offset
2394   // into the ADD instruction. For other cases we prefer the data offset to be in a register; that
2395   // hoists the data offset constant generation out of the loop and reduces the critical path length
2396   // in the loop.
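  // For example, with shift == 0 the generated code is 'add out, index, #data_offset',
  // while for wider elements it is 'add out, data_offset_reg, index, lsl #shift'
  // (see VisitIntermediateAddressIndex below).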
2397   locations->SetInAt(1, shift->GetValue() == 0
2398                         ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant())
2399                         : Location::RequiresRegister());
2400   locations->SetInAt(2, Location::ConstantLocation(shift));
2401   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2402 }
2403 
2404 void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
2405     HIntermediateAddressIndex* instruction) {
2406   Register index_reg = InputRegisterAt(instruction, 0);
2407   uint32_t shift = Int64FromLocation(instruction->GetLocations()->InAt(2));
2408   uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
2409 
2410   if (shift == 0) {
2411     __ Add(OutputRegister(instruction), index_reg, offset);
2412   } else {
2413     Register offset_reg = InputRegisterAt(instruction, 1);
2414     __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
2415   }
2416 }
2417 
2418 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2419   LocationSummary* locations =
2420       new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
2421   HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2422   if (instr->GetOpKind() == HInstruction::kSub &&
2423       accumulator->IsConstant() &&
2424       accumulator->AsConstant()->IsArithmeticZero()) {
2425     // Don't allocate a register for the Mneg instruction.
2426   } else {
2427     locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
2428                        Location::RequiresRegister());
2429   }
2430   locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
2431   locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
2432   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2433 }
2434 
2435 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2436   Register res = OutputRegister(instr);
2437   Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
2438   Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
2439 
2440   // Avoid emitting code that could trigger Cortex A53's erratum 835769.
2441   // This fixup should be carried out for all multiply-accumulate instructions:
2442   // madd, msub, smaddl, smsubl, umaddl and umsubl.
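  // The erratum can produce an incorrect result when a 64-bit multiply-accumulate
  // instruction directly follows a memory load or store, so a nop is emitted between
  // the two instructions to break up that sequence.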
2443   if (instr->GetType() == DataType::Type::kInt64 &&
2444       codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
2445     MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
2446     vixl::aarch64::Instruction* prev =
2447         masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
2448     if (prev->IsLoadOrStore()) {
2449       // Make sure we emit exactly one nop.
2450       ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2451       __ nop();
2452     }
2453   }
2454 
2455   if (instr->GetOpKind() == HInstruction::kAdd) {
2456     Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2457     __ Madd(res, mul_left, mul_right, accumulator);
2458   } else {
2459     DCHECK(instr->GetOpKind() == HInstruction::kSub);
2460     HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2461     if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
2462       __ Mneg(res, mul_left, mul_right);
2463     } else {
2464       Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2465       __ Msub(res, mul_left, mul_right, accumulator);
2466     }
2467   }
2468 }
2469 
2470 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
2471   bool object_array_get_with_read_barrier =
2472       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
2473   LocationSummary* locations =
2474       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2475                                                        object_array_get_with_read_barrier
2476                                                            ? LocationSummary::kCallOnSlowPath
2477                                                            : LocationSummary::kNoCall);
2478   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
2479     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2480     if (instruction->GetIndex()->IsConstant()) {
2481       // Array loads with constant index are treated as field loads.
2482       // We need a temporary register for the read barrier load in
2483       // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
2484       // only if the offset is too big.
2485       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2486       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
2487       offset += index << DataType::SizeShift(DataType::Type::kReference);
2488       if (offset >= kReferenceLoadMinFarOffset) {
2489         locations->AddTemp(FixedTempLocation());
2490       }
2491     } else if (!instruction->GetArray()->IsIntermediateAddress()) {
2492       // We need a non-scratch temporary for the array data pointer in
2493       // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier() for the case with no
2494       // intermediate address.
2495       locations->AddTemp(Location::RequiresRegister());
2496     }
2497   }
2498   locations->SetInAt(0, Location::RequiresRegister());
2499   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2500   if (DataType::IsFloatingPointType(instruction->GetType())) {
2501     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2502   } else {
2503     // The output overlaps in the case of an object array get with
2504     // read barriers enabled: we do not want the move to overwrite the
2505     // array's location, as we need it to emit the read barrier.
2506     locations->SetOut(
2507         Location::RequiresRegister(),
2508         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2509   }
2510 }
2511 
2512 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
2513   DataType::Type type = instruction->GetType();
2514   Register obj = InputRegisterAt(instruction, 0);
2515   LocationSummary* locations = instruction->GetLocations();
2516   Location index = locations->InAt(1);
2517   Location out = locations->Out();
2518   uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2519   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
2520                                         instruction->IsStringCharAt();
2521   MacroAssembler* masm = GetVIXLAssembler();
2522   UseScratchRegisterScope temps(masm);
2523 
2524   // The non-Baker read barrier instrumentation of object ArrayGet instructions
2525   // does not support the HIntermediateAddress instruction.
2526   DCHECK(!((type == DataType::Type::kReference) &&
2527            instruction->GetArray()->IsIntermediateAddress() &&
2528            kEmitCompilerReadBarrier &&
2529            !kUseBakerReadBarrier));
2530 
2531   if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2532     // Object ArrayGet with Baker's read barrier case.
2533     // Note that a potential implicit null check is handled in the
2534     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
2535     DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
2536     if (index.IsConstant()) {
2537       DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2538       // Array load with a constant index can be treated as a field load.
2539       offset += Int64FromLocation(index) << DataType::SizeShift(type);
2540       Location maybe_temp =
2541           (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2542       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
2543                                                       out,
2544                                                       obj.W(),
2545                                                       offset,
2546                                                       maybe_temp,
2547                                                       /* needs_null_check= */ false,
2548                                                       /* use_load_acquire= */ false);
2549     } else {
2550       codegen_->GenerateArrayLoadWithBakerReadBarrier(
2551           instruction, out, obj.W(), offset, index, /* needs_null_check= */ false);
2552     }
2553   } else {
2554     // General case.
2555     MemOperand source = HeapOperand(obj);
2556     Register length;
2557     if (maybe_compressed_char_at) {
2558       uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2559       length = temps.AcquireW();
2560       {
2561         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2562         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2563 
2564         if (instruction->GetArray()->IsIntermediateAddress()) {
2565           DCHECK_LT(count_offset, offset);
2566           int64_t adjusted_offset =
2567               static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
2568           // Note that `adjusted_offset` is negative, so this will be a LDUR.
2569           __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
2570         } else {
2571           __ Ldr(length, HeapOperand(obj, count_offset));
2572         }
2573         codegen_->MaybeRecordImplicitNullCheck(instruction);
2574       }
2575     }
2576     if (index.IsConstant()) {
2577       if (maybe_compressed_char_at) {
2578         vixl::aarch64::Label uncompressed_load, done;
2579         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2580                       "Expecting 0=compressed, 1=uncompressed");
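        // Bit 0 of the count field is the compression flag (0 means compressed), so the
        // TBNZ below selects the 16-bit char load for uncompressed strings and the
        // 8-bit load otherwise.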
2581         __ Tbnz(length.W(), 0, &uncompressed_load);
2582         __ Ldrb(Register(OutputCPURegister(instruction)),
2583                 HeapOperand(obj, offset + Int64FromLocation(index)));
2584         __ B(&done);
2585         __ Bind(&uncompressed_load);
2586         __ Ldrh(Register(OutputCPURegister(instruction)),
2587                 HeapOperand(obj, offset + (Int64FromLocation(index) << 1)));
2588         __ Bind(&done);
2589       } else {
2590         offset += Int64FromLocation(index) << DataType::SizeShift(type);
2591         source = HeapOperand(obj, offset);
2592       }
2593     } else {
2594       Register temp = temps.AcquireSameSizeAs(obj);
2595       if (instruction->GetArray()->IsIntermediateAddress()) {
2596         // We do not need to compute the intermediate address from the array: the
2597         // input instruction has done it already. See the comment in
2598         // `TryExtractArrayAccessAddress()`.
2599         if (kIsDebugBuild) {
2600           HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
2601           DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
2602         }
2603         temp = obj;
2604       } else {
2605         __ Add(temp, obj, offset);
2606       }
2607       if (maybe_compressed_char_at) {
2608         vixl::aarch64::Label uncompressed_load, done;
2609         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2610                       "Expecting 0=compressed, 1=uncompressed");
2611         __ Tbnz(length.W(), 0, &uncompressed_load);
2612         __ Ldrb(Register(OutputCPURegister(instruction)),
2613                 HeapOperand(temp, XRegisterFrom(index), LSL, 0));
2614         __ B(&done);
2615         __ Bind(&uncompressed_load);
2616         __ Ldrh(Register(OutputCPURegister(instruction)),
2617                 HeapOperand(temp, XRegisterFrom(index), LSL, 1));
2618         __ Bind(&done);
2619       } else {
2620         source = HeapOperand(temp, XRegisterFrom(index), LSL, DataType::SizeShift(type));
2621       }
2622     }
2623     if (!maybe_compressed_char_at) {
2624       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2625       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2626       codegen_->Load(type, OutputCPURegister(instruction), source);
2627       codegen_->MaybeRecordImplicitNullCheck(instruction);
2628     }
2629 
2630     if (type == DataType::Type::kReference) {
2631       static_assert(
2632           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
2633           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
2634       Location obj_loc = locations->InAt(0);
2635       if (index.IsConstant()) {
2636         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
2637       } else {
2638         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
2639       }
2640     }
2641   }
2642 }
2643 
2644 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
2645   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
2646   locations->SetInAt(0, Location::RequiresRegister());
2647   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2648 }
2649 
2650 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
2651   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
2652   vixl::aarch64::Register out = OutputRegister(instruction);
2653   {
2654     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2655     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2656     __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
2657     codegen_->MaybeRecordImplicitNullCheck(instruction);
2658   }
2659   // Mask out compression flag from String's array length.
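  // The count field stores the length in the upper bits and the compression flag in
  // bit 0, so a logical shift right by one yields the actual character count.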
2660   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
2661     __ Lsr(out.W(), out.W(), 1u);
2662   }
2663 }
2664 
2665 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
2666   DataType::Type value_type = instruction->GetComponentType();
2667 
2668   bool needs_type_check = instruction->NeedsTypeCheck();
2669   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
2670       instruction,
2671       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
2672   locations->SetInAt(0, Location::RequiresRegister());
2673   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2674   if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
2675     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
2676   } else if (DataType::IsFloatingPointType(value_type)) {
2677     locations->SetInAt(2, Location::RequiresFpuRegister());
2678   } else {
2679     locations->SetInAt(2, Location::RequiresRegister());
2680   }
2681 }
2682 
2683 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
2684   DataType::Type value_type = instruction->GetComponentType();
2685   LocationSummary* locations = instruction->GetLocations();
2686   bool needs_type_check = instruction->NeedsTypeCheck();
2687   bool needs_write_barrier =
2688       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
2689 
2690   Register array = InputRegisterAt(instruction, 0);
2691   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
2692   CPURegister source = value;
2693   Location index = locations->InAt(1);
2694   size_t offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
2695   MemOperand destination = HeapOperand(array);
2696   MacroAssembler* masm = GetVIXLAssembler();
2697 
2698   if (!needs_write_barrier) {
2699     DCHECK(!needs_type_check);
2700     if (index.IsConstant()) {
2701       offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
2702       destination = HeapOperand(array, offset);
2703     } else {
2704       UseScratchRegisterScope temps(masm);
2705       Register temp = temps.AcquireSameSizeAs(array);
2706       if (instruction->GetArray()->IsIntermediateAddress()) {
2707         // We do not need to compute the intermediate address from the array: the
2708         // input instruction has done it already. See the comment in
2709         // `TryExtractArrayAccessAddress()`.
2710         if (kIsDebugBuild) {
2711           HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
2712           DCHECK(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
2713         }
2714         temp = array;
2715       } else {
2716         __ Add(temp, array, offset);
2717       }
2718       destination = HeapOperand(temp,
2719                                 XRegisterFrom(index),
2720                                 LSL,
2721                                 DataType::SizeShift(value_type));
2722     }
2723     {
2724       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2725       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2726       codegen_->Store(value_type, value, destination);
2727       codegen_->MaybeRecordImplicitNullCheck(instruction);
2728     }
2729   } else {
2730     DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2731 
2732     bool can_value_be_null = instruction->GetValueCanBeNull();
2733     vixl::aarch64::Label do_store;
2734     if (can_value_be_null) {
2735       __ Cbz(Register(value), &do_store);
2736     }
2737 
2738     SlowPathCodeARM64* slow_path = nullptr;
2739     if (needs_type_check) {
2740       slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction);
2741       codegen_->AddSlowPath(slow_path);
2742 
2743       const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2744       const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2745       const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2746 
2747       UseScratchRegisterScope temps(masm);
2748       Register temp = temps.AcquireSameSizeAs(array);
2749       Register temp2 = temps.AcquireSameSizeAs(array);
2750 
2751       // Note that when Baker read barriers are enabled, the type
2752       // checks are performed without read barriers.  This is fine,
2753       // even in the case where a class object is in the from-space
2754       // after the flip, as a comparison involving such a type would
2755       // not produce a false positive; it may of course produce a
2756       // false negative, in which case we would take the ArraySet
2757       // slow path.
2758 
2759       // /* HeapReference<Class> */ temp = array->klass_
2760       {
2761         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2762         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2763         __ Ldr(temp, HeapOperand(array, class_offset));
2764         codegen_->MaybeRecordImplicitNullCheck(instruction);
2765       }
2766       GetAssembler()->MaybeUnpoisonHeapReference(temp);
2767 
2768       // /* HeapReference<Class> */ temp = temp->component_type_
2769       __ Ldr(temp, HeapOperand(temp, component_offset));
2770       // /* HeapReference<Class> */ temp2 = value->klass_
2771       __ Ldr(temp2, HeapOperand(Register(value), class_offset));
2772       // If heap poisoning is enabled, no need to unpoison `temp`
2773       // nor `temp2`, as we are comparing two poisoned references.
2774       __ Cmp(temp, temp2);
2775 
2776       if (instruction->StaticTypeOfArrayIsObjectArray()) {
2777         vixl::aarch64::Label do_put;
2778         __ B(eq, &do_put);
2779         // If heap poisoning is enabled, the `temp` reference has
2780         // not been unpoisoned yet; unpoison it now.
2781         GetAssembler()->MaybeUnpoisonHeapReference(temp);
2782 
2783         // /* HeapReference<Class> */ temp = temp->super_class_
2784         __ Ldr(temp, HeapOperand(temp, super_offset));
2785         // If heap poisoning is enabled, no need to unpoison
2786         // `temp`, as we are comparing against null below.
2787         __ Cbnz(temp, slow_path->GetEntryLabel());
2788         __ Bind(&do_put);
2789       } else {
2790         __ B(ne, slow_path->GetEntryLabel());
2791       }
2792     }
2793 
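    // A null value jumps over the type check and this card mark (to `do_store` below),
    // so the card mark can safely assume a non-null value here.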
2794     codegen_->MarkGCCard(array, value.W(), /* value_can_be_null= */ false);
2795 
2796     if (can_value_be_null) {
2797       DCHECK(do_store.IsLinked());
2798       __ Bind(&do_store);
2799     }
2800 
2801     UseScratchRegisterScope temps(masm);
2802     if (kPoisonHeapReferences) {
2803       Register temp_source = temps.AcquireSameSizeAs(array);
2804       DCHECK(value.IsW());
2805       __ Mov(temp_source, value.W());
2806       GetAssembler()->PoisonHeapReference(temp_source);
2807       source = temp_source;
2808     }
2809 
2810     if (index.IsConstant()) {
2811       offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
2812       destination = HeapOperand(array, offset);
2813     } else {
2814       Register temp_base = temps.AcquireSameSizeAs(array);
2815       __ Add(temp_base, array, offset);
2816       destination = HeapOperand(temp_base,
2817                                 XRegisterFrom(index),
2818                                 LSL,
2819                                 DataType::SizeShift(value_type));
2820     }
2821 
2822     {
2823       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2824       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2825       __ Str(source, destination);
2826 
2827       if (can_value_be_null || !needs_type_check) {
2828         codegen_->MaybeRecordImplicitNullCheck(instruction);
2829       }
2830     }
2831 
2832     if (slow_path != nullptr) {
2833       __ Bind(slow_path->GetExitLabel());
2834     }
2835   }
2836 }
2837 
2838 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
2839   RegisterSet caller_saves = RegisterSet::Empty();
2840   InvokeRuntimeCallingConvention calling_convention;
2841   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
2842   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
2843   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
2844 
2845   // If both index and length are constant, we can check the bounds statically and
2846   // generate code accordingly. We want to make sure we generate constant locations
2847   // in that case, regardless of whether they are encodable in the comparison or not.
2848   HInstruction* index = instruction->InputAt(0);
2849   HInstruction* length = instruction->InputAt(1);
2850   bool both_const = index->IsConstant() && length->IsConstant();
2851   locations->SetInAt(0, both_const
2852       ? Location::ConstantLocation(index->AsConstant())
2853       : ARM64EncodableConstantOrRegister(index, instruction));
2854   locations->SetInAt(1, both_const
2855       ? Location::ConstantLocation(length->AsConstant())
2856       : ARM64EncodableConstantOrRegister(length, instruction));
2857 }
2858 
2859 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
2860   LocationSummary* locations = instruction->GetLocations();
2861   Location index_loc = locations->InAt(0);
2862   Location length_loc = locations->InAt(1);
2863 
2864   int cmp_first_input = 0;
2865   int cmp_second_input = 1;
2866   Condition cond = hs;
2867 
2868   if (index_loc.IsConstant()) {
2869     int64_t index = Int64FromLocation(index_loc);
2870     if (length_loc.IsConstant()) {
2871       int64_t length = Int64FromLocation(length_loc);
2872       if (index < 0 || index >= length) {
2873         BoundsCheckSlowPathARM64* slow_path =
2874             new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
2875         codegen_->AddSlowPath(slow_path);
2876         __ B(slow_path->GetEntryLabel());
2877       } else {
2878         // BCE removes the bounds check when the access is guaranteed to be in range.
2879         // However, an optimization running after BCE may have generated this check; since
2880         // the range is valid here, there is no need to emit a bounds check.
2881       }
2882       return;
2883     }
2884     // Only the index is constant: change the order of the operands and commute the condition
2885     // so we can use an immediate constant for the index (only the second input to a cmp
2886     // instruction can be an immediate).
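    // For example, with a constant index 5 the code below emits 'cmp length, #5' and
    // branches to the slow path on LS (length <= 5), which is equivalent to the
    // original 'index >= length' (HS) check.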
2887     cmp_first_input = 1;
2888     cmp_second_input = 0;
2889     cond = ls;
2890   }
2891   BoundsCheckSlowPathARM64* slow_path =
2892       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
2893   __ Cmp(InputRegisterAt(instruction, cmp_first_input),
2894          InputOperandAt(instruction, cmp_second_input));
2895   codegen_->AddSlowPath(slow_path);
2896   __ B(slow_path->GetEntryLabel(), cond);
2897 }
2898 
2899 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
2900   LocationSummary* locations =
2901       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
2902   locations->SetInAt(0, Location::RequiresRegister());
2903   if (check->HasUses()) {
2904     locations->SetOut(Location::SameAsFirstInput());
2905   }
2906   // Rely on the type initialization to save everything we need.
2907   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
2908 }
2909 
2910 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
2911   // We assume the class is not null.
2912   SlowPathCodeARM64* slow_path =
2913       new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(check->GetLoadClass(), check);
2914   codegen_->AddSlowPath(slow_path);
2915   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
2916 }
2917 
2918 static bool IsFloatingPointZeroConstant(HInstruction* inst) {
2919   return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
2920       || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
2921 }
2922 
2923 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
2924   VRegister lhs_reg = InputFPRegisterAt(instruction, 0);
2925   Location rhs_loc = instruction->GetLocations()->InAt(1);
2926   if (rhs_loc.IsConstant()) {
2927     // 0.0 is the only immediate that can be encoded directly in
2928     // an FCMP instruction.
2929     //
2930     // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
2931     // specify that in a floating-point comparison, positive zero
2932     // and negative zero are considered equal, so we can use the
2933     // literal 0.0 for both cases here.
2934     //
2935     // Note however that some methods (Float.equal, Float.compare,
2936     // Float.compareTo, Double.equal, Double.compare,
2937     // Double.compareTo, Math.max, Math.min, StrictMath.max,
2938     // StrictMath.min) consider 0.0 to be (strictly) greater than
2939     // -0.0. So if we ever translate calls to these methods into a
2940     // HCompare instruction, we must handle the -0.0 case with
2941     // care here.
2942     DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
2943     __ Fcmp(lhs_reg, 0.0);
2944   } else {
2945     __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
2946   }
2947 }
2948 
2949 void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
2950   LocationSummary* locations =
2951       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2952   DataType::Type in_type = compare->InputAt(0)->GetType();
2953   switch (in_type) {
2954     case DataType::Type::kBool:
2955     case DataType::Type::kUint8:
2956     case DataType::Type::kInt8:
2957     case DataType::Type::kUint16:
2958     case DataType::Type::kInt16:
2959     case DataType::Type::kInt32:
2960     case DataType::Type::kInt64: {
2961       locations->SetInAt(0, Location::RequiresRegister());
2962       locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
2963       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2964       break;
2965     }
2966     case DataType::Type::kFloat32:
2967     case DataType::Type::kFloat64: {
2968       locations->SetInAt(0, Location::RequiresFpuRegister());
2969       locations->SetInAt(1,
2970                          IsFloatingPointZeroConstant(compare->InputAt(1))
2971                              ? Location::ConstantLocation(compare->InputAt(1)->AsConstant())
2972                              : Location::RequiresFpuRegister());
2973       locations->SetOut(Location::RequiresRegister());
2974       break;
2975     }
2976     default:
2977       LOG(FATAL) << "Unexpected type for compare operation " << in_type;
2978   }
2979 }
2980 
2981 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
2982   DataType::Type in_type = compare->InputAt(0)->GetType();
2983 
2984   //  0 if: left == right
2985   //  1 if: left  > right
2986   // -1 if: left  < right
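  // For example, for Int32 inputs left = 5 and right = 7, the Cmp/Cset/Cneg sequence
  // below yields 1 from Cset (NE is true) and then -1 from Cneg (LT is true).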
2987   switch (in_type) {
2988     case DataType::Type::kBool:
2989     case DataType::Type::kUint8:
2990     case DataType::Type::kInt8:
2991     case DataType::Type::kUint16:
2992     case DataType::Type::kInt16:
2993     case DataType::Type::kInt32:
2994     case DataType::Type::kInt64: {
2995       Register result = OutputRegister(compare);
2996       Register left = InputRegisterAt(compare, 0);
2997       Operand right = InputOperandAt(compare, 1);
2998       __ Cmp(left, right);
2999       __ Cset(result, ne);          // result == +1 if NE or 0 otherwise
3000       __ Cneg(result, result, lt);  // result == -1 if LT or unchanged otherwise
3001       break;
3002     }
3003     case DataType::Type::kFloat32:
3004     case DataType::Type::kFloat64: {
3005       Register result = OutputRegister(compare);
3006       GenerateFcmp(compare);
3007       __ Cset(result, ne);
3008       __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
3009       break;
3010     }
3011     default:
3012       LOG(FATAL) << "Unimplemented compare type " << in_type;
3013   }
3014 }
3015 
3016 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
3017   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
3018 
3019   if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3020     locations->SetInAt(0, Location::RequiresFpuRegister());
3021     locations->SetInAt(1,
3022                        IsFloatingPointZeroConstant(instruction->InputAt(1))
3023                            ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant())
3024                            : Location::RequiresFpuRegister());
3025   } else {
3026     // Integer cases.
3027     locations->SetInAt(0, Location::RequiresRegister());
3028     locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
3029   }
3030 
3031   if (!instruction->IsEmittedAtUseSite()) {
3032     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3033   }
3034 }
3035 
3036 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
3037   if (instruction->IsEmittedAtUseSite()) {
3038     return;
3039   }
3040 
3041   LocationSummary* locations = instruction->GetLocations();
3042   Register res = RegisterFrom(locations->Out(), instruction->GetType());
3043   IfCondition if_cond = instruction->GetCondition();
3044 
3045   if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3046     GenerateFcmp(instruction);
3047     __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
3048   } else {
3049     // Integer cases.
3050     Register lhs = InputRegisterAt(instruction, 0);
3051     Operand rhs = InputOperandAt(instruction, 1);
3052     __ Cmp(lhs, rhs);
3053     __ Cset(res, ARM64Condition(if_cond));
3054   }
3055 }
3056 
3057 #define FOR_EACH_CONDITION_INSTRUCTION(M)                                                \
3058   M(Equal)                                                                               \
3059   M(NotEqual)                                                                            \
3060   M(LessThan)                                                                            \
3061   M(LessThanOrEqual)                                                                     \
3062   M(GreaterThan)                                                                         \
3063   M(GreaterThanOrEqual)                                                                  \
3064   M(Below)                                                                               \
3065   M(BelowOrEqual)                                                                        \
3066   M(Above)                                                                               \
3067   M(AboveOrEqual)
3068 #define DEFINE_CONDITION_VISITORS(Name)                                                  \
3069 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
3070 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
3071 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
3072 #undef DEFINE_CONDITION_VISITORS
3073 #undef FOR_EACH_CONDITION_INSTRUCTION
3074 
3075 void InstructionCodeGeneratorARM64::GenerateIntDivForPower2Denom(HDiv* instruction) {
3076   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
3077   uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
3078   DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
3079 
3080   Register out = OutputRegister(instruction);
3081   Register dividend = InputRegisterAt(instruction, 0);
3082 
3083   Register final_dividend;
3084   if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
3085     // No need to adjust the result for non-negative dividends or the INT32_MIN/INT64_MIN dividends.
3086     // NOTE: The generated code for HDiv correctly works for the INT32_MIN/INT64_MIN dividends:
3087     //   imm == 2
3088     //     add out, dividend(0x80000000), dividend(0x80000000), lsr #31 => out = 0x80000001
3089     //     asr out, out(0x80000001), #1 => out = 0xc0000000
3090     //     This is the same as 'asr out, 0x80000000, #1'
3091     //
3092     //   imm > 2
3093     //     add temp, dividend(0x80000000), imm - 1 => temp = 0b10..01..1, where the number
3094     //         of the rightmost 1s is ctz_imm.
3095     //     cmp dividend(0x80000000), 0 => N = 1, V = 0 (lt is true)
3096     //     csel out, temp(0b10..01..1), dividend(0x80000000), lt => out = 0b10..01..1
3097     //     asr out, out(0b10..01..1), #ctz_imm => out = 0b1..10..0, where the number of the
3098     //         leftmost 1s is ctz_imm + 1.
3099     //     This is the same as 'asr out, dividend(0x80000000), #ctz_imm'.
3100     //
3101     //   imm == INT32_MIN
3102     //     add tmp, dividend(0x80000000), #0x7fffffff => tmp = -1
3103     //     cmp dividend(0x80000000), 0 => N = 1, V = 0 (lt is true)
3104     //     csel out, temp(-1), dividend(0x80000000), lt => out = -1
3105     //     neg out, out(-1), asr #31 => out = 1
3106     //     This is the same as 'neg out, dividend(0x80000000), asr #31'.
3107     final_dividend = dividend;
3108   } else {
3109     if (abs_imm == 2) {
3110       int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte;
3111       __ Add(out, dividend, Operand(dividend, LSR, bits - 1));
3112     } else {
3113       UseScratchRegisterScope temps(GetVIXLAssembler());
3114       Register temp = temps.AcquireSameSizeAs(out);
3115       __ Add(temp, dividend, abs_imm - 1);
3116       __ Cmp(dividend, 0);
3117       __ Csel(out, temp, dividend, lt);
3118     }
3119     final_dividend = out;
3120   }
3121 
3122   int ctz_imm = CTZ(abs_imm);
3123   if (imm > 0) {
3124     __ Asr(out, final_dividend, ctz_imm);
3125   } else {
3126     __ Neg(out, Operand(final_dividend, ASR, ctz_imm));
3127   }
3128 }
3129 
3130 // Return true if the magic number was modified by subtracting 2^32 (Int32 div) or 2^64 (Int64 div),
3131 // in which case the dividend needs to be added back.
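// For example, for an Int32 division by 7 the computed magic number (0x92492493) is
// negative while the divisor is positive, so the dividend has to be added back to the
// high half of the product before the final shift.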
3132 static inline bool NeedToAddDividend(int64_t magic_number, int64_t divisor) {
3133   return divisor > 0 && magic_number < 0;
3134 }
3135 
3136 // Return true if the magic number was modified by adding 2^32 (Int32 div) or 2^64 (Int64 div),
3137 // in which case the dividend needs to be subtracted.
3138 static inline bool NeedToSubDividend(int64_t magic_number, int64_t divisor) {
3139   return divisor < 0 && magic_number > 0;
3140 }
3141 
3142 // Generate code which increments the value in register 'in' by 1 if the value is negative.
3143 // It is done with 'add out, in, in, lsr #31 or #63' (the shift extracts the sign bit).
3144 // If the value is the result of an operation that sets the N flag, CINC MI can be used
3145 // instead of ADD; 'use_cond_inc' controls this.
3146 void InstructionCodeGeneratorARM64::GenerateIncrementNegativeByOne(
3147     Register out,
3148     Register in,
3149     bool use_cond_inc) {
3150   if (use_cond_inc) {
3151     __ Cinc(out, in, mi);
3152   } else {
3153     __ Add(out, in, Operand(in, LSR, in.GetSizeInBits() - 1));
3154   }
3155 }
3156 
3157 // Helper to generate code producing the result of HRem with a constant divisor.
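// The remainder is computed as 'out = dividend - quotient * divisor' using a single MSUB.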
3158 void InstructionCodeGeneratorARM64::GenerateResultRemWithAnyConstant(
3159     Register out,
3160     Register dividend,
3161     Register quotient,
3162     int64_t divisor,
3163     UseScratchRegisterScope* temps_scope) {
3164   Register temp_imm = temps_scope->AcquireSameSizeAs(out);
3165   __ Mov(temp_imm, divisor);
3166   __ Msub(out, quotient, temp_imm, dividend);
3167 }
3168 
3169 // Helper to generate code for HDiv/HRem instructions when the dividend is non-negative and
3170 // the divisor is a positive constant that is not a power of 2.
3171 void InstructionCodeGeneratorARM64::GenerateInt64UnsignedDivRemWithAnyPositiveConstant(
3172     HBinaryOperation* instruction) {
3173   DCHECK(instruction->IsDiv() || instruction->IsRem());
3174   DCHECK(instruction->GetResultType() == DataType::Type::kInt64);
3175 
3176   LocationSummary* locations = instruction->GetLocations();
3177   Location second = locations->InAt(1);
3178   DCHECK(second.IsConstant());
3179 
3180   Register out = OutputRegister(instruction);
3181   Register dividend = InputRegisterAt(instruction, 0);
3182   int64_t imm = Int64FromConstant(second.GetConstant());
3183   DCHECK_GT(imm, 0);
3184 
3185   int64_t magic;
3186   int shift;
3187   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ true, &magic, &shift);
3188 
3189   UseScratchRegisterScope temps(GetVIXLAssembler());
3190   Register temp = temps.AcquireSameSizeAs(out);
3191 
3192   auto generate_unsigned_div_code = [this, magic, shift](Register out,
3193                                                          Register dividend,
3194                                                          Register temp) {
3195     // temp = get_high(dividend * magic)
3196     __ Mov(temp, magic);
3197     if (magic > 0 && shift == 0) {
3198       __ Smulh(out, dividend, temp);
3199     } else {
3200       __ Smulh(temp, dividend, temp);
3201       if (magic < 0) {
3202         // The negative magic means that the multiplier m is greater than INT64_MAX.
3203         // In such a case shift is never 0. See the proof in
3204         // InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant.
3205         __ Add(temp, temp, dividend);
3206       }
3207       DCHECK_NE(shift, 0);
3208       __ Lsr(out, temp, shift);
3209     }
3210   };
3211 
3212   if (instruction->IsDiv()) {
3213     generate_unsigned_div_code(out, dividend, temp);
3214   } else {
3215     generate_unsigned_div_code(temp, dividend, temp);
3216     GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3217   }
3218 }
3219 
3220 // Helper to generate code for HDiv/HRem instructions for any dividend and a constant divisor
3221 // (not a power of 2).
3222 void InstructionCodeGeneratorARM64::GenerateInt64DivRemWithAnyConstant(
3223     HBinaryOperation* instruction) {
3224   DCHECK(instruction->IsDiv() || instruction->IsRem());
3225   DCHECK(instruction->GetResultType() == DataType::Type::kInt64);
3226 
3227   LocationSummary* locations = instruction->GetLocations();
3228   Location second = locations->InAt(1);
3229   DCHECK(second.IsConstant());
3230 
3231   Register out = OutputRegister(instruction);
3232   Register dividend = InputRegisterAt(instruction, 0);
3233   int64_t imm = Int64FromConstant(second.GetConstant());
3234 
3235   int64_t magic;
3236   int shift;
3237   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ true, &magic, &shift);
3238 
3239   UseScratchRegisterScope temps(GetVIXLAssembler());
3240   Register temp = temps.AcquireSameSizeAs(out);
3241 
3242   // temp = get_high(dividend * magic)
3243   __ Mov(temp, magic);
3244   __ Smulh(temp, dividend, temp);
3245 
3246   // The multiplication result might need some corrections to be finalized.
3247   // The last correction is to increment by 1, if the result is negative.
3248   // Currently it is done with 'add result, temp_result, temp_result, lsr #31 or #63'.
3249   // Such ADD usually has latency 2, e.g. on Cortex-A55.
3250   // However, if one of the corrections is an ADD or SUB, the sign can be detected
3251   // with ADDS/SUBS, which set the N flag if the result is negative.
3252   // This allows using CINC MI, which has latency 1.
3253   bool use_cond_inc = false;
3254 
3255   // Some combinations of magic_number and the divisor require to correct the result.
3256   // Check whether the correction is needed.
3257   if (NeedToAddDividend(magic, imm)) {
3258     __ Adds(temp, temp, dividend);
3259     use_cond_inc = true;
3260   } else if (NeedToSubDividend(magic, imm)) {
3261     __ Subs(temp, temp, dividend);
3262     use_cond_inc = true;
3263   }
3264 
3265   if (shift != 0) {
3266     __ Asr(temp, temp, shift);
3267   }
3268 
3269   if (instruction->IsRem()) {
3270     GenerateIncrementNegativeByOne(temp, temp, use_cond_inc);
3271     GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3272   } else {
3273     GenerateIncrementNegativeByOne(out, temp, use_cond_inc);
3274   }
3275 }
3276 
3277 void InstructionCodeGeneratorARM64::GenerateInt32DivRemWithAnyConstant(
3278     HBinaryOperation* instruction) {
3279   DCHECK(instruction->IsDiv() || instruction->IsRem());
3280   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
3281 
3282   LocationSummary* locations = instruction->GetLocations();
3283   Location second = locations->InAt(1);
3284   DCHECK(second.IsConstant());
3285 
3286   Register out = OutputRegister(instruction);
3287   Register dividend = InputRegisterAt(instruction, 0);
3288   int64_t imm = Int64FromConstant(second.GetConstant());
3289 
3290   int64_t magic;
3291   int shift;
3292   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
3293   UseScratchRegisterScope temps(GetVIXLAssembler());
3294   Register temp = temps.AcquireSameSizeAs(out);
3295 
3296   // temp = get_high(dividend * magic)
3297   __ Mov(temp, magic);
3298   __ Smull(temp.X(), dividend, temp);
3299 
3300   // The multiplication result might need some corrections to be finalized.
3301   // The last correction is to increment by 1, if the result is negative.
3302   // Currently it is done with 'add result, temp_result, temp_result, lsr #31 or #63'.
3303   // Such ADD usually has latency 2, e.g. on Cortex-A55.
3304   // However, if one of the corrections is an ADD or SUB, the sign can be detected
3305   // with ADDS/SUBS, which set the N flag if the result is negative.
3306   // This allows using CINC MI, which has latency 1.
3307   bool use_cond_inc = false;
3308 
3309   // The ADD/SUB correction is applied to the high 32 bits of the 64-bit product,
3310   // as only those bits are used for the kInt32 result.
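  // Shifting the dividend left by 32 aligns it with the high half of the 64-bit SMULL
  // product, so the ADDS/SUBS below corrects get_high(dividend * magic) in place while
  // also setting the N flag for the later CINC MI.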
3311   if (NeedToAddDividend(magic, imm)) {
3312     __ Adds(temp.X(), temp.X(), Operand(dividend.X(), LSL, 32));
3313     use_cond_inc = true;
3314   } else if (NeedToSubDividend(magic, imm)) {
3315     __ Subs(temp.X(), temp.X(), Operand(dividend.X(), LSL, 32));
3316     use_cond_inc = true;
3317   }
3318 
3319   // Extract the result from the high 32 bits and apply the final right shift.
3320   DCHECK_LT(shift, 32);
3321   if (imm > 0 && HasNonNegativeInputAt(instruction, 0)) {
3322     // No need to adjust the result for a non-negative dividend and a positive divisor.
3323     if (instruction->IsDiv()) {
3324       __ Lsr(out.X(), temp.X(), 32 + shift);
3325     } else {
3326       __ Lsr(temp.X(), temp.X(), 32 + shift);
3327       GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3328     }
3329   } else {
3330     __ Asr(temp.X(), temp.X(), 32 + shift);
3331 
3332     if (instruction->IsRem()) {
3333       GenerateIncrementNegativeByOne(temp, temp, use_cond_inc);
3334       GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3335     } else {
3336       GenerateIncrementNegativeByOne(out, temp, use_cond_inc);
3337     }
3338   }
3339 }
3340 
3341 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction,
3342                                                                   int64_t divisor) {
3343   DCHECK(instruction->IsDiv() || instruction->IsRem());
3344   if (instruction->GetResultType() == DataType::Type::kInt64) {
3345     if (divisor > 0 && HasNonNegativeInputAt(instruction, 0)) {
3346       GenerateInt64UnsignedDivRemWithAnyPositiveConstant(instruction);
3347     } else {
3348       GenerateInt64DivRemWithAnyConstant(instruction);
3349     }
3350   } else {
3351     GenerateInt32DivRemWithAnyConstant(instruction);
3352   }
3353 }
3354 
3355 void InstructionCodeGeneratorARM64::GenerateIntDivForConstDenom(HDiv* instruction) {
3356   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
3357 
3358   if (imm == 0) {
3359     // Do not generate anything. DivZeroCheck would prevent any code from being executed.
3360     return;
3361   }
3362 
3363   if (IsPowerOfTwo(AbsOrMin(imm))) {
3364     GenerateIntDivForPower2Denom(instruction);
3365   } else {
3366     // Cases imm == -1 or imm == 1 are handled by InstructionSimplifier.
3367     DCHECK(imm < -2 || imm > 2) << imm;
3368     GenerateDivRemWithAnyConstant(instruction, imm);
3369   }
3370 }
3371 
3372 void InstructionCodeGeneratorARM64::GenerateIntDiv(HDiv* instruction) {
3373   DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
3374        << instruction->GetResultType();
3375 
3376   if (instruction->GetLocations()->InAt(1).IsConstant()) {
3377     GenerateIntDivForConstDenom(instruction);
3378   } else {
3379     Register out = OutputRegister(instruction);
3380     Register dividend = InputRegisterAt(instruction, 0);
3381     Register divisor = InputRegisterAt(instruction, 1);
3382     __ Sdiv(out, dividend, divisor);
3383   }
3384 }
3385 
3386 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
3387   LocationSummary* locations =
3388       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3389   switch (div->GetResultType()) {
3390     case DataType::Type::kInt32:
3391     case DataType::Type::kInt64:
3392       locations->SetInAt(0, Location::RequiresRegister());
3393       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3394       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3395       break;
3396 
3397     case DataType::Type::kFloat32:
3398     case DataType::Type::kFloat64:
3399       locations->SetInAt(0, Location::RequiresFpuRegister());
3400       locations->SetInAt(1, Location::RequiresFpuRegister());
3401       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3402       break;
3403 
3404     default:
3405       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3406   }
3407 }
3408 
3409 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
3410   DataType::Type type = div->GetResultType();
3411   switch (type) {
3412     case DataType::Type::kInt32:
3413     case DataType::Type::kInt64:
3414       GenerateIntDiv(div);
3415       break;
3416 
3417     case DataType::Type::kFloat32:
3418     case DataType::Type::kFloat64:
3419       __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
3420       break;
3421 
3422     default:
3423       LOG(FATAL) << "Unexpected div type " << type;
3424   }
3425 }
3426 
3427 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3428   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3429   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3430 }
3431 
3432 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3433   SlowPathCodeARM64* slow_path =
3434       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARM64(instruction);
3435   codegen_->AddSlowPath(slow_path);
3436   Location value = instruction->GetLocations()->InAt(0);
3437 
3438   DataType::Type type = instruction->GetType();
3439 
3440   if (!DataType::IsIntegralType(type)) {
3441     LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
3442     UNREACHABLE();
3443   }
3444 
3445   if (value.IsConstant()) {
3446     int64_t divisor = Int64FromLocation(value);
3447     if (divisor == 0) {
3448       __ B(slow_path->GetEntryLabel());
3449     } else {
3450       // A division by a non-zero constant is valid. We don't need to perform
3451       // any check, so simply fall through.
3452     }
3453   } else {
3454     __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
3455   }
3456 }
3457 
3458 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
3459   LocationSummary* locations =
3460       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3461   locations->SetOut(Location::ConstantLocation(constant));
3462 }
3463 
3464 void InstructionCodeGeneratorARM64::VisitDoubleConstant(
3465     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
3466   // Will be generated at use site.
3467 }
3468 
3469 void LocationsBuilderARM64::VisitExit(HExit* exit) {
3470   exit->SetLocations(nullptr);
3471 }
3472 
3473 void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
3474 }
3475 
3476 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
3477   LocationSummary* locations =
3478       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3479   locations->SetOut(Location::ConstantLocation(constant));
3480 }
3481 
3482 void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
3483   // Will be generated at use site.
3484 }
3485 
3486 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
3487   if (successor->IsExitBlock()) {
3488     DCHECK(got->GetPrevious()->AlwaysThrows());
3489     return;  // no code needed
3490   }
3491 
3492   HBasicBlock* block = got->GetBlock();
3493   HInstruction* previous = got->GetPrevious();
3494   HLoopInformation* info = block->GetLoopInformation();
3495 
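  // Loop back edge with a suspend check: bump the hotness counter and emit the
  // suspend check, which also takes care of jumping to `successor`.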
3496   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
3497     codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
3498     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
3499     return;
3500   }
3501   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
3502     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
3503     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
3504   }
3505   if (!codegen_->GoesToNextBlock(block, successor)) {
3506     __ B(codegen_->GetLabelOf(successor));
3507   }
3508 }
3509 
3510 void LocationsBuilderARM64::VisitGoto(HGoto* got) {
3511   got->SetLocations(nullptr);
3512 }
3513 
3514 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
3515   HandleGoto(got, got->GetSuccessor());
3516 }
3517 
3518 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3519   try_boundary->SetLocations(nullptr);
3520 }
3521 
3522 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3523   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
3524   if (!successor->IsExitBlock()) {
3525     HandleGoto(try_boundary, successor);
3526   }
3527 }
3528 
3529 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
3530                                                           size_t condition_input_index,
3531                                                           vixl::aarch64::Label* true_target,
3532                                                           vixl::aarch64::Label* false_target) {
3533   HInstruction* cond = instruction->InputAt(condition_input_index);
3534 
3535   if (true_target == nullptr && false_target == nullptr) {
3536     // Nothing to do. The code always falls through.
3537     return;
3538   } else if (cond->IsIntConstant()) {
3539     // Constant condition, statically compared against "true" (integer value 1).
3540     if (cond->AsIntConstant()->IsTrue()) {
3541       if (true_target != nullptr) {
3542         __ B(true_target);
3543       }
3544     } else {
3545       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
3546       if (false_target != nullptr) {
3547         __ B(false_target);
3548       }
3549     }
3550     return;
3551   }
3552 
3553   // The following code generates these patterns:
3554   //  (1) true_target == nullptr && false_target != nullptr
3555   //        - opposite condition true => branch to false_target
3556   //  (2) true_target != nullptr && false_target == nullptr
3557   //        - condition true => branch to true_target
3558   //  (3) true_target != nullptr && false_target != nullptr
3559   //        - condition true => branch to true_target
3560   //        - branch to false_target
3561   if (IsBooleanValueOrMaterializedCondition(cond)) {
3562     // The condition instruction has been materialized, compare the output to 0.
3563     Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
3564     DCHECK(cond_val.IsRegister());
3565     if (true_target == nullptr) {
3566       __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
3567     } else {
3568       __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
3569     }
3570   } else {
3571     // The condition instruction has not been materialized, use its inputs as
3572     // the comparison and its condition as the branch condition.
3573     HCondition* condition = cond->AsCondition();
3574 
3575     DataType::Type type = condition->InputAt(0)->GetType();
3576     if (DataType::IsFloatingPointType(type)) {
3577       GenerateFcmp(condition);
3578       if (true_target == nullptr) {
3579         IfCondition opposite_condition = condition->GetOppositeCondition();
3580         __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
3581       } else {
3582         __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
3583       }
3584     } else {
3585       // Integer cases.
3586       Register lhs = InputRegisterAt(condition, 0);
3587       Operand rhs = InputOperandAt(condition, 1);
3588 
3589       Condition arm64_cond;
3590       vixl::aarch64::Label* non_fallthrough_target;
3591       if (true_target == nullptr) {
3592         arm64_cond = ARM64Condition(condition->GetOppositeCondition());
3593         non_fallthrough_target = false_target;
3594       } else {
3595         arm64_cond = ARM64Condition(condition->GetCondition());
3596         non_fallthrough_target = true_target;
3597       }
3598 
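      // When comparing against zero with eq/ne/lt/ge we can use a single
      // CBZ/CBNZ/TBNZ/TBZ instead of a CMP followed by a conditional branch.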
3599       if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
3600           rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
3601         switch (arm64_cond) {
3602           case eq:
3603             __ Cbz(lhs, non_fallthrough_target);
3604             break;
3605           case ne:
3606             __ Cbnz(lhs, non_fallthrough_target);
3607             break;
3608           case lt:
3609             // Test the sign bit and branch accordingly.
3610             __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3611             break;
3612           case ge:
3613             // Test the sign bit and branch accordingly.
3614             __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3615             break;
3616           default:
3617             // Without the `static_cast` the compiler throws an error for
3618             // `-Werror=sign-promo`.
3619             LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
3620         }
3621       } else {
3622         __ Cmp(lhs, rhs);
3623         __ B(arm64_cond, non_fallthrough_target);
3624       }
3625     }
3626   }
3627 
3628   // If neither branch falls through (case 3), the conditional branch to `true_target`
3629   // was already emitted (case 2) and we need to emit a jump to `false_target`.
3630   if (true_target != nullptr && false_target != nullptr) {
3631     __ B(false_target);
3632   }
3633 }
3634 
3635 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
3636   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
3637   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3638     locations->SetInAt(0, Location::RequiresRegister());
3639   }
3640 }
3641 
3642 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
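  // A successor that is also the fallthrough block is passed as nullptr so that
  // GenerateTestAndBranch can omit the corresponding branch.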
3643   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3644   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3645   vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
3646   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
3647     true_target = nullptr;
3648   }
3649   vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
3650   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
3651     false_target = nullptr;
3652   }
3653   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
3654 }
3655 
3656 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3657   LocationSummary* locations = new (GetGraph()->GetAllocator())
3658       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3659   InvokeRuntimeCallingConvention calling_convention;
3660   RegisterSet caller_saves = RegisterSet::Empty();
3661   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3662   locations->SetCustomSlowPathCallerSaves(caller_saves);
3663   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3664     locations->SetInAt(0, Location::RequiresRegister());
3665   }
3666 }
3667 
3668 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3669   SlowPathCodeARM64* slow_path =
3670       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
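  // Only the branch to the deoptimization slow path is emitted; a null
  // `false_target` means the code simply falls through when the condition does
  // not hold.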
3671   GenerateTestAndBranch(deoptimize,
3672                         /* condition_input_index= */ 0,
3673                         slow_path->GetEntryLabel(),
3674                         /* false_target= */ nullptr);
3675 }
3676 
3677 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3678   LocationSummary* locations = new (GetGraph()->GetAllocator())
3679       LocationSummary(flag, LocationSummary::kNoCall);
3680   locations->SetOut(Location::RequiresRegister());
3681 }
3682 
3683 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3684   __ Ldr(OutputRegister(flag),
3685          MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
3686 }
3687 
3688 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
3689   return condition->IsCondition() &&
3690          DataType::IsFloatingPointType(condition->InputAt(0)->GetType());
3691 }
3692 
3693 static inline Condition GetConditionForSelect(HCondition* condition) {
3694   IfCondition cond = condition->AsCondition()->GetCondition();
3695   return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
3696                                                      : ARM64Condition(cond);
3697 }
3698 
3699 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
3700   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
3701   if (DataType::IsFloatingPointType(select->GetType())) {
3702     locations->SetInAt(0, Location::RequiresFpuRegister());
3703     locations->SetInAt(1, Location::RequiresFpuRegister());
3704     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3705   } else {
3706     HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
3707     HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
3708     bool is_true_value_constant = cst_true_value != nullptr;
3709     bool is_false_value_constant = cst_false_value != nullptr;
3710     // Ask VIXL whether we should synthesize constants in registers.
3711     // We give an arbitrary register to VIXL when dealing with non-constant inputs.
3712     Operand true_op = is_true_value_constant ?
3713         Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
3714     Operand false_op = is_false_value_constant ?
3715         Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
3716     bool true_value_in_register = false;
3717     bool false_value_in_register = false;
3718     MacroAssembler::GetCselSynthesisInformation(
3719         x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
3720     true_value_in_register |= !is_true_value_constant;
3721     false_value_in_register |= !is_false_value_constant;
3722 
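    // Non-constant inputs always need a register; constant inputs only need one
    // when VIXL cannot encode them directly in the CSEL sequence.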
3723     locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
3724                                                  : Location::ConstantLocation(cst_true_value));
3725     locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
3726                                                   : Location::ConstantLocation(cst_false_value));
3727     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3728   }
3729 
3730   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3731     locations->SetInAt(2, Location::RequiresRegister());
3732   }
3733 }
3734 
3735 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
3736   HInstruction* cond = select->GetCondition();
3737   Condition csel_cond;
3738 
3739   if (IsBooleanValueOrMaterializedCondition(cond)) {
3740     if (cond->IsCondition() && cond->GetNext() == select) {
3741       // Use the condition flags set by the previous instruction.
3742       csel_cond = GetConditionForSelect(cond->AsCondition());
3743     } else {
3744       __ Cmp(InputRegisterAt(select, 2), 0);
3745       csel_cond = ne;
3746     }
3747   } else if (IsConditionOnFloatingPointValues(cond)) {
3748     GenerateFcmp(cond);
3749     csel_cond = GetConditionForSelect(cond->AsCondition());
3750   } else {
3751     __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
3752     csel_cond = GetConditionForSelect(cond->AsCondition());
3753   }
3754 
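  // HSelect inputs are (false_value, true_value, condition): input 1 is selected
  // when `csel_cond` holds, input 0 otherwise.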
3755   if (DataType::IsFloatingPointType(select->GetType())) {
3756     __ Fcsel(OutputFPRegister(select),
3757              InputFPRegisterAt(select, 1),
3758              InputFPRegisterAt(select, 0),
3759              csel_cond);
3760   } else {
3761     __ Csel(OutputRegister(select),
3762             InputOperandAt(select, 1),
3763             InputOperandAt(select, 0),
3764             csel_cond);
3765   }
3766 }
3767 
3768 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
3769   new (GetGraph()->GetAllocator()) LocationSummary(info);
3770 }
3771 
3772 void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) {
3773   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
3774 }
3775 
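// IncreaseFrame/DecreaseFrame adjust SP and keep the CFI frame information in
// sync with the adjustment.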
3776 void CodeGeneratorARM64::IncreaseFrame(size_t adjustment) {
3777   __ Claim(adjustment);
3778   GetAssembler()->cfi().AdjustCFAOffset(adjustment);
3779 }
3780 
3781 void CodeGeneratorARM64::DecreaseFrame(size_t adjustment) {
3782   __ Drop(adjustment);
3783   GetAssembler()->cfi().AdjustCFAOffset(-adjustment);
3784 }
3785 
3786 void CodeGeneratorARM64::GenerateNop() {
3787   __ Nop();
3788 }
3789 
3790 void LocationsBuilderARM64::VisitPredicatedInstanceFieldGet(
3791     HPredicatedInstanceFieldGet* instruction) {
3792   HandleFieldGet(instruction, instruction->GetFieldInfo());
3793 }
3794 
3795 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3796   HandleFieldGet(instruction, instruction->GetFieldInfo());
3797 }
3798 
3799 void InstructionCodeGeneratorARM64::VisitPredicatedInstanceFieldGet(
3800     HPredicatedInstanceFieldGet* instruction) {
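  // If the target object (input 1) is null, the field load is skipped; the
  // instruction is then expected to yield its default value.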
3801   vixl::aarch64::Label finish;
3802   __ Cbz(InputRegisterAt(instruction, 1), &finish);
3803   HandleFieldGet(instruction, instruction->GetFieldInfo());
3804   __ Bind(&finish);
3805 }
3806 
3807 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3808   HandleFieldGet(instruction, instruction->GetFieldInfo());
3809 }
3810 
3811 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3812   HandleFieldSet(instruction);
3813 }
3814 
3815 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3816   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
3817 }
3818 
3819 // Temp is used for read barrier.
3820 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
3821   if (kEmitCompilerReadBarrier &&
3822       (kUseBakerReadBarrier ||
3823           type_check_kind == TypeCheckKind::kAbstractClassCheck ||
3824           type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
3825           type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
3826     return 1;
3827   }
3828   return 0;
3829 }
3830 
3831 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
3832 // interface pointer, and one for loading the current interface.
3833 // The other checks have one temp for loading the object's class.
3834 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
3835   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
3836     return 3;
3837   }
3838   return 1 + NumberOfInstanceOfTemps(type_check_kind);
3839 }
3840 
3841 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
3842   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
3843   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3844   bool baker_read_barrier_slow_path = false;
3845   switch (type_check_kind) {
3846     case TypeCheckKind::kExactCheck:
3847     case TypeCheckKind::kAbstractClassCheck:
3848     case TypeCheckKind::kClassHierarchyCheck:
3849     case TypeCheckKind::kArrayObjectCheck: {
3850       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
3851       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
3852       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
3853       break;
3854     }
3855     case TypeCheckKind::kArrayCheck:
3856     case TypeCheckKind::kUnresolvedCheck:
3857     case TypeCheckKind::kInterfaceCheck:
3858       call_kind = LocationSummary::kCallOnSlowPath;
3859       break;
3860     case TypeCheckKind::kBitstringCheck:
3861       break;
3862   }
3863 
3864   LocationSummary* locations =
3865       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
3866   if (baker_read_barrier_slow_path) {
3867     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
3868   }
3869   locations->SetInAt(0, Location::RequiresRegister());
3870   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
3871     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
3872     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
3873     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
3874   } else {
3875     locations->SetInAt(1, Location::RequiresRegister());
3876   }
3877   // The "out" register is used as a temporary, so it overlaps with the inputs.
3878   // Note that TypeCheckSlowPathARM64 uses this register too.
3879   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3880   // Add temps if necessary for read barriers.
3881   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
3882 }
3883 
3884 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
3885   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3886   LocationSummary* locations = instruction->GetLocations();
3887   Location obj_loc = locations->InAt(0);
3888   Register obj = InputRegisterAt(instruction, 0);
3889   Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
3890       ? Register()
3891       : InputRegisterAt(instruction, 1);
3892   Location out_loc = locations->Out();
3893   Register out = OutputRegister(instruction);
3894   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
3895   DCHECK_LE(num_temps, 1u);
3896   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
3897   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3898   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3899   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3900   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3901 
3902   vixl::aarch64::Label done, zero;
3903   SlowPathCodeARM64* slow_path = nullptr;
3904 
3905   // Return 0 if `obj` is null.
3906   // Avoid null check if we know `obj` is not null.
3907   if (instruction->MustDoNullCheck()) {
3908     __ Cbz(obj, &zero);
3909   }
3910 
3911   switch (type_check_kind) {
3912     case TypeCheckKind::kExactCheck: {
3913       ReadBarrierOption read_barrier_option =
3914           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3915       // /* HeapReference<Class> */ out = obj->klass_
3916       GenerateReferenceLoadTwoRegisters(instruction,
3917                                         out_loc,
3918                                         obj_loc,
3919                                         class_offset,
3920                                         maybe_temp_loc,
3921                                         read_barrier_option);
3922       __ Cmp(out, cls);
3923       __ Cset(out, eq);
3924       if (zero.IsLinked()) {
3925         __ B(&done);
3926       }
3927       break;
3928     }
3929 
3930     case TypeCheckKind::kAbstractClassCheck: {
3931       ReadBarrierOption read_barrier_option =
3932           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3933       // /* HeapReference<Class> */ out = obj->klass_
3934       GenerateReferenceLoadTwoRegisters(instruction,
3935                                         out_loc,
3936                                         obj_loc,
3937                                         class_offset,
3938                                         maybe_temp_loc,
3939                                         read_barrier_option);
3940       // If the class is abstract, we eagerly fetch the super class of the
3941       // object to avoid doing a comparison we know will fail.
3942       vixl::aarch64::Label loop, success;
3943       __ Bind(&loop);
3944       // /* HeapReference<Class> */ out = out->super_class_
3945       GenerateReferenceLoadOneRegister(instruction,
3946                                        out_loc,
3947                                        super_offset,
3948                                        maybe_temp_loc,
3949                                        read_barrier_option);
3950       // If `out` is null, we use it for the result, and jump to `done`.
3951       __ Cbz(out, &done);
3952       __ Cmp(out, cls);
3953       __ B(ne, &loop);
3954       __ Mov(out, 1);
3955       if (zero.IsLinked()) {
3956         __ B(&done);
3957       }
3958       break;
3959     }
3960 
3961     case TypeCheckKind::kClassHierarchyCheck: {
3962       ReadBarrierOption read_barrier_option =
3963           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3964       // /* HeapReference<Class> */ out = obj->klass_
3965       GenerateReferenceLoadTwoRegisters(instruction,
3966                                         out_loc,
3967                                         obj_loc,
3968                                         class_offset,
3969                                         maybe_temp_loc,
3970                                         read_barrier_option);
3971       // Walk over the class hierarchy to find a match.
3972       vixl::aarch64::Label loop, success;
3973       __ Bind(&loop);
3974       __ Cmp(out, cls);
3975       __ B(eq, &success);
3976       // /* HeapReference<Class> */ out = out->super_class_
3977       GenerateReferenceLoadOneRegister(instruction,
3978                                        out_loc,
3979                                        super_offset,
3980                                        maybe_temp_loc,
3981                                        read_barrier_option);
3982       __ Cbnz(out, &loop);
3983       // If `out` is null, we use it for the result, and jump to `done`.
3984       __ B(&done);
3985       __ Bind(&success);
3986       __ Mov(out, 1);
3987       if (zero.IsLinked()) {
3988         __ B(&done);
3989       }
3990       break;
3991     }
3992 
3993     case TypeCheckKind::kArrayObjectCheck: {
3994       ReadBarrierOption read_barrier_option =
3995           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3996       // /* HeapReference<Class> */ out = obj->klass_
3997       GenerateReferenceLoadTwoRegisters(instruction,
3998                                         out_loc,
3999                                         obj_loc,
4000                                         class_offset,
4001                                         maybe_temp_loc,
4002                                         read_barrier_option);
4003       // Do an exact check.
4004       vixl::aarch64::Label exact_check;
4005       __ Cmp(out, cls);
4006       __ B(eq, &exact_check);
4007       // Otherwise, we need to check that the object's class is a non-primitive array.
4008       // /* HeapReference<Class> */ out = out->component_type_
4009       GenerateReferenceLoadOneRegister(instruction,
4010                                        out_loc,
4011                                        component_offset,
4012                                        maybe_temp_loc,
4013                                        read_barrier_option);
4014       // If `out` is null, we use it for the result, and jump to `done`.
4015       __ Cbz(out, &done);
4016       __ Ldrh(out, HeapOperand(out, primitive_offset));
4017       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4018       __ Cbnz(out, &zero);
4019       __ Bind(&exact_check);
4020       __ Mov(out, 1);
4021       __ B(&done);
4022       break;
4023     }
4024 
4025     case TypeCheckKind::kArrayCheck: {
4026       // No read barrier since the slow path will retry upon failure.
4027       // /* HeapReference<Class> */ out = obj->klass_
4028       GenerateReferenceLoadTwoRegisters(instruction,
4029                                         out_loc,
4030                                         obj_loc,
4031                                         class_offset,
4032                                         maybe_temp_loc,
4033                                         kWithoutReadBarrier);
4034       __ Cmp(out, cls);
4035       DCHECK(locations->OnlyCallsOnSlowPath());
4036       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4037           instruction, /* is_fatal= */ false);
4038       codegen_->AddSlowPath(slow_path);
4039       __ B(ne, slow_path->GetEntryLabel());
4040       __ Mov(out, 1);
4041       if (zero.IsLinked()) {
4042         __ B(&done);
4043       }
4044       break;
4045     }
4046 
4047     case TypeCheckKind::kUnresolvedCheck:
4048     case TypeCheckKind::kInterfaceCheck: {
4049       // Note that we indeed only call on slow path, but we always go
4050       // into the slow path for the unresolved and interface check
4051       // cases.
4052       //
4053       // We cannot directly call the InstanceofNonTrivial runtime
4054       // entry point without resorting to a type checking slow path
4055       // here (i.e. by calling InvokeRuntime directly), as it would
4056       // require to assign fixed registers for the inputs of this
4057       // HInstanceOf instruction (following the runtime calling
4058       // convention), which might be cluttered by the potential first
4059       // read barrier emission at the beginning of this method.
4060       //
4061       // TODO: Introduce a new runtime entry point taking the object
4062       // to test (instead of its class) as argument, and let it deal
4063       // with the read barrier issues. This will let us refactor this
4064       // case of the `switch` code as it was previously (with a direct
4065       // call to the runtime not using a type checking slow path).
4066       // This should also be beneficial for the other cases above.
4067       DCHECK(locations->OnlyCallsOnSlowPath());
4068       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4069           instruction, /* is_fatal= */ false);
4070       codegen_->AddSlowPath(slow_path);
4071       __ B(slow_path->GetEntryLabel());
4072       if (zero.IsLinked()) {
4073         __ B(&done);
4074       }
4075       break;
4076     }
4077 
4078     case TypeCheckKind::kBitstringCheck: {
4079       // /* HeapReference<Class> */ temp = obj->klass_
4080       GenerateReferenceLoadTwoRegisters(instruction,
4081                                         out_loc,
4082                                         obj_loc,
4083                                         class_offset,
4084                                         maybe_temp_loc,
4085                                         kWithoutReadBarrier);
4086 
4087       GenerateBitstringTypeCheckCompare(instruction, out);
4088       __ Cset(out, eq);
4089       if (zero.IsLinked()) {
4090         __ B(&done);
4091       }
4092       break;
4093     }
4094   }
4095 
4096   if (zero.IsLinked()) {
4097     __ Bind(&zero);
4098     __ Mov(out, 0);
4099   }
4100 
4101   if (done.IsLinked()) {
4102     __ Bind(&done);
4103   }
4104 
4105   if (slow_path != nullptr) {
4106     __ Bind(slow_path->GetExitLabel());
4107   }
4108 }
4109 
4110 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
4111   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4112   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
4113   LocationSummary* locations =
4114       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
4115   locations->SetInAt(0, Location::RequiresRegister());
4116   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
4117     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
4118     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
4119     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
4120   } else {
4121     locations->SetInAt(1, Location::RequiresRegister());
4122   }
4123   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
4124   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
4125 }
4126 
4127 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
4128   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4129   LocationSummary* locations = instruction->GetLocations();
4130   Location obj_loc = locations->InAt(0);
4131   Register obj = InputRegisterAt(instruction, 0);
4132   Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
4133       ? Register()
4134       : InputRegisterAt(instruction, 1);
4135   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
4136   DCHECK_GE(num_temps, 1u);
4137   DCHECK_LE(num_temps, 3u);
4138   Location temp_loc = locations->GetTemp(0);
4139   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
4140   Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
4141   Register temp = WRegisterFrom(temp_loc);
4142   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4143   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4144   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4145   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
4146   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
4147   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
4148   const uint32_t object_array_data_offset =
4149       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
4150 
4151   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
4152   SlowPathCodeARM64* type_check_slow_path =
4153       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4154           instruction, is_type_check_slow_path_fatal);
4155   codegen_->AddSlowPath(type_check_slow_path);
4156 
4157   vixl::aarch64::Label done;
4158   // Avoid null check if we know obj is not null.
4159   if (instruction->MustDoNullCheck()) {
4160     __ Cbz(obj, &done);
4161   }
4162 
4163   switch (type_check_kind) {
4164     case TypeCheckKind::kExactCheck:
4165     case TypeCheckKind::kArrayCheck: {
4166       // /* HeapReference<Class> */ temp = obj->klass_
4167       GenerateReferenceLoadTwoRegisters(instruction,
4168                                         temp_loc,
4169                                         obj_loc,
4170                                         class_offset,
4171                                         maybe_temp2_loc,
4172                                         kWithoutReadBarrier);
4173 
4174       __ Cmp(temp, cls);
4175       // Jump to slow path for throwing the exception or doing a
4176       // more involved array check.
4177       __ B(ne, type_check_slow_path->GetEntryLabel());
4178       break;
4179     }
4180 
4181     case TypeCheckKind::kAbstractClassCheck: {
4182       // /* HeapReference<Class> */ temp = obj->klass_
4183       GenerateReferenceLoadTwoRegisters(instruction,
4184                                         temp_loc,
4185                                         obj_loc,
4186                                         class_offset,
4187                                         maybe_temp2_loc,
4188                                         kWithoutReadBarrier);
4189 
4190       // If the class is abstract, we eagerly fetch the super class of the
4191       // object to avoid doing a comparison we know will fail.
4192       vixl::aarch64::Label loop;
4193       __ Bind(&loop);
4194       // /* HeapReference<Class> */ temp = temp->super_class_
4195       GenerateReferenceLoadOneRegister(instruction,
4196                                        temp_loc,
4197                                        super_offset,
4198                                        maybe_temp2_loc,
4199                                        kWithoutReadBarrier);
4200 
4201       // If the class reference currently in `temp` is null, jump to the slow path to throw the
4202       // exception.
4203       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4204       // Otherwise, compare classes.
4205       __ Cmp(temp, cls);
4206       __ B(ne, &loop);
4207       break;
4208     }
4209 
4210     case TypeCheckKind::kClassHierarchyCheck: {
4211       // /* HeapReference<Class> */ temp = obj->klass_
4212       GenerateReferenceLoadTwoRegisters(instruction,
4213                                         temp_loc,
4214                                         obj_loc,
4215                                         class_offset,
4216                                         maybe_temp2_loc,
4217                                         kWithoutReadBarrier);
4218 
4219       // Walk over the class hierarchy to find a match.
4220       vixl::aarch64::Label loop;
4221       __ Bind(&loop);
4222       __ Cmp(temp, cls);
4223       __ B(eq, &done);
4224 
4225       // /* HeapReference<Class> */ temp = temp->super_class_
4226       GenerateReferenceLoadOneRegister(instruction,
4227                                        temp_loc,
4228                                        super_offset,
4229                                        maybe_temp2_loc,
4230                                        kWithoutReadBarrier);
4231 
4232       // If the class reference currently in `temp` is not null, jump
4233       // back at the beginning of the loop.
4234       __ Cbnz(temp, &loop);
4235       // Otherwise, jump to the slow path to throw the exception.
4236       __ B(type_check_slow_path->GetEntryLabel());
4237       break;
4238     }
4239 
4240     case TypeCheckKind::kArrayObjectCheck: {
4241       // /* HeapReference<Class> */ temp = obj->klass_
4242       GenerateReferenceLoadTwoRegisters(instruction,
4243                                         temp_loc,
4244                                         obj_loc,
4245                                         class_offset,
4246                                         maybe_temp2_loc,
4247                                         kWithoutReadBarrier);
4248 
4249       // Do an exact check.
4250       __ Cmp(temp, cls);
4251       __ B(eq, &done);
4252 
4253       // Otherwise, we need to check that the object's class is a non-primitive array.
4254       // /* HeapReference<Class> */ temp = temp->component_type_
4255       GenerateReferenceLoadOneRegister(instruction,
4256                                        temp_loc,
4257                                        component_offset,
4258                                        maybe_temp2_loc,
4259                                        kWithoutReadBarrier);
4260 
4261       // If the component type is null, jump to the slow path to throw the exception.
4262       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4263       // Otherwise, the object is indeed an array. Further check that this component type is not a
4264       // primitive type.
4265       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
4266       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4267       __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
4268       break;
4269     }
4270 
4271     case TypeCheckKind::kUnresolvedCheck:
4272       // We always go into the type check slow path for the unresolved check cases.
4273       //
4274       // We cannot directly call the CheckCast runtime entry point
4275       // without resorting to a type checking slow path here (i.e. by
4276       // calling InvokeRuntime directly), as it would require to
4277       // assign fixed registers for the inputs of this HInstanceOf
4278       // instruction (following the runtime calling convention), which
4279       // might be cluttered by the potential first read barrier
4280       // emission at the beginning of this method.
4281       __ B(type_check_slow_path->GetEntryLabel());
4282       break;
4283     case TypeCheckKind::kInterfaceCheck: {
4284       // /* HeapReference<Class> */ temp = obj->klass_
4285       GenerateReferenceLoadTwoRegisters(instruction,
4286                                         temp_loc,
4287                                         obj_loc,
4288                                         class_offset,
4289                                         maybe_temp2_loc,
4290                                         kWithoutReadBarrier);
4291 
4292       // /* HeapReference<Class> */ temp = temp->iftable_
4293       GenerateReferenceLoadTwoRegisters(instruction,
4294                                         temp_loc,
4295                                         temp_loc,
4296                                         iftable_offset,
4297                                         maybe_temp2_loc,
4298                                         kWithoutReadBarrier);
4299       // Iftable is never null.
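      // `maybe_temp2_loc` tracks the remaining iftable length (decremented by 2
      // per entry, as each entry is a class/method-array pair) and
      // `maybe_temp3_loc` holds the interface class being compared.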
4300       __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
4301       // Loop through the iftable and check if any class matches.
4302       vixl::aarch64::Label start_loop;
4303       __ Bind(&start_loop);
4304       __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
4305       __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
4306       GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
4307       // Go to next interface.
4308       __ Add(temp, temp, 2 * kHeapReferenceSize);
4309       __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
4310       // Compare the classes and continue the loop if they do not match.
4311       __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
4312       __ B(ne, &start_loop);
4313       break;
4314     }
4315 
4316     case TypeCheckKind::kBitstringCheck: {
4317       // /* HeapReference<Class> */ temp = obj->klass_
4318       GenerateReferenceLoadTwoRegisters(instruction,
4319                                         temp_loc,
4320                                         obj_loc,
4321                                         class_offset,
4322                                         maybe_temp2_loc,
4323                                         kWithoutReadBarrier);
4324 
4325       GenerateBitstringTypeCheckCompare(instruction, temp);
4326       __ B(ne, type_check_slow_path->GetEntryLabel());
4327       break;
4328     }
4329   }
4330   __ Bind(&done);
4331 
4332   __ Bind(type_check_slow_path->GetExitLabel());
4333 }
4334 
4335 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
4336   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4337   locations->SetOut(Location::ConstantLocation(constant));
4338 }
4339 
4340 void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
4341   // Will be generated at use site.
4342 }
4343 
4344 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
4345   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4346   locations->SetOut(Location::ConstantLocation(constant));
4347 }
4348 
4349 void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
4350   // Will be generated at use site.
4351 }
4352 
4353 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4354   // The trampoline uses the same calling convention as dex calling conventions,
4355   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
4356   // the method_idx.
4357   HandleInvoke(invoke);
4358 }
4359 
4360 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4361   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
4362   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4363 }
4364 
4365 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
4366   InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
4367   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4368 }
4369 
4370 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4371   HandleInvoke(invoke);
4372   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
4373     // We cannot request ip1 as it's blocked by the register allocator.
4374     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1, Location::Any());
4375   }
4376 }
4377 
4378 void CodeGeneratorARM64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
4379                                                        Register klass) {
4380   DCHECK_EQ(klass.GetCode(), 0u);
4381   // We know the destination of an intrinsic, so no need to record inline
4382   // caches.
4383   if (!instruction->GetLocations()->Intrinsified() &&
4384       GetGraph()->IsCompilingBaseline() &&
4385       !Runtime::Current()->IsAotCompiler()) {
4386     DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
4387     ScopedProfilingInfoUse spiu(
4388         Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
4389     ProfilingInfo* info = spiu.GetProfilingInfo();
4390     if (info != nullptr) {
4391       InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
4392       uint64_t address = reinterpret_cast64<uint64_t>(cache);
4393       vixl::aarch64::Label done;
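      // Materialize the inline cache address in x8 and load its first class
      // entry into x9 for the monomorphic fast-path check below; on a miss the
      // runtime updates the cache.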
4394       __ Mov(x8, address);
4395       __ Ldr(x9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value()));
4396       // Fast path for a monomorphic cache.
4397       __ Cmp(klass, x9);
4398       __ B(eq, &done);
4399       InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
4400       __ Bind(&done);
4401     }
4402   }
4403 }
4404 
4405 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4406   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
4407   LocationSummary* locations = invoke->GetLocations();
4408   Register temp = XRegisterFrom(locations->GetTemp(0));
4409   Location receiver = locations->InAt(0);
4410   Offset class_offset = mirror::Object::ClassOffset();
4411   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4412 
4413   // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4414   if (receiver.IsStackSlot()) {
4415     __ Ldr(temp.W(), StackOperandFrom(receiver));
4416     {
4417       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4418       // /* HeapReference<Class> */ temp = temp->klass_
4419       __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
4420       codegen_->MaybeRecordImplicitNullCheck(invoke);
4421     }
4422   } else {
4423     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4424     // /* HeapReference<Class> */ temp = receiver->klass_
4425     __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
4426     codegen_->MaybeRecordImplicitNullCheck(invoke);
4427   }
4428 
4429   // Instead of simply (possibly) unpoisoning `temp` here, we should
4430   // emit a read barrier for the previous class reference load.
4431   // However this is not required in practice, as this is an
4432   // intermediate/temporary reference and because the current
4433   // concurrent copying collector keeps the from-space memory
4434   // intact/accessible until the end of the marking phase (though it may
4435   // not do so in the future).
4436   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4437 
4438   // If we're compiling baseline, update the inline cache.
4439   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
4440 
4441   // The register ip1 is required to be used for the hidden argument in
4442   // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
4443   MacroAssembler* masm = GetVIXLAssembler();
4444   UseScratchRegisterScope scratch_scope(masm);
4445   scratch_scope.Exclude(ip1);
4446   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
4447     Location interface_method = locations->InAt(invoke->GetNumberOfArguments() - 1);
4448     if (interface_method.IsStackSlot()) {
4449       __ Ldr(ip1, StackOperandFrom(interface_method));
4450     } else {
4451       __ Mov(ip1, XRegisterFrom(interface_method));
4452     }
4453   // If the load kind is through a runtime call, we will pass the method we
4454   // fetch from the IMT, which will either be a no-op if we don't hit the
4455   // conflict stub, or will make us always go through the trampoline when
4456   // there is a conflict.
4457   } else if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
4458     codegen_->LoadMethod(
4459         invoke->GetHiddenArgumentLoadKind(), Location::RegisterLocation(ip1.GetCode()), invoke);
4460   }
4461 
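  // From here on `temp` holds native pointers (the IMT and then an ArtMethod*),
  // so 64-bit loads are used instead of 32-bit heap-reference loads.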
4462   __ Ldr(temp,
4463       MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
4464   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4465       invoke->GetImtIndex(), kArm64PointerSize));
4466   // temp = temp->GetImtEntryAt(method_offset);
4467   __ Ldr(temp, MemOperand(temp, method_offset));
4468   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
4469     // We pass the method from the IMT in case of a conflict. This will ensure
4470     // we go into the runtime to resolve the actual method.
4471     __ Mov(ip1, temp);
4472   }
4473   // lr = temp->GetEntryPoint();
4474   __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
4475 
4476   {
4477     // Ensure the pc position is recorded immediately after the `blr` instruction.
4478     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4479 
4480     // lr();
4481     __ blr(lr);
4482     DCHECK(!codegen_->IsLeafMethod());
4483     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4484   }
4485 
4486   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4487 }
4488 
4489 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4490   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4491   if (intrinsic.TryDispatch(invoke)) {
4492     return;
4493   }
4494 
4495   HandleInvoke(invoke);
4496 }
4497 
4498 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4499   // Explicit clinit checks triggered by static invokes must have been pruned by
4500   // art::PrepareForRegisterAllocation.
4501   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4502 
4503   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4504   if (intrinsic.TryDispatch(invoke)) {
4505     return;
4506   }
4507 
4508   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
4509     CriticalNativeCallingConventionVisitorARM64 calling_convention_visitor(
4510         /*for_register_allocation=*/ true);
4511     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4512   } else {
4513     HandleInvoke(invoke);
4514   }
4515 }
4516 
4517 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4518   if (invoke->GetLocations()->Intrinsified()) {
4519     IntrinsicCodeGeneratorARM64 intrinsic(codegen);
4520     intrinsic.Dispatch(invoke);
4521     return true;
4522   }
4523   return false;
4524 }
4525 
4526 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
4527       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
4528       ArtMethod* method ATTRIBUTE_UNUSED) {
4529   // On ARM64 we support all dispatch types.
4530   return desired_dispatch_info;
4531 }
4532 
4533 void CodeGeneratorARM64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
4534   switch (load_kind) {
4535     case MethodLoadKind::kBootImageLinkTimePcRelative: {
4536       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
4537       // Add ADRP with its PC-relative method patch.
4538       vixl::aarch64::Label* adrp_label =
4539           NewBootImageMethodPatch(invoke->GetResolvedMethodReference());
4540       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4541       // Add ADD with its PC-relative method patch.
4542       vixl::aarch64::Label* add_label =
4543           NewBootImageMethodPatch(invoke->GetResolvedMethodReference(), adrp_label);
4544       EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
4545       break;
4546     }
4547     case MethodLoadKind::kBootImageRelRo: {
4548       // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
4549       uint32_t boot_image_offset = GetBootImageOffset(invoke);
4550       vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset);
4551       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4552       // Add LDR with its PC-relative .data.bimg.rel.ro patch.
4553       vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label);
4554       // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
4555       EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp));
4556       break;
4557     }
4558     case MethodLoadKind::kBssEntry: {
4559       // Add ADRP with its PC-relative .bss entry patch.
4560       vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(invoke->GetMethodReference());
4561       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4562       // Add LDR with its PC-relative .bss entry patch.
4563       vixl::aarch64::Label* ldr_label =
4564           NewMethodBssEntryPatch(invoke->GetMethodReference(), adrp_label);
4565       // All aligned loads are implicitly atomic consume operations on ARM64.
4566       EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
4567       break;
4568     }
4569     case MethodLoadKind::kJitDirectAddress: {
4570       // Load method address from literal pool.
4571       __ Ldr(XRegisterFrom(temp),
4572              DeduplicateUint64Literal(reinterpret_cast<uint64_t>(invoke->GetResolvedMethod())));
4573       break;
4574     }
4575     case MethodLoadKind::kRuntimeCall: {
4576       // Test situation, don't do anything.
4577       break;
4578     }
4579     default: {
4580       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
4581       UNREACHABLE();
4582     }
4583   }
4584 }
4585 
4586 void CodeGeneratorARM64::GenerateStaticOrDirectCall(
4587     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
4588   // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
4589   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
4590   switch (invoke->GetMethodLoadKind()) {
4591     case MethodLoadKind::kStringInit: {
4592       uint32_t offset =
4593           GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
4594       // temp = thread->string_init_entrypoint
4595       __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
4596       break;
4597     }
4598     case MethodLoadKind::kRecursive: {
4599       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
4600       break;
4601     }
4602     case MethodLoadKind::kRuntimeCall: {
4603       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
4604       return;  // No code pointer retrieval; the runtime performs the call directly.
4605     }
4606     case MethodLoadKind::kBootImageLinkTimePcRelative:
4607       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
4608       if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
4609         // Do not materialize the method pointer, load directly the entrypoint.
4610         // Add ADRP with its PC-relative JNI entrypoint patch.
4611         vixl::aarch64::Label* adrp_label =
4612             NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference());
4613         EmitAdrpPlaceholder(adrp_label, lr);
4614         // Add the LDR with its PC-relative JNI entrypoint patch.
4615         vixl::aarch64::Label* add_label =
4616             NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference(), adrp_label);
4617         EmitLdrOffsetPlaceholder(add_label, lr, lr);
4618         break;
4619       }
4620       FALLTHROUGH_INTENDED;
4621     default: {
4622       LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
4623       break;
4624     }
4625   }
4626 
4627   auto call_lr = [&]() {
4628     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4629     ExactAssemblyScope eas(GetVIXLAssembler(),
4630                            kInstructionSize,
4631                            CodeBufferCheckScope::kExactSize);
4632     // lr()
4633     __ blr(lr);
4634     RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4635   };
4636   switch (invoke->GetCodePtrLocation()) {
4637     case CodePtrLocation::kCallSelf:
4638       {
4639         // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4640         ExactAssemblyScope eas(GetVIXLAssembler(),
4641                                kInstructionSize,
4642                                CodeBufferCheckScope::kExactSize);
4643         __ bl(&frame_entry_label_);
4644         RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4645       }
4646       break;
4647     case CodePtrLocation::kCallCriticalNative: {
4648       size_t out_frame_size =
4649           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARM64,
4650                                     kAapcs64StackAlignment,
4651                                     GetCriticalNativeDirectCallFrameSize>(invoke);
4652       if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
4653         call_lr();
4654       } else {
4655         // LR = callee_method->ptr_sized_fields_.data_;  // EntryPointFromJni
4656         MemberOffset offset = ArtMethod::EntryPointFromJniOffset(kArm64PointerSize);
4657         __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
4658         // lr()
4659         call_lr();
4660       }
4661       // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
4662       switch (invoke->GetType()) {
4663         case DataType::Type::kBool:
4664           __ Ubfx(w0, w0, 0, 8);
4665           break;
4666         case DataType::Type::kInt8:
4667           __ Sbfx(w0, w0, 0, 8);
4668           break;
4669         case DataType::Type::kUint16:
4670           __ Ubfx(w0, w0, 0, 16);
4671           break;
4672         case DataType::Type::kInt16:
4673           __ Sbfx(w0, w0, 0, 16);
4674           break;
4675         case DataType::Type::kInt32:
4676         case DataType::Type::kInt64:
4677         case DataType::Type::kFloat32:
4678         case DataType::Type::kFloat64:
4679         case DataType::Type::kVoid:
4680           break;
4681         default:
4682           DCHECK(false) << invoke->GetType();
4683           break;
4684       }
4685       if (out_frame_size != 0u) {
4686         DecreaseFrame(out_frame_size);
4687       }
4688       break;
4689     }
4690     case CodePtrLocation::kCallArtMethod: {
4691       // LR = callee_method->ptr_sized_fields_.entry_point_from_quick_compiled_code_;
4692       MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4693       __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
4694       // lr()
4695       call_lr();
4696       break;
4697     }
4698   }
4699 
4700   DCHECK(!IsLeafMethod());
4701 }
4702 
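// Emits a virtual call: load the receiver's class, (possibly) unpoison it, fetch the target
// ArtMethod* from the class' embedded vtable at the statically known index, then load the
// method's quick entrypoint into lr and call it.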
4703 void CodeGeneratorARM64::GenerateVirtualCall(
4704     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
4705   // Use the calling convention instead of the location of the receiver, as
4706   // intrinsics may have put the receiver in a different register. In the intrinsics
4707   // slow path, the arguments have been moved to the right place, so here we are
4708   // guaranteed that the receiver is the first register of the calling convention.
4709   InvokeDexCallingConvention calling_convention;
4710   Register receiver = calling_convention.GetRegisterAt(0);
4711   Register temp = XRegisterFrom(temp_in);
4712   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4713       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
4714   Offset class_offset = mirror::Object::ClassOffset();
4715   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4716 
4717   DCHECK(receiver.IsRegister());
4718 
4719   {
4720     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4721     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4722     // /* HeapReference<Class> */ temp = receiver->klass_
4723     __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
4724     MaybeRecordImplicitNullCheck(invoke);
4725   }
4726   // Instead of simply (possibly) unpoisoning `temp` here, we should
4727   // emit a read barrier for the previous class reference load. However,
4728   // this is not required in practice: it is an intermediate/temporary
4729   // reference, and the current concurrent copying collector keeps the
4730   // from-space memory intact/accessible until the end of the marking
4731   // phase (though a future collector may not).
4732   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4733 
4734   // If we're compiling baseline, update the inline cache.
4735   MaybeGenerateInlineCacheCheck(invoke, temp);
4736 
4737   // temp = temp->GetMethodAt(method_offset);
4738   __ Ldr(temp, MemOperand(temp, method_offset));
4739   // lr = temp->GetEntryPoint();
4740   __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
4741   {
4742     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4743     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4744     // lr();
4745     __ blr(lr);
4746     RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4747   }
4748 }
4749 
4750 void CodeGeneratorARM64::MoveFromReturnRegister(Location trg, DataType::Type type) {
4751   if (!trg.IsValid()) {
4752     DCHECK(type == DataType::Type::kVoid);
4753     return;
4754   }
4755 
4756   DCHECK_NE(type, DataType::Type::kVoid);
4757 
4758   if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) {
4759     Register trg_reg = RegisterFrom(trg, type);
4760     Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
4761     __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
4762   } else {
4763     VRegister trg_reg = FPRegisterFrom(trg, type);
4764     VRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type);
4765     __ Fmov(trg_reg, res_reg);
4766   }
4767 }
4768 
4769 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4770   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4771   if (intrinsic.TryDispatch(invoke)) {
4772     return;
4773   }
4774   HandleInvoke(invoke);
4775 }
4776 
4777 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4778   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4779     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4780     return;
4781   }
4782   codegen_->GenerateInvokePolymorphicCall(invoke);
4783   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4784 }
4785 
4786 void LocationsBuilderARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
4787   HandleInvoke(invoke);
4788 }
4789 
4790 void InstructionCodeGeneratorARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
4791   codegen_->GenerateInvokeCustomCall(invoke);
4792   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4793 }
4794 
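// The New*Patch() helpers below record linker patches for the standard AArch64 two-instruction
// PC-relative access pattern. The first call (with no `adrp_label`) creates the ADRP patch;
// passing the returned label to a second call creates the paired ADD or LDR patch. The linker
// later rewrites both placeholder immediates, producing sequences such as (illustrative):
//     adrp xN, Target                // page of the target, patched at link time
//     add  xN, xN, #:lo12:Target     // address of a boot image object
//   or
//     ldr  wN, [xN, #:lo12:Target]   // load a 32-bit .data.bimg.rel.ro or .bss entry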
4795 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageIntrinsicPatch(
4796     uint32_t intrinsic_data,
4797     vixl::aarch64::Label* adrp_label) {
4798   return NewPcRelativePatch(
4799       /* dex_file= */ nullptr, intrinsic_data, adrp_label, &boot_image_other_patches_);
4800 }
4801 
4802 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch(
4803     uint32_t boot_image_offset,
4804     vixl::aarch64::Label* adrp_label) {
4805   return NewPcRelativePatch(
4806       /* dex_file= */ nullptr, boot_image_offset, adrp_label, &boot_image_other_patches_);
4807 }
4808 
4809 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch(
4810     MethodReference target_method,
4811     vixl::aarch64::Label* adrp_label) {
4812   return NewPcRelativePatch(
4813       target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_);
4814 }
4815 
4816 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch(
4817     MethodReference target_method,
4818     vixl::aarch64::Label* adrp_label) {
4819   return NewPcRelativePatch(
4820       target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_);
4821 }
4822 
4823 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch(
4824     const DexFile& dex_file,
4825     dex::TypeIndex type_index,
4826     vixl::aarch64::Label* adrp_label) {
4827   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_);
4828 }
4829 
4830 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
4831     HLoadClass* load_class,
4832     vixl::aarch64::Label* adrp_label) {
4833   const DexFile& dex_file = load_class->GetDexFile();
4834   dex::TypeIndex type_index = load_class->GetTypeIndex();
4835   ArenaDeque<PcRelativePatchInfo>* patches = nullptr;
4836   switch (load_class->GetLoadKind()) {
4837     case HLoadClass::LoadKind::kBssEntry:
4838       patches = &type_bss_entry_patches_;
4839       break;
4840     case HLoadClass::LoadKind::kBssEntryPublic:
4841       patches = &public_type_bss_entry_patches_;
4842       break;
4843     case HLoadClass::LoadKind::kBssEntryPackage:
4844       patches = &package_type_bss_entry_patches_;
4845       break;
4846     default:
4847       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
4848       UNREACHABLE();
4849   }
4850   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, patches);
4851 }
4852 
4853 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch(
4854     const DexFile& dex_file,
4855     dex::StringIndex string_index,
4856     vixl::aarch64::Label* adrp_label) {
4857   return NewPcRelativePatch(
4858       &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_);
4859 }
4860 
4861 vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
4862     const DexFile& dex_file,
4863     dex::StringIndex string_index,
4864     vixl::aarch64::Label* adrp_label) {
4865   return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
4866 }
4867 
4868 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageJniEntrypointPatch(
4869     MethodReference target_method,
4870     vixl::aarch64::Label* adrp_label) {
4871   return NewPcRelativePatch(
4872       target_method.dex_file, target_method.index, adrp_label, &boot_image_jni_entrypoint_patches_);
4873 }
4874 
4875 void CodeGeneratorARM64::EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset) {
4876   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
4877   DCHECK(!GetCompilerOptions().IsJitCompiler());
4878   call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
4879   vixl::aarch64::Label* bl_label = &call_entrypoint_patches_.back().label;
4880   __ bind(bl_label);
4881   __ bl(static_cast<int64_t>(0));  // Placeholder, patched at link-time.
4882 }
4883 
4884 void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) {
4885   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
4886   if (GetCompilerOptions().IsJitCompiler()) {
4887     auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
4888     vixl::aarch64::Label* slow_path_entry = &it->second.label;
4889     __ cbnz(mr, slow_path_entry);
4890   } else {
4891     baker_read_barrier_patches_.emplace_back(custom_data);
4892     vixl::aarch64::Label* cbnz_label = &baker_read_barrier_patches_.back().label;
4893     __ bind(cbnz_label);
4894     __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
4895   }
4896 }
4897 
4898 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
4899     const DexFile* dex_file,
4900     uint32_t offset_or_index,
4901     vixl::aarch64::Label* adrp_label,
4902     ArenaDeque<PcRelativePatchInfo>* patches) {
4903   // Add a patch entry and return the label.
4904   patches->emplace_back(dex_file, offset_or_index);
4905   PcRelativePatchInfo* info = &patches->back();
4906   vixl::aarch64::Label* label = &info->label;
4907   // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
4908   info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
4909   return label;
4910 }
4911 
4912 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
4913     uint64_t address) {
4914   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
4915 }
4916 
4917 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
4918     const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) {
4919   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
4920   return jit_string_patches_.GetOrCreate(
4921       StringReference(&dex_file, string_index),
4922       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
4923 }
4924 
4925 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
4926     const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) {
4927   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
4928   return jit_class_patches_.GetOrCreate(
4929       TypeReference(&dex_file, type_index),
4930       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
4931 }
4932 
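// The Emit*Placeholder() helpers bind the patch label and emit a single instruction with a
// zero immediate for the linker to fill in; the SingleEmissionCheckScope keeps the bound
// label immediately before that instruction (no literal pool or veneer in between).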
4933 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
4934                                              vixl::aarch64::Register reg) {
4935   DCHECK(reg.IsX());
4936   SingleEmissionCheckScope guard(GetVIXLAssembler());
4937   __ Bind(fixup_label);
4938   __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
4939 }
4940 
4941 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
4942                                             vixl::aarch64::Register out,
4943                                             vixl::aarch64::Register base) {
4944   DCHECK(out.IsX());
4945   DCHECK(base.IsX());
4946   SingleEmissionCheckScope guard(GetVIXLAssembler());
4947   __ Bind(fixup_label);
4948   __ add(out, base, Operand(/* offset placeholder */ 0));
4949 }
4950 
4951 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
4952                                                   vixl::aarch64::Register out,
4953                                                   vixl::aarch64::Register base) {
4954   DCHECK(base.IsX());
4955   SingleEmissionCheckScope guard(GetVIXLAssembler());
4956   __ Bind(fixup_label);
4957   __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
4958 }
4959 
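// Loads a boot image address in one of three ways depending on the compilation mode:
//  - boot image (extension): ADRP + ADD computing the address via intrinsic patches,
//  - AOT app compilation (PIC): ADRP + LDR of a 32-bit entry in .data.bimg.rel.ro,
//  - JIT: an LDR from a literal holding the absolute address in the loaded boot image.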
4960 void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg,
4961                                               uint32_t boot_image_reference) {
4962   if (GetCompilerOptions().IsBootImage()) {
4963     // Add ADRP with its PC-relative intrinsic patch.
4964     vixl::aarch64::Label* adrp_label = NewBootImageIntrinsicPatch(boot_image_reference);
4965     EmitAdrpPlaceholder(adrp_label, reg.X());
4966     // Add ADD with its PC-relative intrinsic patch.
4967     vixl::aarch64::Label* add_label = NewBootImageIntrinsicPatch(boot_image_reference, adrp_label);
4968     EmitAddPlaceholder(add_label, reg.X(), reg.X());
4969   } else if (GetCompilerOptions().GetCompilePic()) {
4970     // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
4971     vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_reference);
4972     EmitAdrpPlaceholder(adrp_label, reg.X());
4973     // Add LDR with its PC-relative .data.bimg.rel.ro patch.
4974     vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_reference, adrp_label);
4975     EmitLdrOffsetPlaceholder(ldr_label, reg.W(), reg.X());
4976   } else {
4977     DCHECK(GetCompilerOptions().IsJitCompiler());
4978     gc::Heap* heap = Runtime::Current()->GetHeap();
4979     DCHECK(!heap->GetBootImageSpaces().empty());
4980     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
4981     __ Ldr(reg.W(), DeduplicateBootImageAddressLiteral(reinterpret_cast<uintptr_t>(address)));
4982   }
4983 }
4984 
4985 void CodeGeneratorARM64::LoadTypeForBootImageIntrinsic(vixl::aarch64::Register reg,
4986                                                        TypeReference target_type) {
4987   // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
4988   DCHECK(GetCompilerOptions().IsBootImage());
4989   // Add ADRP with its PC-relative type patch.
4990   vixl::aarch64::Label* adrp_label =
4991       NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
4992   EmitAdrpPlaceholder(adrp_label, reg.X());
4993   // Add ADD with its PC-relative type patch.
4994   vixl::aarch64::Label* add_label =
4995       NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex(), adrp_label);
4996   EmitAddPlaceholder(add_label, reg.X(), reg.X());
4997 }
4998 
4999 void CodeGeneratorARM64::LoadIntrinsicDeclaringClass(vixl::aarch64::Register reg, HInvoke* invoke) {
5000   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
5001   if (GetCompilerOptions().IsBootImage()) {
5002     MethodReference target_method = invoke->GetResolvedMethodReference();
5003     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5004     LoadTypeForBootImageIntrinsic(reg, TypeReference(target_method.dex_file, type_idx));
5005   } else {
5006     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
5007     LoadBootImageAddress(reg, boot_image_offset);
5008   }
5009 }
5010 
5011 void CodeGeneratorARM64::LoadClassRootForIntrinsic(vixl::aarch64::Register reg,
5012                                                    ClassRoot class_root) {
5013   if (GetCompilerOptions().IsBootImage()) {
5014     ScopedObjectAccess soa(Thread::Current());
5015     ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
5016     TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex());
5017     LoadTypeForBootImageIntrinsic(reg, target_type);
5018   } else {
5019     uint32_t boot_image_offset = GetBootImageOffset(class_root);
5020     LoadBootImageAddress(reg, boot_image_offset);
5021   }
5022 }
5023 
5024 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
5025 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
5026     const ArenaDeque<PcRelativePatchInfo>& infos,
5027     ArenaVector<linker::LinkerPatch>* linker_patches) {
5028   for (const PcRelativePatchInfo& info : infos) {
5029     linker_patches->push_back(Factory(info.label.GetLocation(),
5030                                       info.target_dex_file,
5031                                       info.pc_insn_label->GetLocation(),
5032                                       info.offset_or_index));
5033   }
5034 }
5035 
5036 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
5037 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5038                                      const DexFile* target_dex_file,
5039                                      uint32_t pc_insn_offset,
5040                                      uint32_t boot_image_offset) {
5041   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
5042   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5043 }
5044 
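// Flattens every patch list recorded during code generation into `linker_patches`. The size
// computed up front must match the final vector size; the DCHECK at the end catches a patch
// list that was added elsewhere but forgotten here.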
5045 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5046   DCHECK(linker_patches->empty());
5047   size_t size =
5048       boot_image_method_patches_.size() +
5049       method_bss_entry_patches_.size() +
5050       boot_image_type_patches_.size() +
5051       type_bss_entry_patches_.size() +
5052       public_type_bss_entry_patches_.size() +
5053       package_type_bss_entry_patches_.size() +
5054       boot_image_string_patches_.size() +
5055       string_bss_entry_patches_.size() +
5056       boot_image_jni_entrypoint_patches_.size() +
5057       boot_image_other_patches_.size() +
5058       call_entrypoint_patches_.size() +
5059       baker_read_barrier_patches_.size();
5060   linker_patches->reserve(size);
5061   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5062     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5063         boot_image_method_patches_, linker_patches);
5064     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5065         boot_image_type_patches_, linker_patches);
5066     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5067         boot_image_string_patches_, linker_patches);
5068   } else {
5069     DCHECK(boot_image_method_patches_.empty());
5070     DCHECK(boot_image_type_patches_.empty());
5071     DCHECK(boot_image_string_patches_.empty());
5072   }
5073   if (GetCompilerOptions().IsBootImage()) {
5074     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5075         boot_image_other_patches_, linker_patches);
5076   } else {
5077     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
5078         boot_image_other_patches_, linker_patches);
5079   }
5080   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5081       method_bss_entry_patches_, linker_patches);
5082   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5083       type_bss_entry_patches_, linker_patches);
5084   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
5085       public_type_bss_entry_patches_, linker_patches);
5086   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
5087       package_type_bss_entry_patches_, linker_patches);
5088   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5089       string_bss_entry_patches_, linker_patches);
5090   EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
5091       boot_image_jni_entrypoint_patches_, linker_patches);
5092   for (const PatchInfo<vixl::aarch64::Label>& info : call_entrypoint_patches_) {
5093     DCHECK(info.target_dex_file == nullptr);
5094     linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
5095         info.label.GetLocation(), info.offset_or_index));
5096   }
5097   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
5098     linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
5099         info.label.GetLocation(), info.custom_data));
5100   }
5101   DCHECK_EQ(size, linker_patches->size());
5102 }
5103 
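// Thunks are small trampolines emitted on behalf of the linker: method-call thunks (used,
// for example, when a relative `bl` cannot reach its target), entrypoint-call thunks and
// Baker read barrier thunks. EmitThunkCode() assembles the body for each supported kind.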
5104 bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const {
5105   return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
5106          patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
5107          patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
5108 }
5109 
5110 void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch,
5111                                        /*out*/ ArenaVector<uint8_t>* code,
5112                                        /*out*/ std::string* debug_name) {
5113   Arm64Assembler assembler(GetGraph()->GetAllocator());
5114   switch (patch.GetType()) {
5115     case linker::LinkerPatch::Type::kCallRelative: {
5116       // The thunk just uses the entry point in the ArtMethod. This works even for calls
5117       // to the generic JNI and interpreter trampolines.
5118       Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5119           kArm64PointerSize).Int32Value());
5120       assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
5121       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
5122         *debug_name = "MethodCallThunk";
5123       }
5124       break;
5125     }
5126     case linker::LinkerPatch::Type::kCallEntrypoint: {
5127       Offset offset(patch.EntrypointOffset());
5128       assembler.JumpTo(ManagedRegister(arm64::TR), offset, ManagedRegister(arm64::IP0));
5129       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
5130         *debug_name = "EntrypointCallThunk_" + std::to_string(offset.Uint32Value());
5131       }
5132       break;
5133     }
5134     case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
5135       DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
5136       CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
5137       break;
5138     }
5139     default:
5140       LOG(FATAL) << "Unexpected patch type " << patch.GetType();
5141       UNREACHABLE();
5142   }
5143 
5144   // Ensure we emit the literal pool if any.
5145   assembler.FinalizeCode();
5146   code->resize(assembler.CodeSize());
5147   MemoryRegion code_region(code->data(), code->size());
5148   assembler.FinalizeInstructions(code_region);
5149 }
5150 
5151 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) {
5152   return uint32_literals_.GetOrCreate(
5153       value,
5154       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
5155 }
5156 
5157 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
5158   return uint64_literals_.GetOrCreate(
5159       value,
5160       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
5161 }
5162 
5163 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
5164   // Explicit clinit checks triggered by static invokes must have been pruned by
5165   // art::PrepareForRegisterAllocation.
5166   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
5167 
5168   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
5169     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5170     return;
5171   }
5172 
5173   LocationSummary* locations = invoke->GetLocations();
5174   codegen_->GenerateStaticOrDirectCall(
5175       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
5176 
5177   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5178 }
5179 
5180 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
5181   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
5182     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5183     return;
5184   }
5185 
5186   {
5187     // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
5188     // are no pools emitted.
5189     EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
5190     codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
5191     DCHECK(!codegen_->IsLeafMethod());
5192   }
5193 
5194   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5195 }
5196 
5197 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
5198     HLoadClass::LoadKind desired_class_load_kind) {
5199   switch (desired_class_load_kind) {
5200     case HLoadClass::LoadKind::kInvalid:
5201       LOG(FATAL) << "UNREACHABLE";
5202       UNREACHABLE();
5203     case HLoadClass::LoadKind::kReferrersClass:
5204       break;
5205     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5206     case HLoadClass::LoadKind::kBootImageRelRo:
5207     case HLoadClass::LoadKind::kBssEntry:
5208     case HLoadClass::LoadKind::kBssEntryPublic:
5209     case HLoadClass::LoadKind::kBssEntryPackage:
5210       DCHECK(!GetCompilerOptions().IsJitCompiler());
5211       break;
5212     case HLoadClass::LoadKind::kJitBootImageAddress:
5213     case HLoadClass::LoadKind::kJitTableAddress:
5214       DCHECK(GetCompilerOptions().IsJitCompiler());
5215       break;
5216     case HLoadClass::LoadKind::kRuntimeCall:
5217       break;
5218   }
5219   return desired_class_load_kind;
5220 }
5221 
5222 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
5223   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5224   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5225     InvokeRuntimeCallingConvention calling_convention;
5226     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
5227         cls,
5228         LocationFrom(calling_convention.GetRegisterAt(0)),
5229         LocationFrom(vixl::aarch64::x0));
5230     DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
5231     return;
5232   }
5233   DCHECK_EQ(cls->NeedsAccessCheck(),
5234             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
5235                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
5236 
5237   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
5238   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
5239       ? LocationSummary::kCallOnSlowPath
5240       : LocationSummary::kNoCall;
5241   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
5242   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
5243     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5244   }
5245 
5246   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
5247     locations->SetInAt(0, Location::RequiresRegister());
5248   }
5249   locations->SetOut(Location::RequiresRegister());
5250   if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
5251     if (!kUseReadBarrier || kUseBakerReadBarrier) {
5252       // Rely on the type resolution or initialization and marking to save everything we need.
5253       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
5254     } else {
5255       // For non-Baker read barrier we have a temp-clobbering call.
5256     }
5257   }
5258 }
5259 
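// Code generation for HLoadClass. For the kBssEntry* kinds, the class GcRoot is loaded
// PC-relatively from its .bss slot (ADRP + LDR); a null result means the class is not yet
// resolved, so we branch to LoadClassSlowPathARM64, which also performs the class
// initialization check when one is required.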
5260 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5261 // move.
5262 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
5263   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5264   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5265     codegen_->GenerateLoadClassRuntimeCall(cls);
5266     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5267     return;
5268   }
5269   DCHECK_EQ(cls->NeedsAccessCheck(),
5270             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
5271                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
5272 
5273   Location out_loc = cls->GetLocations()->Out();
5274   Register out = OutputRegister(cls);
5275 
5276   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
5277       ? kWithoutReadBarrier
5278       : kCompilerReadBarrierOption;
5279   bool generate_null_check = false;
5280   switch (load_kind) {
5281     case HLoadClass::LoadKind::kReferrersClass: {
5282       DCHECK(!cls->CanCallRuntime());
5283       DCHECK(!cls->MustGenerateClinitCheck());
5284       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5285       Register current_method = InputRegisterAt(cls, 0);
5286       codegen_->GenerateGcRootFieldLoad(cls,
5287                                         out_loc,
5288                                         current_method,
5289                                         ArtMethod::DeclaringClassOffset().Int32Value(),
5290                                         /* fixup_label= */ nullptr,
5291                                         read_barrier_option);
5292       break;
5293     }
5294     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
5295       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
5296              codegen_->GetCompilerOptions().IsBootImageExtension());
5297       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5298       // Add ADRP with its PC-relative type patch.
5299       const DexFile& dex_file = cls->GetDexFile();
5300       dex::TypeIndex type_index = cls->GetTypeIndex();
5301       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index);
5302       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5303       // Add ADD with its PC-relative type patch.
5304       vixl::aarch64::Label* add_label =
5305           codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label);
5306       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5307       break;
5308     }
5309     case HLoadClass::LoadKind::kBootImageRelRo: {
5310       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5311       uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(cls);
5312       // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
5313       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
5314       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5315       // Add LDR with its PC-relative .data.bimg.rel.ro patch.
5316       vixl::aarch64::Label* ldr_label =
5317           codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label);
5318       codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
5319       break;
5320     }
5321     case HLoadClass::LoadKind::kBssEntry:
5322     case HLoadClass::LoadKind::kBssEntryPublic:
5323     case HLoadClass::LoadKind::kBssEntryPackage: {
5324       // Add ADRP with its PC-relative Class .bss entry patch.
5325       vixl::aarch64::Register temp = XRegisterFrom(out_loc);
5326       vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(cls);
5327       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5328       // Add LDR with its PC-relative Class .bss entry patch.
5329       vixl::aarch64::Label* ldr_label = codegen_->NewBssEntryTypePatch(cls, adrp_label);
5330       // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
5331       // All aligned loads are implicitly atomic consume operations on ARM64.
5332       codegen_->GenerateGcRootFieldLoad(cls,
5333                                         out_loc,
5334                                         temp,
5335                                         /* offset placeholder */ 0u,
5336                                         ldr_label,
5337                                         read_barrier_option);
5338       generate_null_check = true;
5339       break;
5340     }
5341     case HLoadClass::LoadKind::kJitBootImageAddress: {
5342       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5343       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
5344       DCHECK_NE(address, 0u);
5345       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5346       break;
5347     }
5348     case HLoadClass::LoadKind::kJitTableAddress: {
5349       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
5350                                                        cls->GetTypeIndex(),
5351                                                        cls->GetClass()));
5352       codegen_->GenerateGcRootFieldLoad(cls,
5353                                         out_loc,
5354                                         out.X(),
5355                                         /* offset= */ 0,
5356                                         /* fixup_label= */ nullptr,
5357                                         read_barrier_option);
5358       break;
5359     }
5360     case HLoadClass::LoadKind::kRuntimeCall:
5361     case HLoadClass::LoadKind::kInvalid:
5362       LOG(FATAL) << "UNREACHABLE";
5363       UNREACHABLE();
5364   }
5365 
5366   bool do_clinit = cls->MustGenerateClinitCheck();
5367   if (generate_null_check || do_clinit) {
5368     DCHECK(cls->CanCallRuntime());
5369     SlowPathCodeARM64* slow_path =
5370         new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(cls, cls);
5371     codegen_->AddSlowPath(slow_path);
5372     if (generate_null_check) {
5373       __ Cbz(out, slow_path->GetEntryLabel());
5374     }
5375     if (cls->MustGenerateClinitCheck()) {
5376       GenerateClassInitializationCheck(slow_path, out);
5377     } else {
5378       __ Bind(slow_path->GetExitLabel());
5379     }
5380     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5381   }
5382 }
5383 
5384 void LocationsBuilderARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
5385   InvokeRuntimeCallingConvention calling_convention;
5386   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
5387   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
5388 }
5389 
5390 void InstructionCodeGeneratorARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
5391   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
5392 }
5393 
5394 void LocationsBuilderARM64::VisitLoadMethodType(HLoadMethodType* load) {
5395   InvokeRuntimeCallingConvention calling_convention;
5396   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
5397   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
5398 }
5399 
5400 void InstructionCodeGeneratorARM64::VisitLoadMethodType(HLoadMethodType* load) {
5401   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
5402 }
5403 
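// The pending exception is a Thread field at a fixed offset from the thread register (tr):
// loading it yields the exception reference and storing wzr clears it.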
5404 static MemOperand GetExceptionTlsAddress() {
5405   return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
5406 }
5407 
5408 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
5409   LocationSummary* locations =
5410       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
5411   locations->SetOut(Location::RequiresRegister());
5412 }
5413 
5414 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
5415   __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress());
5416 }
5417 
5418 void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
5419   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
5420 }
5421 
5422 void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5423   __ Str(wzr, GetExceptionTlsAddress());
5424 }
5425 
5426 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
5427     HLoadString::LoadKind desired_string_load_kind) {
5428   switch (desired_string_load_kind) {
5429     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5430     case HLoadString::LoadKind::kBootImageRelRo:
5431     case HLoadString::LoadKind::kBssEntry:
5432       DCHECK(!GetCompilerOptions().IsJitCompiler());
5433       break;
5434     case HLoadString::LoadKind::kJitBootImageAddress:
5435     case HLoadString::LoadKind::kJitTableAddress:
5436       DCHECK(GetCompilerOptions().IsJitCompiler());
5437       break;
5438     case HLoadString::LoadKind::kRuntimeCall:
5439       break;
5440   }
5441   return desired_string_load_kind;
5442 }
5443 
5444 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
5445   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
5446   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
5447   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
5448     InvokeRuntimeCallingConvention calling_convention;
5449     locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
5450   } else {
5451     locations->SetOut(Location::RequiresRegister());
5452     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
5453       if (!kUseReadBarrier || kUseBakerReadBarrier) {
5454         // Rely on the pResolveString and marking to save everything we need.
5455         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
5456       } else {
5457         // For non-Baker read barrier we have a temp-clobbering call.
5458       }
5459     }
5460   }
5461 }
5462 
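// Code generation for HLoadString, mirroring HLoadClass above: boot image strings use
// ADRP + ADD, .data.bimg.rel.ro and .bss entries use ADRP + LDR (with a slow path calling
// pResolveString when the .bss entry is still null), JIT code uses literals, and any other
// kind falls through to the kQuickResolveString runtime call.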
5463 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5464 // move.
5465 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
5466   Register out = OutputRegister(load);
5467   Location out_loc = load->GetLocations()->Out();
5468 
5469   switch (load->GetLoadKind()) {
5470     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5471       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
5472              codegen_->GetCompilerOptions().IsBootImageExtension());
5473       // Add ADRP with its PC-relative String patch.
5474       const DexFile& dex_file = load->GetDexFile();
5475       const dex::StringIndex string_index = load->GetStringIndex();
5476       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index);
5477       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5478       // Add ADD with its PC-relative String patch.
5479       vixl::aarch64::Label* add_label =
5480           codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label);
5481       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5482       return;
5483     }
5484     case HLoadString::LoadKind::kBootImageRelRo: {
5485       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5486       // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
5487       uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(load);
5488       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
5489       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5490       // Add LDR with its PC-relative .data.bimg.rel.ro patch.
5491       vixl::aarch64::Label* ldr_label =
5492           codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label);
5493       codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
5494       return;
5495     }
5496     case HLoadString::LoadKind::kBssEntry: {
5497       // Add ADRP with its PC-relative String .bss entry patch.
5498       const DexFile& dex_file = load->GetDexFile();
5499       const dex::StringIndex string_index = load->GetStringIndex();
5500       Register temp = XRegisterFrom(out_loc);
5501       vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index);
5502       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5503       // Add LDR with its PC-relative String .bss entry patch.
5504       vixl::aarch64::Label* ldr_label =
5505           codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
5506       // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
5507       // All aligned loads are implicitly atomic consume operations on ARM64.
5508       codegen_->GenerateGcRootFieldLoad(load,
5509                                         out_loc,
5510                                         temp,
5511                                         /* offset placeholder */ 0u,
5512                                         ldr_label,
5513                                         kCompilerReadBarrierOption);
5514       SlowPathCodeARM64* slow_path =
5515           new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
5516       codegen_->AddSlowPath(slow_path);
5517       __ Cbz(out.X(), slow_path->GetEntryLabel());
5518       __ Bind(slow_path->GetExitLabel());
5519       codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5520       return;
5521     }
5522     case HLoadString::LoadKind::kJitBootImageAddress: {
5523       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
5524       DCHECK_NE(address, 0u);
5525       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5526       return;
5527     }
5528     case HLoadString::LoadKind::kJitTableAddress: {
5529       __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
5530                                                         load->GetStringIndex(),
5531                                                         load->GetString()));
5532       codegen_->GenerateGcRootFieldLoad(load,
5533                                         out_loc,
5534                                         out.X(),
5535                                         /* offset= */ 0,
5536                                         /* fixup_label= */ nullptr,
5537                                         kCompilerReadBarrierOption);
5538       return;
5539     }
5540     default:
5541       break;
5542   }
5543 
5544   // TODO: Re-add the compiler code to do string dex cache lookup again.
5545   InvokeRuntimeCallingConvention calling_convention;
5546   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
5547   __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
5548   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
5549   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
5550   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5551 }
5552 
5553 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
5554   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
5555   locations->SetOut(Location::ConstantLocation(constant));
5556 }
5557 
5558 void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
5559   // Will be generated at use site.
5560 }
5561 
5562 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5563   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5564       instruction, LocationSummary::kCallOnMainOnly);
5565   InvokeRuntimeCallingConvention calling_convention;
5566   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5567 }
5568 
5569 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5570   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
5571                           instruction,
5572                           instruction->GetDexPc());
5573   if (instruction->IsEnter()) {
5574     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
5575   } else {
5576     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
5577   }
5578   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5579 }
5580 
5581 void LocationsBuilderARM64::VisitMul(HMul* mul) {
5582   LocationSummary* locations =
5583       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
5584   switch (mul->GetResultType()) {
5585     case DataType::Type::kInt32:
5586     case DataType::Type::kInt64:
5587       locations->SetInAt(0, Location::RequiresRegister());
5588       locations->SetInAt(1, Location::RequiresRegister());
5589       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5590       break;
5591 
5592     case DataType::Type::kFloat32:
5593     case DataType::Type::kFloat64:
5594       locations->SetInAt(0, Location::RequiresFpuRegister());
5595       locations->SetInAt(1, Location::RequiresFpuRegister());
5596       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5597       break;
5598 
5599     default:
5600       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5601   }
5602 }
5603 
5604 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) {
5605   switch (mul->GetResultType()) {
5606     case DataType::Type::kInt32:
5607     case DataType::Type::kInt64:
5608       __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
5609       break;
5610 
5611     case DataType::Type::kFloat32:
5612     case DataType::Type::kFloat64:
5613       __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1));
5614       break;
5615 
5616     default:
5617       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5618   }
5619 }
5620 
5621 void LocationsBuilderARM64::VisitNeg(HNeg* neg) {
5622   LocationSummary* locations =
5623       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
5624   switch (neg->GetResultType()) {
5625     case DataType::Type::kInt32:
5626     case DataType::Type::kInt64:
5627       locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
5628       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5629       break;
5630 
5631     case DataType::Type::kFloat32:
5632     case DataType::Type::kFloat64:
5633       locations->SetInAt(0, Location::RequiresFpuRegister());
5634       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5635       break;
5636 
5637     default:
5638       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5639   }
5640 }
5641 
5642 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
5643   switch (neg->GetResultType()) {
5644     case DataType::Type::kInt32:
5645     case DataType::Type::kInt64:
5646       __ Neg(OutputRegister(neg), InputOperandAt(neg, 0));
5647       break;
5648 
5649     case DataType::Type::kFloat32:
5650     case DataType::Type::kFloat64:
5651       __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
5652       break;
5653 
5654     default:
5655       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5656   }
5657 }
5658 
5659 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
5660   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5661       instruction, LocationSummary::kCallOnMainOnly);
5662   InvokeRuntimeCallingConvention calling_convention;
5663   locations->SetOut(LocationFrom(x0));
5664   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5665   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5666 }
5667 
5668 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
5669   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5670   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5671   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5672   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5673   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5674 }
5675 
5676 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
5677   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5678       instruction, LocationSummary::kCallOnMainOnly);
5679   InvokeRuntimeCallingConvention calling_convention;
5680   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5681   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
5682 }
5683 
5684 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
5685   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5686   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5687   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5688 }
5689 
5690 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
5691   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5692   locations->SetInAt(0, Location::RequiresRegister());
5693   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5694 }
5695 
5696 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
5697   switch (instruction->GetResultType()) {
5698     case DataType::Type::kInt32:
5699     case DataType::Type::kInt64:
5700       __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
5701       break;
5702 
5703     default:
5704       LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
5705   }
5706 }
5707 
5708 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
5709   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5710   locations->SetInAt(0, Location::RequiresRegister());
5711   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5712 }
5713 
5714 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
5715   __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
5716 }
5717 
5718 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
5719   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5720   locations->SetInAt(0, Location::RequiresRegister());
5721 }
5722 
5723 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5724   if (CanMoveNullCheckToUser(instruction)) {
5725     return;
5726   }
5727   {
5728     // Ensure that between load and RecordPcInfo there are no pools emitted.
5729     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5730     Location obj = instruction->GetLocations()->InAt(0);
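    // Load into wzr so the result is discarded; if `obj` is null the access faults and the
    // fault handler maps the recorded PC back to this null check.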
5731     __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
5732     RecordPcInfo(instruction, instruction->GetDexPc());
5733   }
5734 }
5735 
5736 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5737   SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathARM64(instruction);
5738   AddSlowPath(slow_path);
5739 
5740   LocationSummary* locations = instruction->GetLocations();
5741   Location obj = locations->InAt(0);
5742 
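  // If the object is null, branch to the slow path, which throws the NullPointerException.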
5743   __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
5744 }
5745 
5746 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
5747   codegen_->GenerateNullCheck(instruction);
5748 }
5749 
5750 void LocationsBuilderARM64::VisitOr(HOr* instruction) {
5751   HandleBinaryOp(instruction);
5752 }
5753 
5754 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
5755   HandleBinaryOp(instruction);
5756 }
5757 
5758 void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5759   LOG(FATAL) << "Unreachable";
5760 }
5761 
5762 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
5763   if (instruction->GetNext()->IsSuspendCheck() &&
5764       instruction->GetBlock()->GetLoopInformation() != nullptr) {
5765     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5766     // The back edge will generate the suspend check.
5767     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5768   }
5769 
5770   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5771 }
5772 
5773 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
5774   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5775   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5776   if (location.IsStackSlot()) {
5777     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5778   } else if (location.IsDoubleStackSlot()) {
5779     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5780   }
5781   locations->SetOut(location);
5782 }
5783 
5784 void InstructionCodeGeneratorARM64::VisitParameterValue(
5785     HParameterValue* instruction ATTRIBUTE_UNUSED) {
5786   // Nothing to do, the parameter is already at its location.
5787 }
5788 
5789 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
5790   LocationSummary* locations =
5791       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5792   locations->SetOut(LocationFrom(kArtMethodRegister));
5793 }
5794 
5795 void InstructionCodeGeneratorARM64::VisitCurrentMethod(
5796     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5797   // Nothing to do, the method is already at its location.
5798 }
5799 
5800 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
5801   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5802   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5803     locations->SetInAt(i, Location::Any());
5804   }
5805   locations->SetOut(Location::Any());
5806 }
5807 
5808 void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5809   LOG(FATAL) << "Unreachable";
5810 }
5811 
5812 void LocationsBuilderARM64::VisitRem(HRem* rem) {
5813   DataType::Type type = rem->GetResultType();
5814   LocationSummary::CallKind call_kind =
5815       DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
5816                                            : LocationSummary::kNoCall;
5817   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
5818 
5819   switch (type) {
5820     case DataType::Type::kInt32:
5821     case DataType::Type::kInt64:
5822       locations->SetInAt(0, Location::RequiresRegister());
5823       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
5824       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5825       break;
5826 
5827     case DataType::Type::kFloat32:
5828     case DataType::Type::kFloat64: {
5829       InvokeRuntimeCallingConvention calling_convention;
5830       locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
5831       locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
5832       locations->SetOut(calling_convention.GetReturnLocation(type));
5833 
5834       break;
5835     }
5836 
5837     default:
5838       LOG(FATAL) << "Unexpected rem type " << type;
5839   }
5840 }
5841 
5842 void InstructionCodeGeneratorARM64::GenerateIntRemForPower2Denom(HRem *instruction) {
5843   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
5844   uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
5845   DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
5846 
5847   Register out = OutputRegister(instruction);
5848   Register dividend = InputRegisterAt(instruction, 0);
5849 
5850   if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
5851     // No need to adjust the result for non-negative dividends or the INT32_MIN/INT64_MIN dividends.
5852     // NOTE: The generated code for HRem correctly works for the INT32_MIN/INT64_MIN dividends.
5853     // INT*_MIN % imm must be 0 for any imm that is a power of two. 'and' works only with bits
5854     // 0..30 (Int32 case)/0..62 (Int64 case) of a dividend. For INT32_MIN/INT64_MIN they are zeros.
5855     // So 'and' always produces zero.
5856     __ And(out, dividend, abs_imm - 1);
5857   } else {
5858     if (abs_imm == 2) {
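      // For a divisor of +/-2 the remainder is (dividend & 1) with the sign of the dividend:
      // Csneg keeps the masked bit when the dividend is non-negative (ge) and negates it otherwise.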
5859       __ Cmp(dividend, 0);
5860       __ And(out, dividend, 1);
5861       __ Csneg(out, out, out, ge);
5862     } else {
5863       UseScratchRegisterScope temps(GetVIXLAssembler());
5864       Register temp = temps.AcquireSameSizeAs(out);
5865 
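      // Compute a signed remainder: `Negs` produces -dividend and sets the flags, both the
      // dividend and its negation are masked with (abs_imm - 1), and `Csneg` selects the masked
      // dividend for a positive dividend or the negated masked magnitude otherwise.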
5866       __ Negs(temp, dividend);
5867       __ And(out, dividend, abs_imm - 1);
5868       __ And(temp, temp, abs_imm - 1);
5869       __ Csneg(out, out, temp, mi);
5870     }
5871   }
5872 }
5873 
5874 void InstructionCodeGeneratorARM64::GenerateIntRemForConstDenom(HRem *instruction) {
5875   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
5876 
5877   if (imm == 0) {
5878     // Do not generate anything.
5879     // DivZeroCheck would prevent any code from being executed.
5880     return;
5881   }
5882 
5883   if (IsPowerOfTwo(AbsOrMin(imm))) {
5884     // Cases imm == -1 or imm == 1 are handled in constant folding by
5885     // InstructionWithAbsorbingInputSimplifier.
5886     // If these cases survive until code generation, they are handled in
5887     // GenerateIntRemForPower2Denom because -1 and 1 are powers of two (2^0).
5888     // The generated code is correct for them, it just uses more instructions.
5889     GenerateIntRemForPower2Denom(instruction);
5890   } else {
5891     DCHECK(imm < -2 || imm > 2) << imm;
5892     GenerateDivRemWithAnyConstant(instruction, imm);
5893   }
5894 }
5895 
5896 void InstructionCodeGeneratorARM64::GenerateIntRem(HRem* instruction) {
5897   DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
5898          << instruction->GetResultType();
5899 
5900   if (instruction->GetLocations()->InAt(1).IsConstant()) {
5901     GenerateIntRemForConstDenom(instruction);
5902   } else {
5903     Register out = OutputRegister(instruction);
5904     Register dividend = InputRegisterAt(instruction, 0);
5905     Register divisor = InputRegisterAt(instruction, 1);
5906     UseScratchRegisterScope temps(GetVIXLAssembler());
5907     Register temp = temps.AcquireSameSizeAs(out);
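    // remainder = dividend - (dividend / divisor) * divisor, computed with Sdiv + Msub.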
5908     __ Sdiv(temp, dividend, divisor);
5909     __ Msub(out, temp, divisor, dividend);
5910   }
5911 }
5912 
5913 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
5914   DataType::Type type = rem->GetResultType();
5915 
5916   switch (type) {
5917     case DataType::Type::kInt32:
5918     case DataType::Type::kInt64: {
5919       GenerateIntRem(rem);
5920       break;
5921     }
5922 
5923     case DataType::Type::kFloat32:
5924     case DataType::Type::kFloat64: {
5925       QuickEntrypointEnum entrypoint =
5926           (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod;
5927       codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
5928       if (type == DataType::Type::kFloat32) {
5929         CheckEntrypointTypes<kQuickFmodf, float, float, float>();
5930       } else {
5931         CheckEntrypointTypes<kQuickFmod, double, double, double>();
5932       }
5933       break;
5934     }
5935 
5936     default:
5937       LOG(FATAL) << "Unexpected rem type " << type;
5938       UNREACHABLE();
5939   }
5940 }
5941 
5942 void LocationsBuilderARM64::VisitMin(HMin* min) {
5943   HandleBinaryOp(min);
5944 }
5945 
5946 void InstructionCodeGeneratorARM64::VisitMin(HMin* min) {
5947   HandleBinaryOp(min);
5948 }
5949 
5950 void LocationsBuilderARM64::VisitMax(HMax* max) {
5951   HandleBinaryOp(max);
5952 }
5953 
5954 void InstructionCodeGeneratorARM64::VisitMax(HMax* max) {
5955   HandleBinaryOp(max);
5956 }
5957 
5958 void LocationsBuilderARM64::VisitAbs(HAbs* abs) {
5959   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
5960   switch (abs->GetResultType()) {
5961     case DataType::Type::kInt32:
5962     case DataType::Type::kInt64:
5963       locations->SetInAt(0, Location::RequiresRegister());
5964       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5965       break;
5966     case DataType::Type::kFloat32:
5967     case DataType::Type::kFloat64:
5968       locations->SetInAt(0, Location::RequiresFpuRegister());
5969       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5970       break;
5971     default:
5972       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5973   }
5974 }
5975 
5976 void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) {
5977   switch (abs->GetResultType()) {
5978     case DataType::Type::kInt32:
5979     case DataType::Type::kInt64: {
5980       Register in_reg = InputRegisterAt(abs, 0);
5981       Register out_reg = OutputRegister(abs);
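      // out = (in < 0) ? -in : in, using a compare followed by a conditional negate.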
5982       __ Cmp(in_reg, Operand(0));
5983       __ Cneg(out_reg, in_reg, lt);
5984       break;
5985     }
5986     case DataType::Type::kFloat32:
5987     case DataType::Type::kFloat64: {
5988       VRegister in_reg = InputFPRegisterAt(abs, 0);
5989       VRegister out_reg = OutputFPRegister(abs);
5990       __ Fabs(out_reg, in_reg);
5991       break;
5992     }
5993     default:
5994       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5995   }
5996 }
5997 
5998 void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
5999   constructor_fence->SetLocations(nullptr);
6000 }
6001 
6002 void InstructionCodeGeneratorARM64::VisitConstructorFence(
6003     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
6004   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
6005 }
6006 
6007 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
6008   memory_barrier->SetLocations(nullptr);
6009 }
6010 
6011 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
6012   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
6013 }
6014 
6015 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
6016   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6017   DataType::Type return_type = instruction->InputAt(0)->GetType();
6018   locations->SetInAt(0, ARM64ReturnLocation(return_type));
6019 }
6020 
6021 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* ret) {
6022   if (GetGraph()->IsCompilingOsr()) {
6023     // To simplify callers of an OSR method, we put the return value in both
6024     // floating point and core register.
6025     switch (ret->InputAt(0)->GetType()) {
6026       case DataType::Type::kFloat32:
6027         __ Fmov(w0, s0);
6028         break;
6029       case DataType::Type::kFloat64:
6030         __ Fmov(x0, d0);
6031         break;
6032       default:
6033         break;
6034     }
6035   }
6036   codegen_->GenerateFrameExit();
6037 }
6038 
6039 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
6040   instruction->SetLocations(nullptr);
6041 }
6042 
6043 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) {
6044   codegen_->GenerateFrameExit();
6045 }
6046 
6047 void LocationsBuilderARM64::VisitRor(HRor* ror) {
6048   HandleBinaryOp(ror);
6049 }
6050 
6051 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
6052   HandleBinaryOp(ror);
6053 }
6054 
6055 void LocationsBuilderARM64::VisitShl(HShl* shl) {
6056   HandleShift(shl);
6057 }
6058 
6059 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
6060   HandleShift(shl);
6061 }
6062 
6063 void LocationsBuilderARM64::VisitShr(HShr* shr) {
6064   HandleShift(shr);
6065 }
6066 
6067 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
6068   HandleShift(shr);
6069 }
6070 
6071 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
6072   HandleBinaryOp(instruction);
6073 }
6074 
6075 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
6076   HandleBinaryOp(instruction);
6077 }
6078 
6079 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6080   HandleFieldGet(instruction, instruction->GetFieldInfo());
6081 }
6082 
6083 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6084   HandleFieldGet(instruction, instruction->GetFieldInfo());
6085 }
6086 
6087 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6088   HandleFieldSet(instruction);
6089 }
6090 
6091 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6092   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
6093 }
6094 
6095 void LocationsBuilderARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6096   codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(x0));
6097 }
6098 
6099 void InstructionCodeGeneratorARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
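  // Load the append format into w0, where the kQuickStringBuilderAppend entry point expects it.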
6100   __ Mov(w0, instruction->GetFormat()->GetValue());
6101   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6102 }
6103 
6104 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
6105     HUnresolvedInstanceFieldGet* instruction) {
6106   FieldAccessCallingConventionARM64 calling_convention;
6107   codegen_->CreateUnresolvedFieldLocationSummary(
6108       instruction, instruction->GetFieldType(), calling_convention);
6109 }
6110 
6111 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
6112     HUnresolvedInstanceFieldGet* instruction) {
6113   FieldAccessCallingConventionARM64 calling_convention;
6114   codegen_->GenerateUnresolvedFieldAccess(instruction,
6115                                           instruction->GetFieldType(),
6116                                           instruction->GetFieldIndex(),
6117                                           instruction->GetDexPc(),
6118                                           calling_convention);
6119 }
6120 
6121 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
6122     HUnresolvedInstanceFieldSet* instruction) {
6123   FieldAccessCallingConventionARM64 calling_convention;
6124   codegen_->CreateUnresolvedFieldLocationSummary(
6125       instruction, instruction->GetFieldType(), calling_convention);
6126 }
6127 
6128 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
6129     HUnresolvedInstanceFieldSet* instruction) {
6130   FieldAccessCallingConventionARM64 calling_convention;
6131   codegen_->GenerateUnresolvedFieldAccess(instruction,
6132                                           instruction->GetFieldType(),
6133                                           instruction->GetFieldIndex(),
6134                                           instruction->GetDexPc(),
6135                                           calling_convention);
6136 }
6137 
6138 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
6139     HUnresolvedStaticFieldGet* instruction) {
6140   FieldAccessCallingConventionARM64 calling_convention;
6141   codegen_->CreateUnresolvedFieldLocationSummary(
6142       instruction, instruction->GetFieldType(), calling_convention);
6143 }
6144 
6145 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
6146     HUnresolvedStaticFieldGet* instruction) {
6147   FieldAccessCallingConventionARM64 calling_convention;
6148   codegen_->GenerateUnresolvedFieldAccess(instruction,
6149                                           instruction->GetFieldType(),
6150                                           instruction->GetFieldIndex(),
6151                                           instruction->GetDexPc(),
6152                                           calling_convention);
6153 }
6154 
6155 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
6156     HUnresolvedStaticFieldSet* instruction) {
6157   FieldAccessCallingConventionARM64 calling_convention;
6158   codegen_->CreateUnresolvedFieldLocationSummary(
6159       instruction, instruction->GetFieldType(), calling_convention);
6160 }
6161 
6162 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
6163     HUnresolvedStaticFieldSet* instruction) {
6164   FieldAccessCallingConventionARM64 calling_convention;
6165   codegen_->GenerateUnresolvedFieldAccess(instruction,
6166                                           instruction->GetFieldType(),
6167                                           instruction->GetFieldIndex(),
6168                                           instruction->GetDexPc(),
6169                                           calling_convention);
6170 }
6171 
6172 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
6173   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6174       instruction, LocationSummary::kCallOnSlowPath);
6175   // In the suspend check slow path, there are usually no caller-save registers at all.
6176   // If SIMD instructions are present, however, we force spilling all live SIMD
6177   // registers in full width (since the runtime only saves/restores lower part).
6178   locations->SetCustomSlowPathCallerSaves(
6179       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6180 }
6181 
6182 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
6183   HBasicBlock* block = instruction->GetBlock();
6184   if (block->GetLoopInformation() != nullptr) {
6185     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6186     // The back edge will generate the suspend check.
6187     return;
6188   }
6189   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6190     // The goto will generate the suspend check.
6191     return;
6192   }
6193   GenerateSuspendCheck(instruction, nullptr);
6194   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6195 }
6196 
6197 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
6198   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6199       instruction, LocationSummary::kCallOnMainOnly);
6200   InvokeRuntimeCallingConvention calling_convention;
6201   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
6202 }
6203 
6204 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
6205   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6206   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6207 }
6208 
6209 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
6210   LocationSummary* locations =
6211       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
6212   DataType::Type input_type = conversion->GetInputType();
6213   DataType::Type result_type = conversion->GetResultType();
6214   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
6215       << input_type << " -> " << result_type;
6216   if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) ||
6217       (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) {
6218     LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
6219   }
6220 
6221   if (DataType::IsFloatingPointType(input_type)) {
6222     locations->SetInAt(0, Location::RequiresFpuRegister());
6223   } else {
6224     locations->SetInAt(0, Location::RequiresRegister());
6225   }
6226 
6227   if (DataType::IsFloatingPointType(result_type)) {
6228     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6229   } else {
6230     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6231   }
6232 }
6233 
6234 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
6235   DataType::Type result_type = conversion->GetResultType();
6236   DataType::Type input_type = conversion->GetInputType();
6237 
6238   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
6239       << input_type << " -> " << result_type;
6240 
6241   if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) {
6242     int result_size = DataType::Size(result_type);
6243     int input_size = DataType::Size(input_type);
6244     int min_size = std::min(result_size, input_size);
6245     Register output = OutputRegister(conversion);
6246     Register source = InputRegisterAt(conversion, 0);
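    // Integral-to-integral conversions: a plain W-register move for long->int, a zero-extension
    // (Ubfx) for unsigned results or unsigned widening, and a sign-extension (Sbfx) over the
    // narrower width otherwise.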
6247     if (result_type == DataType::Type::kInt32 && input_type == DataType::Type::kInt64) {
6248       // 'int' values are used directly as W registers, discarding the top
6249       // bits, so we don't need to sign-extend and can just perform a move.
6250       // We do not pass the `kDiscardForSameWReg` argument to force clearing the
6251       // top 32 bits of the target register. We theoretically could leave those
6252       // bits unchanged, but we would have to make sure that no code uses a
6253       // 32bit input value as a 64bit value assuming that the top 32 bits are
6254       // zero.
6255       __ Mov(output.W(), source.W());
6256     } else if (DataType::IsUnsignedType(result_type) ||
6257                (DataType::IsUnsignedType(input_type) && input_size < result_size)) {
6258       __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, result_size * kBitsPerByte);
6259     } else {
6260       __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
6261     }
6262   } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) {
6263     __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
6264   } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) {
6265     CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64);
6266     __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
6267   } else if (DataType::IsFloatingPointType(result_type) &&
6268              DataType::IsFloatingPointType(input_type)) {
6269     __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
6270   } else {
6271     LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
6272                 << " to " << result_type;
6273   }
6274 }
6275 
6276 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
6277   HandleShift(ushr);
6278 }
6279 
6280 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
6281   HandleShift(ushr);
6282 }
6283 
6284 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
6285   HandleBinaryOp(instruction);
6286 }
6287 
6288 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
6289   HandleBinaryOp(instruction);
6290 }
6291 
6292 void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6293   // Nothing to do, this should be removed during prepare for register allocator.
6294   LOG(FATAL) << "Unreachable";
6295 }
6296 
6297 void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6298   // Nothing to do, this should be removed during prepare for register allocator.
6299   LOG(FATAL) << "Unreachable";
6300 }
6301 
6302 // Simple implementation of packed switch - generate cascaded compare/jumps.
6303 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6304   LocationSummary* locations =
6305       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6306   locations->SetInAt(0, Location::RequiresRegister());
6307 }
6308 
6309 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6310   int32_t lower_bound = switch_instr->GetStartValue();
6311   uint32_t num_entries = switch_instr->GetNumEntries();
6312   Register value_reg = InputRegisterAt(switch_instr, 0);
6313   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
6314 
6315   // Roughly assume at most 16 instructions are generated on average per HIR in the graph.
6316   static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
6317   // ADR has a limited range (+/- 1 MB), so we set a threshold for the number of HIRs in the
6318   // graph to make sure we don't emit it if the ADR target may be out of range.
6319   // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
6320   // ranges and emit the tables only as required.
6321   static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;
6322 
6323   if (num_entries <= kPackedSwitchCompareJumpThreshold ||
6324       // Current instruction id is an upper bound of the number of HIRs in the graph.
6325       GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
6326     // Create a series of compare/jumps.
6327     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
6328     Register temp = temps.AcquireW();
6329     __ Subs(temp, value_reg, Operand(lower_bound));
6330 
6331     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
6332     // Jump to successors[0] if value == lower_bound.
6333     __ B(eq, codegen_->GetLabelOf(successors[0]));
6334     int32_t last_index = 0;
6335     for (; num_entries - last_index > 2; last_index += 2) {
6336       __ Subs(temp, temp, Operand(2));
6337       // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
6338       __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
6339       // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
6340       __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
6341     }
6342     if (num_entries - last_index == 2) {
6343       // The last missing case_value.
6344       __ Cmp(temp, Operand(1));
6345       __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
6346     }
6347 
6348     // And the default for any other value.
6349     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
6350       __ B(codegen_->GetLabelOf(default_block));
6351     }
6352   } else {
6353     JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);
6354 
6355     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
6356 
6357     // The instructions below should use at most one blocked register. Since there are two
6358     // blocked registers, we are free to block one.
6359     Register temp_w = temps.AcquireW();
6360     Register index;
6361     // Remove the bias.
6362     if (lower_bound != 0) {
6363       index = temp_w;
6364       __ Sub(index, value_reg, Operand(lower_bound));
6365     } else {
6366       index = value_reg;
6367     }
6368 
6369     // Jump to the default block if the index is out of range.
6370     __ Cmp(index, Operand(num_entries));
6371     __ B(hs, codegen_->GetLabelOf(default_block));
6372 
6373     // In the current VIXL implementation, encoding the immediate value for Adr does not
6374     // require any blocked registers, so we are free to use both VIXL blocked registers to
6375     // reduce register pressure.
6376     Register table_base = temps.AcquireX();
6377     // Load jump offset from the table.
6378     __ Adr(table_base, jump_table->GetTableStartLabel());
6379     Register jump_offset = temp_w;
6380     __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
6381 
6382     // Jump to the target block by branching to table_base (PC-relative) + offset.
6383     Register target_address = table_base;
6384     __ Add(target_address, table_base, Operand(jump_offset, SXTW));
6385     __ Br(target_address);
6386   }
6387 }
6388 
6389 void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
6390     HInstruction* instruction,
6391     Location out,
6392     uint32_t offset,
6393     Location maybe_temp,
6394     ReadBarrierOption read_barrier_option) {
6395   DataType::Type type = DataType::Type::kReference;
6396   Register out_reg = RegisterFrom(out, type);
6397   if (read_barrier_option == kWithReadBarrier) {
6398     CHECK(kEmitCompilerReadBarrier);
6399     if (kUseBakerReadBarrier) {
6400       // Load with fast path based Baker's read barrier.
6401       // /* HeapReference<Object> */ out = *(out + offset)
6402       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6403                                                       out,
6404                                                       out_reg,
6405                                                       offset,
6406                                                       maybe_temp,
6407                                                       /* needs_null_check= */ false,
6408                                                       /* use_load_acquire= */ false);
6409     } else {
6410       // Load with slow path based read barrier.
6411       // Save the value of `out` into `maybe_temp` before overwriting it
6412       // in the following move operation, as we will need it for the
6413       // read barrier below.
6414       Register temp_reg = RegisterFrom(maybe_temp, type);
6415       __ Mov(temp_reg, out_reg);
6416       // /* HeapReference<Object> */ out = *(out + offset)
6417       __ Ldr(out_reg, HeapOperand(out_reg, offset));
6418       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6419     }
6420   } else {
6421     // Plain load with no read barrier.
6422     // /* HeapReference<Object> */ out = *(out + offset)
6423     __ Ldr(out_reg, HeapOperand(out_reg, offset));
6424     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
6425   }
6426 }
6427 
6428 void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
6429     HInstruction* instruction,
6430     Location out,
6431     Location obj,
6432     uint32_t offset,
6433     Location maybe_temp,
6434     ReadBarrierOption read_barrier_option) {
6435   DataType::Type type = DataType::Type::kReference;
6436   Register out_reg = RegisterFrom(out, type);
6437   Register obj_reg = RegisterFrom(obj, type);
6438   if (read_barrier_option == kWithReadBarrier) {
6439     CHECK(kEmitCompilerReadBarrier);
6440     if (kUseBakerReadBarrier) {
6441       // Load with fast path based Baker's read barrier.
6442       // /* HeapReference<Object> */ out = *(obj + offset)
6443       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6444                                                       out,
6445                                                       obj_reg,
6446                                                       offset,
6447                                                       maybe_temp,
6448                                                       /* needs_null_check= */ false,
6449                                                       /* use_load_acquire= */ false);
6450     } else {
6451       // Load with slow path based read barrier.
6452       // /* HeapReference<Object> */ out = *(obj + offset)
6453       __ Ldr(out_reg, HeapOperand(obj_reg, offset));
6454       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
6455     }
6456   } else {
6457     // Plain load with no read barrier.
6458     // /* HeapReference<Object> */ out = *(obj + offset)
6459     __ Ldr(out_reg, HeapOperand(obj_reg, offset));
6460     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
6461   }
6462 }
6463 
6464 void CodeGeneratorARM64::GenerateGcRootFieldLoad(
6465     HInstruction* instruction,
6466     Location root,
6467     Register obj,
6468     uint32_t offset,
6469     vixl::aarch64::Label* fixup_label,
6470     ReadBarrierOption read_barrier_option) {
6471   DCHECK(fixup_label == nullptr || offset == 0u);
6472   Register root_reg = RegisterFrom(root, DataType::Type::kReference);
6473   if (read_barrier_option == kWithReadBarrier) {
6474     DCHECK(kEmitCompilerReadBarrier);
6475     if (kUseBakerReadBarrier) {
6476       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
6477       // Baker's read barriers are used.
6478 
6479       // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
6480       // the Marking Register) to decide whether we need to enter
6481       // the slow path to mark the GC root.
6482       //
6483       // We use shared thunks for the slow path; shared within the method
6484       // for JIT, across methods for AOT. That thunk checks the reference
6485       // and jumps to the entrypoint if needed.
6486       //
6487       //     lr = &return_address;
6488       //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
6489       //     if (mr) {  // Thread::Current()->GetIsGcMarking()
6490       //       goto gc_root_thunk<root_reg>(lr)
6491       //     }
6492       //   return_address:
6493 
6494       UseScratchRegisterScope temps(GetVIXLAssembler());
6495       DCHECK(temps.IsAvailable(ip0));
6496       DCHECK(temps.IsAvailable(ip1));
6497       temps.Exclude(ip0, ip1);
6498       uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
6499 
6500       ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
6501       vixl::aarch64::Label return_address;
6502       __ adr(lr, &return_address);
6503       if (fixup_label != nullptr) {
6504         __ bind(fixup_label);
6505       }
6506       static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
6507                     "GC root LDR must be 2 instructions (8B) before the return address label.");
6508       __ ldr(root_reg, MemOperand(obj.X(), offset));
6509       EmitBakerReadBarrierCbnz(custom_data);
6510       __ bind(&return_address);
6511     } else {
6512       // GC root loaded through a slow path for read barriers other
6513       // than Baker's.
6514       // /* GcRoot<mirror::Object>* */ root = obj + offset
6515       if (fixup_label == nullptr) {
6516         __ Add(root_reg.X(), obj.X(), offset);
6517       } else {
6518         EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
6519       }
6520       // /* mirror::Object* */ root = root->Read()
6521       GenerateReadBarrierForRootSlow(instruction, root, root);
6522     }
6523   } else {
6524     // Plain GC root load with no read barrier.
6525     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
6526     if (fixup_label == nullptr) {
6527       __ Ldr(root_reg, MemOperand(obj, offset));
6528     } else {
6529       EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
6530     }
6531     // Note that GC roots are not affected by heap poisoning, thus we
6532     // do not have to unpoison `root_reg` here.
6533   }
6534   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6535 }
6536 
6537 void CodeGeneratorARM64::GenerateIntrinsicCasMoveWithBakerReadBarrier(
6538     vixl::aarch64::Register marked_old_value,
6539     vixl::aarch64::Register old_value) {
6540   DCHECK(kEmitCompilerReadBarrier);
6541   DCHECK(kUseBakerReadBarrier);
6542 
6543   // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
6544   uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked_old_value.GetCode());
6545 
6546   ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
6547   vixl::aarch64::Label return_address;
6548   __ adr(lr, &return_address);
6549   static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
6550                 "GC root LDR must be 2 instructions (8B) before the return address label.");
6551   __ mov(marked_old_value, old_value);
6552   EmitBakerReadBarrierCbnz(custom_data);
6553   __ bind(&return_address);
6554 }
6555 
6556 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6557                                                                Location ref,
6558                                                                vixl::aarch64::Register obj,
6559                                                                const vixl::aarch64::MemOperand& src,
6560                                                                bool needs_null_check,
6561                                                                bool use_load_acquire) {
6562   DCHECK(kEmitCompilerReadBarrier);
6563   DCHECK(kUseBakerReadBarrier);
6564 
6565   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
6566   // Marking Register) to decide whether we need to enter the slow
6567   // path to mark the reference. Then, in the slow path, check the
6568   // gray bit in the lock word of the reference's holder (`obj`) to
6569   // decide whether to mark `ref` or not.
6570   //
6571   // We use shared thunks for the slow path; shared within the method
6572   // for JIT, across methods for AOT. That thunk checks the holder
6573   // and jumps to the entrypoint if needed. If the holder is not gray,
6574   // it creates a fake dependency and returns to the LDR instruction.
6575   //
6576   //     lr = &gray_return_address;
6577   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
6578   //       goto field_thunk<holder_reg, base_reg, use_load_acquire>(lr)
6579   //     }
6580   //   not_gray_return_address:
6581   //     // Original reference load. If the offset is too large to fit
6582   //     // into LDR, we use an adjusted base register here.
6583   //     HeapReference<mirror::Object> reference = *(obj+offset);
6584   //   gray_return_address:
6585 
6586   DCHECK(src.GetAddrMode() == vixl::aarch64::Offset);
6587   DCHECK_ALIGNED(src.GetOffset(), sizeof(mirror::HeapReference<mirror::Object>));
6588 
6589   UseScratchRegisterScope temps(GetVIXLAssembler());
6590   DCHECK(temps.IsAvailable(ip0));
6591   DCHECK(temps.IsAvailable(ip1));
6592   temps.Exclude(ip0, ip1);
6593   uint32_t custom_data = use_load_acquire
6594       ? EncodeBakerReadBarrierAcquireData(src.GetBaseRegister().GetCode(), obj.GetCode())
6595       : EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode());
6596 
6597   {
6598     ExactAssemblyScope guard(GetVIXLAssembler(),
6599                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
6600     vixl::aarch64::Label return_address;
6601     __ adr(lr, &return_address);
6602     EmitBakerReadBarrierCbnz(custom_data);
6603     static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6604                   "Field LDR must be 1 instruction (4B) before the return address label; "
6605                   " 2 instructions (8B) for heap poisoning.");
6606     Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
6607     if (use_load_acquire) {
6608       DCHECK_EQ(src.GetOffset(), 0);
6609       __ ldar(ref_reg, src);
6610     } else {
6611       __ ldr(ref_reg, src);
6612     }
6613     if (needs_null_check) {
6614       MaybeRecordImplicitNullCheck(instruction);
6615     }
6616     // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
6617     // macro instructions disallowed in ExactAssemblyScope.
6618     if (kPoisonHeapReferences) {
6619       __ neg(ref_reg, Operand(ref_reg));
6620     }
6621     __ bind(&return_address);
6622   }
6623   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
6624 }
6625 
6626 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6627                                                                Location ref,
6628                                                                Register obj,
6629                                                                uint32_t offset,
6630                                                                Location maybe_temp,
6631                                                                bool needs_null_check,
6632                                                                bool use_load_acquire) {
6633   DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
6634   Register base = obj;
6635   if (use_load_acquire) {
6636     DCHECK(maybe_temp.IsRegister());
6637     base = WRegisterFrom(maybe_temp);
6638     __ Add(base, obj, offset);
6639     offset = 0u;
6640   } else if (offset >= kReferenceLoadMinFarOffset) {
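    // The offset is too large for the LDR immediate used for the reference load; rebase onto a
    // temporary register and keep only the low bits of the offset.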
6641     DCHECK(maybe_temp.IsRegister());
6642     base = WRegisterFrom(maybe_temp);
6643     static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
6644     __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
6645     offset &= (kReferenceLoadMinFarOffset - 1u);
6646   }
6647   MemOperand src(base.X(), offset);
6648   GenerateFieldLoadWithBakerReadBarrier(
6649       instruction, ref, obj, src, needs_null_check, use_load_acquire);
6650 }
6651 
6652 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction,
6653                                                                Location ref,
6654                                                                Register obj,
6655                                                                uint32_t data_offset,
6656                                                                Location index,
6657                                                                bool needs_null_check) {
6658   DCHECK(kEmitCompilerReadBarrier);
6659   DCHECK(kUseBakerReadBarrier);
6660 
6661   static_assert(
6662       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6663       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6664   size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);
6665 
6666   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
6667   // Marking Register) to decide whether we need to enter the slow
6668   // path to mark the reference. Then, in the slow path, check the
6669   // gray bit in the lock word of the reference's holder (`obj`) to
6670   // decide whether to mark `ref` or not.
6671   //
6672   // We use shared thunks for the slow path; shared within the method
6673   // for JIT, across methods for AOT. That thunk checks the holder
6674   // and jumps to the entrypoint if needed. If the holder is not gray,
6675   // it creates a fake dependency and returns to the LDR instruction.
6676   //
6677   //     lr = &gray_return_address;
6678   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
6679   //       goto array_thunk<base_reg>(lr)
6680   //     }
6681   //   not_gray_return_address:
6682   //     // Original reference load. If the offset is too large to fit
6683   //     // into LDR, we use an adjusted base register here.
6684   //     HeapReference<mirror::Object> reference = data[index];
6685   //   gray_return_address:
6686 
6687   DCHECK(index.IsValid());
6688   Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
6689   Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
6690 
6691   UseScratchRegisterScope temps(GetVIXLAssembler());
6692   DCHECK(temps.IsAvailable(ip0));
6693   DCHECK(temps.IsAvailable(ip1));
6694   temps.Exclude(ip0, ip1);
6695 
6696   Register temp;
6697   if (instruction->GetArray()->IsIntermediateAddress()) {
6698     // We do not need to compute the intermediate address from the array: the
6699     // input instruction has done it already. See the comment in
6700     // `TryExtractArrayAccessAddress()`.
6701     if (kIsDebugBuild) {
6702       HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
6703       DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
6704     }
6705     temp = obj;
6706   } else {
6707     temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
6708     __ Add(temp.X(), obj.X(), Operand(data_offset));
6709   }
6710 
6711   uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
6712 
6713   {
6714     ExactAssemblyScope guard(GetVIXLAssembler(),
6715                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
6716     vixl::aarch64::Label return_address;
6717     __ adr(lr, &return_address);
6718     EmitBakerReadBarrierCbnz(custom_data);
6719     static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6720                   "Array LDR must be 1 instruction (4B) before the return address label; "
6721                   " 2 instructions (8B) for heap poisoning.");
6722     __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
6723     DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
6724     // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
6725     // macro instructions disallowed in ExactAssemblyScope.
6726     if (kPoisonHeapReferences) {
6727       __ neg(ref_reg, Operand(ref_reg));
6728     }
6729     __ bind(&return_address);
6730   }
6731   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
6732 }
6733 
6734 void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
6735   // The following condition is a compile-time one, so it does not have a run-time cost.
6736   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
6737     // The following condition is a run-time one; it is executed after the
6738     // previous compile-time test, to avoid penalizing non-debug builds.
6739     if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
6740       UseScratchRegisterScope temps(GetVIXLAssembler());
6741       Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW();
6742       GetAssembler()->GenerateMarkingRegisterCheck(temp, code);
6743     }
6744   }
6745 }
6746 
6747 SlowPathCodeARM64* CodeGeneratorARM64::AddReadBarrierSlowPath(HInstruction* instruction,
6748                                                               Location out,
6749                                                               Location ref,
6750                                                               Location obj,
6751                                                               uint32_t offset,
6752                                                               Location index) {
6753   SlowPathCodeARM64* slow_path = new (GetScopedAllocator())
6754       ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
6755   AddSlowPath(slow_path);
6756   return slow_path;
6757 }
6758 
6759 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
6760                                                  Location out,
6761                                                  Location ref,
6762                                                  Location obj,
6763                                                  uint32_t offset,
6764                                                  Location index) {
6765   DCHECK(kEmitCompilerReadBarrier);
6766 
6767   // Insert a slow path based read barrier *after* the reference load.
6768   //
6769   // If heap poisoning is enabled, the unpoisoning of the loaded
6770   // reference will be carried out by the runtime within the slow
6771   // path.
6772   //
6773   // Note that `ref` currently does not get unpoisoned (when heap
6774   // poisoning is enabled), which is alright as the `ref` argument is
6775   // not used by the artReadBarrierSlow entry point.
6776   //
6777   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6778   SlowPathCodeARM64* slow_path = AddReadBarrierSlowPath(instruction, out, ref, obj, offset, index);
6779 
6780   __ B(slow_path->GetEntryLabel());
6781   __ Bind(slow_path->GetExitLabel());
6782 }
6783 
6784 void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6785                                                       Location out,
6786                                                       Location ref,
6787                                                       Location obj,
6788                                                       uint32_t offset,
6789                                                       Location index) {
6790   if (kEmitCompilerReadBarrier) {
6791     // Baker's read barriers shall be handled by the fast path
6792     // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
6793     DCHECK(!kUseBakerReadBarrier);
6794     // If heap poisoning is enabled, unpoisoning will be taken care of
6795     // by the runtime within the slow path.
6796     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6797   } else if (kPoisonHeapReferences) {
6798     GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
6799   }
6800 }
6801 
6802 void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6803                                                         Location out,
6804                                                         Location root) {
6805   DCHECK(kEmitCompilerReadBarrier);
6806 
6807   // Insert a slow path based read barrier *after* the GC root load.
6808   //
6809   // Note that GC roots are not affected by heap poisoning, so we do
6810   // not need to do anything special for this here.
6811   SlowPathCodeARM64* slow_path =
6812       new (GetScopedAllocator()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
6813   AddSlowPath(slow_path);
6814 
6815   __ B(slow_path->GetEntryLabel());
6816   __ Bind(slow_path->GetExitLabel());
6817 }
6818 
6819 void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
6820   LocationSummary* locations =
6821       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6822   locations->SetInAt(0, Location::RequiresRegister());
6823   locations->SetOut(Location::RequiresRegister());
6824 }
6825 
VisitClassTableGet(HClassTableGet * instruction)6826 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
6827   LocationSummary* locations = instruction->GetLocations();
6828   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
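    // Vtable lookup: the vtable is embedded in the class object, so a single load
    // from the input class at `method_offset` yields the ArtMethod*.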
    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
        instruction->GetIndex(), kArm64PointerSize).SizeValue();
    __ Ldr(XRegisterFrom(locations->Out()),
           MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
  } else {
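    // IMT lookup: first load the ImTable pointer from the class, then load the
    // ArtMethod* from the table entry at `method_offset`.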
    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
        instruction->GetIndex(), kArm64PointerSize));
    __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
        mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
    __ Ldr(XRegisterFrom(locations->Out()),
           MemOperand(XRegisterFrom(locations->Out()), method_offset));
  }
}

static void PatchJitRootUse(uint8_t* code,
                            const uint8_t* roots_data,
                            vixl::aarch64::Literal<uint32_t>* literal,
                            uint64_t index_in_table) {
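  // Patch the 32-bit literal in the emitted code with the absolute address of the
  // corresponding GcRoot<mirror::Object> slot in the JIT root table. The
  // dchecked_integral_cast below verifies that this address fits in 32 bits.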
  uint32_t literal_offset = literal->GetOffset();
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  uint8_t* data = code + literal_offset;
  reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
}

void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const auto& entry : jit_string_patches_) {
    const StringReference& string_reference = entry.first;
    vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
  }
  for (const auto& entry : jit_class_patches_) {
    const TypeReference& type_reference = entry.first;
    vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
  }
}

MemOperand InstructionCodeGeneratorARM64::VecNEONAddress(
    HVecMemoryOperation* instruction,
    UseScratchRegisterScope* temps_scope,
    size_t size,
    bool is_string_char_at,
    /*out*/ Register* scratch) {
  LocationSummary* locations = instruction->GetLocations();
  Register base = InputRegisterAt(instruction, 0);

  if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
    DCHECK(!is_string_char_at);
    return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
  }

  Location index = locations->InAt(1);
  uint32_t offset = is_string_char_at
      ? mirror::String::ValueOffset().Uint32Value()
      : mirror::Array::DataOffset(size).Uint32Value();
  size_t shift = ComponentSizeShiftWidth(size);

  // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());

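  // For a constant index, fold the scaled element offset into the immediate offset;
  // otherwise compute `base + (index << shift)` into a scratch register and keep the
  // data offset as the immediate.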
  if (index.IsConstant()) {
    offset += Int64FromLocation(index) << shift;
    return HeapOperand(base, offset);
  } else {
    *scratch = temps_scope->AcquireSameSizeAs(base);
    __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
    return HeapOperand(*scratch, offset);
  }
}

SVEMemOperand InstructionCodeGeneratorARM64::VecSVEAddress(
    HVecMemoryOperation* instruction,
    UseScratchRegisterScope* temps_scope,
    size_t size,
    bool is_string_char_at,
    /*out*/ Register* scratch) {
  LocationSummary* locations = instruction->GetLocations();
  Register base = InputRegisterAt(instruction, 0);
  Location index = locations->InAt(1);

  // TODO: Support intermediate address sharing for SVE accesses.
  DCHECK(!instruction->InputAt(1)->IsIntermediateAddressIndex());
  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
  DCHECK(!index.IsConstant());

  uint32_t offset = is_string_char_at
      ? mirror::String::ValueOffset().Uint32Value()
      : mirror::Array::DataOffset(size).Uint32Value();
  size_t shift = ComponentSizeShiftWidth(size);

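  // The address is formed as [scratch, index, LSL #shift]: materialize `base + offset`
  // in a scratch register and let the SVE addressing mode scale the index register.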
  *scratch = temps_scope->AcquireSameSizeAs(base);
  __ Add(*scratch, base, offset);
  return SVEMemOperand(scratch->X(), XRegisterFrom(index), LSL, shift);
}

#undef __
#undef QUICK_ENTRY_POINT

#define __ assembler.GetVIXLAssembler()->

static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
                                     vixl::aarch64::Register base_reg,
                                     vixl::aarch64::MemOperand& lock_word,
                                     vixl::aarch64::Label* slow_path,
                                     vixl::aarch64::Label* throw_npe = nullptr) {
  // Load the lock word containing the rb_state.
  __ Ldr(ip0.W(), lock_word);
  // Given the numeric representation, it's enough to check the low bit of the rb_state.
  static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
  __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path);
  static_assert(
      BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
      "Field and array LDR offsets must be the same to reuse the same code.");
  // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
  if (throw_npe != nullptr) {
    __ Bind(throw_npe);
  }
  // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
  static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
                "Field LDR must be 1 instruction (4B) before the return address label; "
                " 2 instructions (8B) for heap poisoning.");
  __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
  // Introduce a dependency on the lock_word including rb_state,
  // to prevent load-load reordering, and without using
  // a memory barrier (which would be more expensive).
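  // ip0 was loaded with `Ldr ip0.W()`, so its upper 32 bits are zero and `ip0 LSR 32`
  // is always 0: the ADD leaves base_reg unchanged while still creating the dependency.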
  __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
  __ Br(lr);          // And return back to the function.
  // Note: The fake dependency is unnecessary for the slow path.
}

// Load the read barrier introspection entrypoint in register `entrypoint`.
static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler,
                                                       vixl::aarch64::Register entrypoint) {
  // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
  DCHECK_EQ(ip0.GetCode(), 16u);
  const int32_t entry_point_offset =
      Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
  __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
}

void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
                                                      uint32_t encoded_data,
                                                      /*out*/ std::string* debug_name) {
  BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
  switch (kind) {
    case BakerReadBarrierKind::kField:
    case BakerReadBarrierKind::kAcquire: {
      Register base_reg =
          vixl::aarch64::XRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
      CheckValidReg(base_reg.GetCode());
      Register holder_reg =
          vixl::aarch64::XRegister(BakerReadBarrierSecondRegField::Decode(encoded_data));
      CheckValidReg(holder_reg.GetCode());
      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
      temps.Exclude(ip0, ip1);
      // In the case of a field load (with relaxed semantic), if `base_reg` differs from
      // `holder_reg`, the offset was too large and we must have emitted (during the construction
      // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved
      // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before
      // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do
      // not necessarily do that check before going to the thunk.
      //
      // In the case of a field load with load-acquire semantics (where `base_reg` always differs
      // from `holder_reg`), we also need an explicit null check when implicit null checks are
      // allowed, as we do not emit one before going to the thunk.
      vixl::aarch64::Label throw_npe_label;
      vixl::aarch64::Label* throw_npe = nullptr;
      if (GetCompilerOptions().GetImplicitNullChecks() &&
          (holder_reg.Is(base_reg) || (kind == BakerReadBarrierKind::kAcquire))) {
        throw_npe = &throw_npe_label;
        __ Cbz(holder_reg.W(), throw_npe);
      }
      // Check if the holder is gray and, if not, add fake dependency to the base register
      // and return to the LDR instruction to load the reference. Otherwise, use introspection
      // to load the reference and call the entrypoint that performs further checks on the
      // reference and marks it if needed.
      vixl::aarch64::Label slow_path;
      MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe);
      __ Bind(&slow_path);
      if (kind == BakerReadBarrierKind::kField) {
        MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
        __ Ldr(ip0.W(), ldr_address);         // Load the LDR (immediate) unsigned offset.
        LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
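        // LDR (immediate, unsigned offset) encodes a 12-bit imm12 in bits [21:10] of the
        // instruction, scaled by the access size; the `LSL 2` below rescales it for the
        // 32-bit reference load.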
        __ Ubfx(ip0.W(), ip0.W(), 10, 12);    // Extract the offset.
        __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2));   // Load the reference.
      } else {
        DCHECK(kind == BakerReadBarrierKind::kAcquire);
        DCHECK(!base_reg.Is(holder_reg));
        LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
        __ Ldar(ip0.W(), MemOperand(base_reg));
      }
      // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
      __ Br(ip1);                           // Jump to the entrypoint.
      break;
    }
    case BakerReadBarrierKind::kArray: {
      Register base_reg =
          vixl::aarch64::XRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
      CheckValidReg(base_reg.GetCode());
      DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
                BakerReadBarrierSecondRegField::Decode(encoded_data));
      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
      temps.Exclude(ip0, ip1);
      vixl::aarch64::Label slow_path;
      int32_t data_offset =
          mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
      MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
      DCHECK_LT(lock_word.GetOffset(), 0);
      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
      __ Bind(&slow_path);
      MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
      __ Ldr(ip0.W(), ldr_address);         // Load the LDR (register) unsigned offset.
      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
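      // LDR (register) encodes the index register Rm in bits [20:16] and has bit 21 set,
      // so the 6-bit extract below yields the index register number plus 32.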
      __ Ubfx(ip0, ip0, 16, 6);             // Extract the index register, plus 32 (bit 21 is set).
      __ Bfi(ip1, ip0, 3, 6);               // Insert ip0 into the entrypoint address to create
                                            // a switch case target based on the index register.
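      // (The BFI overwrites bits [8:3] of the entrypoint address; this presumably relies on
      // the introspection entrypoint's per-register array cases being laid out 8 bytes apart
      // within a suitably aligned region.)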
      __ Mov(ip0, base_reg);                // Move the base register to ip0.
      __ Br(ip1);                           // Jump to the entrypoint's array switch case.
      break;
    }
    case BakerReadBarrierKind::kGcRoot: {
      // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
      // and it does not have a forwarding address), call the correct introspection entrypoint;
      // otherwise return the reference (or the extracted forwarding address).
      // There is no gray bit check for GC roots.
      Register root_reg =
          vixl::aarch64::WRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
      CheckValidReg(root_reg.GetCode());
      DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
                BakerReadBarrierSecondRegField::Decode(encoded_data));
      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
      temps.Exclude(ip0, ip1);
      vixl::aarch64::Label return_label, not_marked, forwarding_address;
      __ Cbz(root_reg, &return_label);
      MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value());
      __ Ldr(ip0.W(), lock_word);
      __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked);
      __ Bind(&return_label);
      __ Br(lr);
      __ Bind(&not_marked);
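      // A forwarding-address state has the two most significant lock word bits set; ANDing
      // the lock word with itself shifted left by one makes the sign flag their conjunction,
      // so the `mi` branch below takes the forwarding-address path.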
      __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
      __ B(&forwarding_address, mi);
      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
      // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
      // art_quick_read_barrier_mark_introspection_gc_roots.
      __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
      __ Mov(ip0.W(), root_reg);
      __ Br(ip1);
      __ Bind(&forwarding_address);
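      // Recover the forwarding address: it is stored in the lock word shifted right by
      // kForwardingAddressShift, so shifting left restores the reference.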
      __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift);
      __ Br(lr);
      break;
    }
    default:
      LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
      UNREACHABLE();
  }

  // For JIT, the slow path is considered part of the compiled method,
  // so JIT should pass null as `debug_name`.
  DCHECK(!GetCompilerOptions().IsJitCompiler() || debug_name == nullptr);
  if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
    std::ostringstream oss;
    oss << "BakerReadBarrierThunk";
    switch (kind) {
      case BakerReadBarrierKind::kField:
        oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
            << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
        break;
      case BakerReadBarrierKind::kAcquire:
        oss << "Acquire_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
            << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
        break;
      case BakerReadBarrierKind::kArray:
        oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
        DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
                  BakerReadBarrierSecondRegField::Decode(encoded_data));
        break;
      case BakerReadBarrierKind::kGcRoot:
        oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
        DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
                  BakerReadBarrierSecondRegField::Decode(encoded_data));
        break;
    }
    *debug_name = oss.str();
  }
}

#undef __

}  // namespace arm64
}  // namespace art