1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_arm64.h"
18
19 #include "aarch64/assembler-aarch64.h"
20 #include "aarch64/registers-aarch64.h"
21 #include "arch/arm64/asm_support_arm64.h"
22 #include "arch/arm64/instruction_set_features_arm64.h"
23 #include "arch/arm64/jni_frame_arm64.h"
24 #include "art_method-inl.h"
25 #include "base/bit_utils.h"
26 #include "base/bit_utils_iterator.h"
27 #include "class_root-inl.h"
28 #include "class_table.h"
29 #include "code_generator_utils.h"
30 #include "entrypoints/quick/quick_entrypoints.h"
31 #include "entrypoints/quick/quick_entrypoints_enum.h"
32 #include "gc/accounting/card_table.h"
33 #include "gc/space/image_space.h"
34 #include "heap_poisoning.h"
35 #include "interpreter/mterp/nterp.h"
36 #include "intrinsics.h"
37 #include "intrinsics_arm64.h"
38 #include "intrinsics_list.h"
39 #include "intrinsics_utils.h"
40 #include "jit/profiling_info.h"
41 #include "linker/linker_patch.h"
42 #include "lock_word.h"
43 #include "mirror/array-inl.h"
44 #include "mirror/class-inl.h"
45 #include "mirror/var_handle.h"
46 #include "offsets.h"
47 #include "optimizing/common_arm64.h"
48 #include "optimizing/nodes.h"
49 #include "profiling_info_builder.h"
50 #include "thread.h"
51 #include "trace.h"
52 #include "utils/arm64/assembler_arm64.h"
53 #include "utils/assembler.h"
54 #include "utils/stack_checks.h"
55
56 using namespace vixl::aarch64; // NOLINT(build/namespaces)
57 using vixl::ExactAssemblyScope;
58 using vixl::CodeBufferCheckScope;
59 using vixl::EmissionCheckScope;
60
61 #ifdef __
62 #error "ARM64 Codegen VIXL macro-assembler macro already defined."
63 #endif
64
65 namespace art HIDDEN {
66
67 template<class MirrorType>
68 class GcRoot;
69
70 namespace arm64 {
71
72 using helpers::ARM64EncodableConstantOrRegister;
73 using helpers::ArtVixlRegCodeCoherentForRegSet;
74 using helpers::CPURegisterFrom;
75 using helpers::DRegisterFrom;
76 using helpers::FPRegisterFrom;
77 using helpers::HeapOperand;
78 using helpers::HeapOperandFrom;
79 using helpers::InputCPURegisterOrZeroRegAt;
80 using helpers::InputFPRegisterAt;
81 using helpers::InputOperandAt;
82 using helpers::InputRegisterAt;
83 using helpers::Int64FromLocation;
84 using helpers::LocationFrom;
85 using helpers::OperandFromMemOperand;
86 using helpers::OutputCPURegister;
87 using helpers::OutputFPRegister;
88 using helpers::OutputRegister;
89 using helpers::RegisterFrom;
90 using helpers::StackOperandFrom;
91 using helpers::VIXLRegCodeFromART;
92 using helpers::WRegisterFrom;
93 using helpers::XRegisterFrom;
94
// TODO(mythria): Expand SystemRegister in vixl to include this value.
// System register encoding built with vixl's SystemRegisterEncoder.
// NOTE(review): judging by the name this is the CNTVCT_EL0 register; confirm against the
// ARM architecture manual before relying on it.
uint16_t SYS_CNTVCT_EL0 = SystemRegisterEncoder<1, 3, 14, 0, 2>::value;

// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
// jump table version generates 7 instructions and num_entries literals. The compare/jump
// sequence generates less code/data for a small num_entries.
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;

// Reference load (except object array loads) is using LDR Wt, [Xn, #offset] which can handle
// offset < 16KiB. For offsets >= 16KiB, the load shall be emitted as two or more instructions.
// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
108
ARM64Condition(IfCondition cond)109 inline Condition ARM64Condition(IfCondition cond) {
110 switch (cond) {
111 case kCondEQ: return eq;
112 case kCondNE: return ne;
113 case kCondLT: return lt;
114 case kCondLE: return le;
115 case kCondGT: return gt;
116 case kCondGE: return ge;
117 case kCondB: return lo;
118 case kCondBE: return ls;
119 case kCondA: return hi;
120 case kCondAE: return hs;
121 }
122 LOG(FATAL) << "Unreachable";
123 UNREACHABLE();
124 }
125
ARM64FPCondition(IfCondition cond,bool gt_bias)126 inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
127 // The ARM64 condition codes can express all the necessary branches, see the
128 // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
129 // There is no dex instruction or HIR that would need the missing conditions
130 // "equal or unordered" or "not equal".
131 switch (cond) {
132 case kCondEQ: return eq;
133 case kCondNE: return ne /* unordered */;
134 case kCondLT: return gt_bias ? cc : lt /* unordered */;
135 case kCondLE: return gt_bias ? ls : le /* unordered */;
136 case kCondGT: return gt_bias ? hi /* unordered */ : gt;
137 case kCondGE: return gt_bias ? cs /* unordered */ : ge;
138 default:
139 LOG(FATAL) << "UNREACHABLE";
140 UNREACHABLE();
141 }
142 }
143
ARM64ReturnLocation(DataType::Type return_type)144 Location ARM64ReturnLocation(DataType::Type return_type) {
145 // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
146 // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
147 // but we use the exact registers for clarity.
148 if (return_type == DataType::Type::kFloat32) {
149 return LocationFrom(s0);
150 } else if (return_type == DataType::Type::kFloat64) {
151 return LocationFrom(d0);
152 } else if (return_type == DataType::Type::kInt64) {
153 return LocationFrom(x0);
154 } else if (return_type == DataType::Type::kVoid) {
155 return Location::NoLocation();
156 } else {
157 return LocationFrom(w0);
158 }
159 }
160
GetReturnLocation(DataType::Type return_type)161 Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) {
162 return ARM64ReturnLocation(return_type);
163 }
164
OneRegInReferenceOutSaveEverythingCallerSaves()165 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
166 InvokeRuntimeCallingConvention calling_convention;
167 RegisterSet caller_saves = RegisterSet::Empty();
168 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
169 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
170 RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
171 DataType::Type::kReference).GetCode());
172 return caller_saves;
173 }
174
// Shorthand for emitting through the VIXL assembler of the `codegen` variable in scope;
// `codegen` is down-cast to CodeGeneratorARM64* first.
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
// Offset of quick entrypoint `x` for kArm64PointerSize, as a 32-bit integer value.
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
178
SaveLiveRegisters(CodeGenerator * codegen,LocationSummary * locations)179 void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
180 size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
181 const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
182 for (uint32_t i : LowToHighBits(core_spills)) {
183 // If the register holds an object, update the stack mask.
184 if (locations->RegisterContainsObject(i)) {
185 locations->SetStackBit(stack_offset / kVRegSize);
186 }
187 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
188 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
189 saved_core_stack_offsets_[i] = stack_offset;
190 stack_offset += kXRegSizeInBytes;
191 }
192
193 const size_t fp_reg_size = codegen->GetSlowPathFPWidth();
194 const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
195 for (uint32_t i : LowToHighBits(fp_spills)) {
196 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
197 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
198 saved_fpu_stack_offsets_[i] = stack_offset;
199 stack_offset += fp_reg_size;
200 }
201
202 InstructionCodeGeneratorARM64* visitor =
203 down_cast<CodeGeneratorARM64*>(codegen)->GetInstructionCodeGeneratorArm64();
204 visitor->SaveLiveRegistersHelper(locations, codegen->GetFirstRegisterSlotInSlowPath());
205 }
206
RestoreLiveRegisters(CodeGenerator * codegen,LocationSummary * locations)207 void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
208 InstructionCodeGeneratorARM64* visitor =
209 down_cast<CodeGeneratorARM64*>(codegen)->GetInstructionCodeGeneratorArm64();
210 visitor->RestoreLiveRegistersHelper(locations, codegen->GetFirstRegisterSlotInSlowPath());
211 }
212
213 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
214 public:
BoundsCheckSlowPathARM64(HBoundsCheck * instruction)215 explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}
216
EmitNativeCode(CodeGenerator * codegen)217 void EmitNativeCode(CodeGenerator* codegen) override {
218 LocationSummary* locations = instruction_->GetLocations();
219 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
220
221 __ Bind(GetEntryLabel());
222 if (instruction_->CanThrowIntoCatchBlock()) {
223 // Live registers will be restored in the catch block if caught.
224 SaveLiveRegisters(codegen, instruction_->GetLocations());
225 }
226 // We're moving two locations to locations that could overlap, so we need a parallel
227 // move resolver.
228 InvokeRuntimeCallingConvention calling_convention;
229 codegen->EmitParallelMoves(locations->InAt(0),
230 LocationFrom(calling_convention.GetRegisterAt(0)),
231 DataType::Type::kInt32,
232 locations->InAt(1),
233 LocationFrom(calling_convention.GetRegisterAt(1)),
234 DataType::Type::kInt32);
235 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
236 ? kQuickThrowStringBounds
237 : kQuickThrowArrayBounds;
238 arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
239 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
240 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
241 }
242
IsFatal() const243 bool IsFatal() const override { return true; }
244
GetDescription() const245 const char* GetDescription() const override { return "BoundsCheckSlowPathARM64"; }
246
247 private:
248 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
249 };
250
251 class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
252 public:
DivZeroCheckSlowPathARM64(HDivZeroCheck * instruction)253 explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}
254
EmitNativeCode(CodeGenerator * codegen)255 void EmitNativeCode(CodeGenerator* codegen) override {
256 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
257 __ Bind(GetEntryLabel());
258 arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
259 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
260 }
261
IsFatal() const262 bool IsFatal() const override { return true; }
263
GetDescription() const264 const char* GetDescription() const override { return "DivZeroCheckSlowPathARM64"; }
265
266 private:
267 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
268 };
269
270 class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
271 public:
LoadClassSlowPathARM64(HLoadClass * cls,HInstruction * at)272 LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at)
273 : SlowPathCodeARM64(at), cls_(cls) {
274 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
275 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
276 }
277
EmitNativeCode(CodeGenerator * codegen)278 void EmitNativeCode(CodeGenerator* codegen) override {
279 LocationSummary* locations = instruction_->GetLocations();
280 Location out = locations->Out();
281 const uint32_t dex_pc = instruction_->GetDexPc();
282 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
283 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
284
285 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
286 __ Bind(GetEntryLabel());
287 SaveLiveRegisters(codegen, locations);
288
289 InvokeRuntimeCallingConvention calling_convention;
290 if (must_resolve_type) {
291 DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile()) ||
292 arm64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
293 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
294 &cls_->GetDexFile()));
295 dex::TypeIndex type_index = cls_->GetTypeIndex();
296 __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
297 if (cls_->NeedsAccessCheck()) {
298 CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
299 arm64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
300 } else {
301 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
302 arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
303 }
304 // If we also must_do_clinit, the resolved type is now in the correct register.
305 } else {
306 DCHECK(must_do_clinit);
307 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
308 arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)),
309 source,
310 cls_->GetType());
311 }
312 if (must_do_clinit) {
313 arm64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
314 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
315 }
316
317 // Move the class to the desired location.
318 if (out.IsValid()) {
319 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
320 DataType::Type type = instruction_->GetType();
321 arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
322 }
323 RestoreLiveRegisters(codegen, locations);
324 __ B(GetExitLabel());
325 }
326
GetDescription() const327 const char* GetDescription() const override { return "LoadClassSlowPathARM64"; }
328
329 private:
330 // The class this slow path will load.
331 HLoadClass* const cls_;
332
333 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
334 };
335
336 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
337 public:
LoadStringSlowPathARM64(HLoadString * instruction)338 explicit LoadStringSlowPathARM64(HLoadString* instruction)
339 : SlowPathCodeARM64(instruction) {}
340
EmitNativeCode(CodeGenerator * codegen)341 void EmitNativeCode(CodeGenerator* codegen) override {
342 LocationSummary* locations = instruction_->GetLocations();
343 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
344 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
345
346 __ Bind(GetEntryLabel());
347 SaveLiveRegisters(codegen, locations);
348
349 InvokeRuntimeCallingConvention calling_convention;
350 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
351 __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
352 arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
353 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
354 DataType::Type type = instruction_->GetType();
355 arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
356
357 RestoreLiveRegisters(codegen, locations);
358
359 __ B(GetExitLabel());
360 }
361
GetDescription() const362 const char* GetDescription() const override { return "LoadStringSlowPathARM64"; }
363
364 private:
365 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
366 };
367
368 class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
369 public:
NullCheckSlowPathARM64(HNullCheck * instr)370 explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}
371
EmitNativeCode(CodeGenerator * codegen)372 void EmitNativeCode(CodeGenerator* codegen) override {
373 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
374 __ Bind(GetEntryLabel());
375 if (instruction_->CanThrowIntoCatchBlock()) {
376 // Live registers will be restored in the catch block if caught.
377 SaveLiveRegisters(codegen, instruction_->GetLocations());
378 }
379 arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
380 instruction_,
381 instruction_->GetDexPc(),
382 this);
383 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
384 }
385
IsFatal() const386 bool IsFatal() const override { return true; }
387
GetDescription() const388 const char* GetDescription() const override { return "NullCheckSlowPathARM64"; }
389
390 private:
391 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
392 };
393
394 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
395 public:
SuspendCheckSlowPathARM64(HSuspendCheck * instruction,HBasicBlock * successor)396 SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
397 : SlowPathCodeARM64(instruction), successor_(successor) {}
398
EmitNativeCode(CodeGenerator * codegen)399 void EmitNativeCode(CodeGenerator* codegen) override {
400 LocationSummary* locations = instruction_->GetLocations();
401 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
402 __ Bind(GetEntryLabel());
403 SaveLiveRegisters(codegen, locations); // Only saves live vector regs for SIMD.
404 arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
405 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
406 RestoreLiveRegisters(codegen, locations); // Only restores live vector regs for SIMD.
407 if (successor_ == nullptr) {
408 __ B(GetReturnLabel());
409 } else {
410 __ B(arm64_codegen->GetLabelOf(successor_));
411 }
412 }
413
GetReturnLabel()414 vixl::aarch64::Label* GetReturnLabel() {
415 DCHECK(successor_ == nullptr);
416 return &return_label_;
417 }
418
GetSuccessor() const419 HBasicBlock* GetSuccessor() const {
420 return successor_;
421 }
422
GetDescription() const423 const char* GetDescription() const override { return "SuspendCheckSlowPathARM64"; }
424
425 private:
426 // If not null, the block to branch to after the suspend check.
427 HBasicBlock* const successor_;
428
429 // If `successor_` is null, the label to branch to after the suspend check.
430 vixl::aarch64::Label return_label_;
431
432 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
433 };
434
435 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
436 public:
TypeCheckSlowPathARM64(HInstruction * instruction,bool is_fatal)437 TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
438 : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}
439
EmitNativeCode(CodeGenerator * codegen)440 void EmitNativeCode(CodeGenerator* codegen) override {
441 LocationSummary* locations = instruction_->GetLocations();
442
443 DCHECK(instruction_->IsCheckCast()
444 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
445 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
446 uint32_t dex_pc = instruction_->GetDexPc();
447
448 __ Bind(GetEntryLabel());
449
450 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
451 SaveLiveRegisters(codegen, locations);
452 }
453
454 // We're moving two locations to locations that could overlap, so we need a parallel
455 // move resolver.
456 InvokeRuntimeCallingConvention calling_convention;
457 codegen->EmitParallelMoves(locations->InAt(0),
458 LocationFrom(calling_convention.GetRegisterAt(0)),
459 DataType::Type::kReference,
460 locations->InAt(1),
461 LocationFrom(calling_convention.GetRegisterAt(1)),
462 DataType::Type::kReference);
463 if (instruction_->IsInstanceOf()) {
464 arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
465 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
466 DataType::Type ret_type = instruction_->GetType();
467 Location ret_loc = calling_convention.GetReturnLocation(ret_type);
468 arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
469 } else {
470 DCHECK(instruction_->IsCheckCast());
471 arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
472 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
473 }
474
475 if (!is_fatal_) {
476 RestoreLiveRegisters(codegen, locations);
477 __ B(GetExitLabel());
478 }
479 }
480
GetDescription() const481 const char* GetDescription() const override { return "TypeCheckSlowPathARM64"; }
IsFatal() const482 bool IsFatal() const override { return is_fatal_; }
483
484 private:
485 const bool is_fatal_;
486
487 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
488 };
489
490 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
491 public:
DeoptimizationSlowPathARM64(HDeoptimize * instruction)492 explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
493 : SlowPathCodeARM64(instruction) {}
494
EmitNativeCode(CodeGenerator * codegen)495 void EmitNativeCode(CodeGenerator* codegen) override {
496 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
497 __ Bind(GetEntryLabel());
498 LocationSummary* locations = instruction_->GetLocations();
499 SaveLiveRegisters(codegen, locations);
500 InvokeRuntimeCallingConvention calling_convention;
501 __ Mov(calling_convention.GetRegisterAt(0),
502 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
503 arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
504 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
505 }
506
GetDescription() const507 const char* GetDescription() const override { return "DeoptimizationSlowPathARM64"; }
508
509 private:
510 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
511 };
512
513 class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
514 public:
ArraySetSlowPathARM64(HInstruction * instruction)515 explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}
516
EmitNativeCode(CodeGenerator * codegen)517 void EmitNativeCode(CodeGenerator* codegen) override {
518 LocationSummary* locations = instruction_->GetLocations();
519 __ Bind(GetEntryLabel());
520 SaveLiveRegisters(codegen, locations);
521
522 InvokeRuntimeCallingConvention calling_convention;
523 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
524 parallel_move.AddMove(
525 locations->InAt(0),
526 LocationFrom(calling_convention.GetRegisterAt(0)),
527 DataType::Type::kReference,
528 nullptr);
529 parallel_move.AddMove(
530 locations->InAt(1),
531 LocationFrom(calling_convention.GetRegisterAt(1)),
532 DataType::Type::kInt32,
533 nullptr);
534 parallel_move.AddMove(
535 locations->InAt(2),
536 LocationFrom(calling_convention.GetRegisterAt(2)),
537 DataType::Type::kReference,
538 nullptr);
539 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
540
541 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
542 arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
543 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
544 RestoreLiveRegisters(codegen, locations);
545 __ B(GetExitLabel());
546 }
547
GetDescription() const548 const char* GetDescription() const override { return "ArraySetSlowPathARM64"; }
549
550 private:
551 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
552 };
553
EmitTable(CodeGeneratorARM64 * codegen)554 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
555 uint32_t num_entries = switch_instr_->GetNumEntries();
556 DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
557
558 // We are about to use the assembler to place literals directly. Make sure we have enough
559 // underlying code buffer and we have generated the jump table with right size.
560 EmissionCheckScope scope(codegen->GetVIXLAssembler(),
561 num_entries * sizeof(int32_t),
562 CodeBufferCheckScope::kExactSize);
563
564 __ Bind(&table_start_);
565 const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
566 for (uint32_t i = 0; i < num_entries; i++) {
567 vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
568 DCHECK(target_label->IsBound());
569 ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
570 DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
571 DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
572 Literal<int32_t> literal(jump_offset);
573 __ place(&literal);
574 }
575 }
576
577 // Slow path generating a read barrier for a heap reference.
578 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
579 public:
ReadBarrierForHeapReferenceSlowPathARM64(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)580 ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
581 Location out,
582 Location ref,
583 Location obj,
584 uint32_t offset,
585 Location index)
586 : SlowPathCodeARM64(instruction),
587 out_(out),
588 ref_(ref),
589 obj_(obj),
590 offset_(offset),
591 index_(index) {
592 // If `obj` is equal to `out` or `ref`, it means the initial object
593 // has been overwritten by (or after) the heap object reference load
594 // to be instrumented, e.g.:
595 //
596 // __ Ldr(out, HeapOperand(out, class_offset);
597 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
598 //
599 // In that case, we have lost the information about the original
600 // object, and the emitted read barrier cannot work properly.
601 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
602 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
603 }
604
EmitNativeCode(CodeGenerator * codegen)605 void EmitNativeCode(CodeGenerator* codegen) override {
606 DCHECK(codegen->EmitReadBarrier());
607 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
608 LocationSummary* locations = instruction_->GetLocations();
609 DataType::Type type = DataType::Type::kReference;
610 DCHECK(locations->CanCall());
611 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
612 DCHECK(instruction_->IsInstanceFieldGet() ||
613 instruction_->IsStaticFieldGet() ||
614 instruction_->IsArrayGet() ||
615 instruction_->IsInstanceOf() ||
616 instruction_->IsCheckCast() ||
617 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
618 << "Unexpected instruction in read barrier for heap reference slow path: "
619 << instruction_->DebugName();
620 // The read barrier instrumentation of object ArrayGet
621 // instructions does not support the HIntermediateAddress
622 // instruction.
623 DCHECK(!(instruction_->IsArrayGet() &&
624 instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
625
626 __ Bind(GetEntryLabel());
627
628 SaveLiveRegisters(codegen, locations);
629
630 // We may have to change the index's value, but as `index_` is a
631 // constant member (like other "inputs" of this slow path),
632 // introduce a copy of it, `index`.
633 Location index = index_;
634 if (index_.IsValid()) {
635 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
636 if (instruction_->IsArrayGet()) {
637 // Compute the actual memory offset and store it in `index`.
638 Register index_reg = RegisterFrom(index_, DataType::Type::kInt32);
639 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
640 if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
641 // We are about to change the value of `index_reg` (see the
642 // calls to vixl::MacroAssembler::Lsl and
643 // vixl::MacroAssembler::Mov below), but it has
644 // not been saved by the previous call to
645 // art::SlowPathCode::SaveLiveRegisters, as it is a
646 // callee-save register --
647 // art::SlowPathCode::SaveLiveRegisters does not consider
648 // callee-save registers, as it has been designed with the
649 // assumption that callee-save registers are supposed to be
650 // handled by the called function. So, as a callee-save
651 // register, `index_reg` _would_ eventually be saved onto
652 // the stack, but it would be too late: we would have
653 // changed its value earlier. Therefore, we manually save
654 // it here into another freely available register,
655 // `free_reg`, chosen of course among the caller-save
656 // registers (as a callee-save `free_reg` register would
657 // exhibit the same problem).
658 //
659 // Note we could have requested a temporary register from
660 // the register allocator instead; but we prefer not to, as
661 // this is a slow path, and we know we can find a
662 // caller-save register that is available.
663 Register free_reg = FindAvailableCallerSaveRegister(codegen);
664 __ Mov(free_reg.W(), index_reg);
665 index_reg = free_reg;
666 index = LocationFrom(index_reg);
667 } else {
668 // The initial register stored in `index_` has already been
669 // saved in the call to art::SlowPathCode::SaveLiveRegisters
670 // (as it is not a callee-save register), so we can freely
671 // use it.
672 }
673 // Shifting the index value contained in `index_reg` by the scale
674 // factor (2) cannot overflow in practice, as the runtime is
675 // unable to allocate object arrays with a size larger than
676 // 2^26 - 1 (that is, 2^28 - 4 bytes).
677 __ Lsl(index_reg, index_reg, DataType::SizeShift(type));
678 static_assert(
679 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
680 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
681 __ Add(index_reg, index_reg, Operand(offset_));
682 } else {
683 // In the case of the following intrinsics `index_` is not shifted by a scale factor of 2
684 // (as in the case of ArrayGet), as it is actually an offset to an object field within an
685 // object.
686 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
687 DCHECK(instruction_->GetLocations()->Intrinsified());
688 HInvoke* invoke = instruction_->AsInvoke();
689 DCHECK(IsUnsafeGetReference(invoke) ||
690 IsVarHandleGet(invoke) ||
691 IsUnsafeCASReference(invoke) ||
692 IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
693 DCHECK_EQ(offset_, 0u);
694 DCHECK(index_.IsRegister());
695 }
696 }
697
698 // We're moving two or three locations to locations that could
699 // overlap, so we need a parallel move resolver.
700 InvokeRuntimeCallingConvention calling_convention;
701 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
702 parallel_move.AddMove(ref_,
703 LocationFrom(calling_convention.GetRegisterAt(0)),
704 type,
705 nullptr);
706 parallel_move.AddMove(obj_,
707 LocationFrom(calling_convention.GetRegisterAt(1)),
708 type,
709 nullptr);
710 if (index.IsValid()) {
711 parallel_move.AddMove(index,
712 LocationFrom(calling_convention.GetRegisterAt(2)),
713 DataType::Type::kInt32,
714 nullptr);
715 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
716 } else {
717 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
718 arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
719 }
720 arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
721 instruction_,
722 instruction_->GetDexPc(),
723 this);
724 CheckEntrypointTypes<
725 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
726 arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
727
728 RestoreLiveRegisters(codegen, locations);
729
730 __ B(GetExitLabel());
731 }
732
GetDescription() const733 const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
734
735 private:
FindAvailableCallerSaveRegister(CodeGenerator * codegen)736 Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
737 size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
738 size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
739 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
740 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
741 return Register(VIXLRegCodeFromART(i), kXRegSize);
742 }
743 }
744 // We shall never fail to find a free caller-save register, as
745 // there are more than two core caller-save registers on ARM64
746 // (meaning it is possible to find one which is different from
747 // `ref` and `obj`).
748 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
749 LOG(FATAL) << "Could not find a free register";
750 UNREACHABLE();
751 }
752
753 const Location out_;
754 const Location ref_;
755 const Location obj_;
756 const uint32_t offset_;
757 // An additional location containing an index to an array.
758 // Only used for HArrayGet and the UnsafeGetObject &
759 // UnsafeGetObjectVolatile intrinsics.
760 const Location index_;
761
762 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
763 };
764
765 // Slow path generating a read barrier for a GC root.
766 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
767 public:
ReadBarrierForRootSlowPathARM64(HInstruction * instruction,Location out,Location root)768 ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
769 : SlowPathCodeARM64(instruction), out_(out), root_(root) {
770 }
771
EmitNativeCode(CodeGenerator * codegen)772 void EmitNativeCode(CodeGenerator* codegen) override {
773 DCHECK(codegen->EmitReadBarrier());
774 LocationSummary* locations = instruction_->GetLocations();
775 DataType::Type type = DataType::Type::kReference;
776 DCHECK(locations->CanCall());
777 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
778 DCHECK(instruction_->IsLoadClass() ||
779 instruction_->IsLoadString() ||
780 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
781 << "Unexpected instruction in read barrier for GC root slow path: "
782 << instruction_->DebugName();
783
784 __ Bind(GetEntryLabel());
785 SaveLiveRegisters(codegen, locations);
786
787 InvokeRuntimeCallingConvention calling_convention;
788 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
789 // The argument of the ReadBarrierForRootSlow is not a managed
790 // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
791 // thus we need a 64-bit move here, and we cannot use
792 //
793 // arm64_codegen->MoveLocation(
794 // LocationFrom(calling_convention.GetRegisterAt(0)),
795 // root_,
796 // type);
797 //
798 // which would emit a 32-bit move, as `type` is a (32-bit wide)
799 // reference type (`DataType::Type::kReference`).
800 __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
801 arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
802 instruction_,
803 instruction_->GetDexPc(),
804 this);
805 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
806 arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
807
808 RestoreLiveRegisters(codegen, locations);
809 __ B(GetExitLabel());
810 }
811
GetDescription() const812 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARM64"; }
813
814 private:
815 const Location out_;
816 const Location root_;
817
818 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
819 };
820
821 class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 {
822 public:
MethodEntryExitHooksSlowPathARM64(HInstruction * instruction)823 explicit MethodEntryExitHooksSlowPathARM64(HInstruction* instruction)
824 : SlowPathCodeARM64(instruction) {}
825
EmitNativeCode(CodeGenerator * codegen)826 void EmitNativeCode(CodeGenerator* codegen) override {
827 LocationSummary* locations = instruction_->GetLocations();
828 QuickEntrypointEnum entry_point =
829 (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
830 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
831 __ Bind(GetEntryLabel());
832 SaveLiveRegisters(codegen, locations);
833 if (instruction_->IsMethodExitHook()) {
834 __ Mov(vixl::aarch64::x4, arm64_codegen->GetFrameSize());
835 }
836 arm64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
837 RestoreLiveRegisters(codegen, locations);
838 __ B(GetExitLabel());
839 }
840
GetDescription() const841 const char* GetDescription() const override {
842 return "MethodEntryExitHooksSlowPath";
843 }
844
845 private:
846 DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARM64);
847 };
848
849 class CompileOptimizedSlowPathARM64 : public SlowPathCodeARM64 {
850 public:
CompileOptimizedSlowPathARM64(HSuspendCheck * check,Register profiling_info)851 CompileOptimizedSlowPathARM64(HSuspendCheck* check, Register profiling_info)
852 : SlowPathCodeARM64(check),
853 profiling_info_(profiling_info) {}
854
EmitNativeCode(CodeGenerator * codegen)855 void EmitNativeCode(CodeGenerator* codegen) override {
856 uint32_t entrypoint_offset =
857 GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value();
858 __ Bind(GetEntryLabel());
859 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
860 UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
861 Register counter = temps.AcquireW();
862 __ Mov(counter, ProfilingInfo::GetOptimizeThreshold());
863 __ Strh(counter,
864 MemOperand(profiling_info_, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
865 if (instruction_ != nullptr) {
866 // Only saves live vector regs for SIMD.
867 SaveLiveRegisters(codegen, instruction_->GetLocations());
868 }
869 __ Ldr(lr, MemOperand(tr, entrypoint_offset));
870 // Note: we don't record the call here (and therefore don't generate a stack
871 // map), as the entrypoint should never be suspended.
872 __ Blr(lr);
873 if (instruction_ != nullptr) {
874 // Only restores live vector regs for SIMD.
875 RestoreLiveRegisters(codegen, instruction_->GetLocations());
876 }
877 __ B(GetExitLabel());
878 }
879
GetDescription() const880 const char* GetDescription() const override {
881 return "CompileOptimizedSlowPath";
882 }
883
884 private:
885 // The register where the profiling info is stored when entering the slow
886 // path.
887 Register profiling_info_;
888
889 DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARM64);
890 };
891
892 #undef __
893
GetNextLocation(DataType::Type type)894 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
895 Location next_location;
896 if (type == DataType::Type::kVoid) {
897 LOG(FATAL) << "Unreachable type " << type;
898 }
899
900 if (DataType::IsFloatingPointType(type) &&
901 (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
902 next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
903 } else if (!DataType::IsFloatingPointType(type) &&
904 (gp_index_ < calling_convention.GetNumberOfRegisters())) {
905 next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
906 } else {
907 size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
908 next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
909 : Location::StackSlot(stack_offset);
910 }
911
912 // Space on the stack is reserved for all arguments.
913 stack_index_ += DataType::Is64BitType(type) ? 2 : 1;
914 return next_location;
915 }
916
GetMethodLocation() const917 Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
918 return LocationFrom(kArtMethodRegister);
919 }
920
GetNextLocation(DataType::Type type)921 Location CriticalNativeCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
922 DCHECK_NE(type, DataType::Type::kReference);
923
924 Location location = Location::NoLocation();
925 if (DataType::IsFloatingPointType(type)) {
926 if (fpr_index_ < kParameterFPRegistersLength) {
927 location = LocationFrom(kParameterFPRegisters[fpr_index_]);
928 ++fpr_index_;
929 }
930 } else {
931 // Native ABI uses the same registers as managed, except that the method register x0
932 // is a normal argument.
933 if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
934 location = LocationFrom(gpr_index_ == 0u ? x0 : kParameterCoreRegisters[gpr_index_ - 1u]);
935 ++gpr_index_;
936 }
937 }
938 if (location.IsInvalid()) {
939 if (DataType::Is64BitType(type)) {
940 location = Location::DoubleStackSlot(stack_offset_);
941 } else {
942 location = Location::StackSlot(stack_offset_);
943 }
944 stack_offset_ += kFramePointerSize;
945
946 if (for_register_allocation_) {
947 location = Location::Any();
948 }
949 }
950 return location;
951 }
952
GetReturnLocation(DataType::Type type) const953 Location CriticalNativeCallingConventionVisitorARM64::GetReturnLocation(DataType::Type type) const {
954 // We perform conversion to the managed ABI return register after the call if needed.
955 InvokeDexCallingConventionVisitorARM64 dex_calling_convention;
956 return dex_calling_convention.GetReturnLocation(type);
957 }
958
GetMethodLocation() const959 Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const {
960 // Pass the method in the hidden argument x15.
961 return Location::RegisterLocation(x15.GetCode());
962 }
963
namespace detail {

// Mark which intrinsics we don't have handcrafted code for.
//
// Primary template: an intrinsic counts as implemented (`is_unimplemented` is
// false) unless one of the specializations generated below overrides it.
template <Intrinsics T>
struct IsUnimplemented {
  bool is_unimplemented = false;
};

// Generates an `IsUnimplemented` specialization with `is_unimplemented = true`
// for every intrinsic named by UNIMPLEMENTED_INTRINSIC_LIST_ARM64.
#define TRUE_OVERRIDE(Name)                     \
  template <>                                   \
  struct IsUnimplemented<Intrinsics::k##Name> { \
    bool is_unimplemented = true;               \
  };
UNIMPLEMENTED_INTRINSIC_LIST_ARM64(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE

// Flat table of the flags above: the first entry is for kNone, followed by one
// entry per intrinsic in the order ART_INTRINSICS_LIST expands them.
// NOTE(review): presumably indexed by the `Intrinsics` enum value; confirm
// against the consumer in CodeGenerator.
static constexpr bool kIsIntrinsicUnimplemented[] = {
    false,  // kNone
#define IS_UNIMPLEMENTED(Intrinsic, ...) \
    IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
    ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
#undef IS_UNIMPLEMENTED
};

}  // namespace detail
989
CodeGeneratorARM64(HGraph * graph,const CompilerOptions & compiler_options,OptimizingCompilerStats * stats)990 CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
991 const CompilerOptions& compiler_options,
992 OptimizingCompilerStats* stats)
993 : CodeGenerator(graph,
994 kNumberOfAllocatableRegisters,
995 kNumberOfAllocatableFPRegisters,
996 kNumberOfAllocatableRegisterPairs,
997 callee_saved_core_registers.GetList(),
998 callee_saved_fp_registers.GetList(),
999 compiler_options,
1000 stats,
1001 ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1002 block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1003 jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1004 location_builder_neon_(graph, this),
1005 instruction_visitor_neon_(graph, this),
1006 location_builder_sve_(graph, this),
1007 instruction_visitor_sve_(graph, this),
1008 move_resolver_(graph->GetAllocator(), this),
1009 assembler_(graph->GetAllocator(),
1010 compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()),
1011 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1012 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1013 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1014 app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1015 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1016 public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1017 package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1018 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1019 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1020 boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1021 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1022 call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1023 baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1024 jit_patches_(&assembler_, graph->GetAllocator()),
1025 jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
1026 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1027 // Save the link register (containing the return address) to mimic Quick.
1028 AddAllocatedRegister(LocationFrom(lr));
1029
1030 bool use_sve = ShouldUseSVE();
1031 if (use_sve) {
1032 location_builder_ = &location_builder_sve_;
1033 instruction_visitor_ = &instruction_visitor_sve_;
1034 } else {
1035 location_builder_ = &location_builder_neon_;
1036 instruction_visitor_ = &instruction_visitor_neon_;
1037 }
1038 }
1039
ShouldUseSVE() const1040 bool CodeGeneratorARM64::ShouldUseSVE() const {
1041 return GetInstructionSetFeatures().HasSVE();
1042 }
1043
GetSIMDRegisterWidth() const1044 size_t CodeGeneratorARM64::GetSIMDRegisterWidth() const {
1045 return SupportsPredicatedSIMD()
1046 ? GetInstructionSetFeatures().GetSVEVectorLength() / kBitsPerByte
1047 : vixl::aarch64::kQRegSizeInBytes;
1048 }
1049
1050 #define __ GetVIXLAssembler()->
1051
EmitJumpTables()1052 void CodeGeneratorARM64::EmitJumpTables() {
1053 for (auto&& jump_table : jump_tables_) {
1054 jump_table->EmitTable(this);
1055 }
1056 }
1057
// Finishes code generation: emits jump tables and JIT Baker read barrier slow
// paths, finalizes the VIXL code buffer, runs the base-class finalization and,
// in debug builds, checks that the instruction adjacent to every Baker read
// barrier patch location has the expected encoding.
void CodeGeneratorARM64::Finalize() {
  EmitJumpTables();

  // Emit JIT baker read barrier slow paths.
  DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
  for (auto& entry : jit_baker_read_barrier_slow_paths_) {
    uint32_t encoded_data = entry.first;
    vixl::aarch64::Label* slow_path_entry = &entry.second.label;
    __ Bind(slow_path_entry);
    CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
  }

  // Ensure we emit the literal pool.
  __ FinalizeCode();

  CodeGenerator::Finalize();

  // Verify Baker read barrier linker patches.
  if (kIsDebugBuild) {
    ArrayRef<const uint8_t> code(GetCode());
    for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
      DCHECK(info.label.IsBound());
      uint32_t literal_offset = info.label.GetLocation();
      DCHECK_ALIGNED(literal_offset, 4u);

      // Reads the 32-bit instruction at `offset` from the code buffer,
      // assembling it from bytes in little-endian order.
      auto GetInsn = [&code](uint32_t offset) {
        DCHECK_ALIGNED(offset, 4u);
        return
            (static_cast<uint32_t>(code[offset + 0]) << 0) +
            (static_cast<uint32_t>(code[offset + 1]) << 8) +
            (static_cast<uint32_t>(code[offset + 2]) << 16)+
            (static_cast<uint32_t>(code[offset + 3]) << 24);
      };

      const uint32_t encoded_data = info.custom_data;
      BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
      // Check that the next instruction matches the expected LDR.
      switch (kind) {
        case BakerReadBarrierKind::kField:
        case BakerReadBarrierKind::kAcquire: {
          // The checked instruction follows the patch location.
          DCHECK_GE(code.size() - literal_offset, 8u);
          uint32_t next_insn = GetInsn(literal_offset + 4u);
          CheckValidReg(next_insn & 0x1fu);  // Check destination register.
          const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
          if (kind == BakerReadBarrierKind::kField) {
            // LDR (immediate) with correct base_reg.
            CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
          } else {
            DCHECK(kind == BakerReadBarrierKind::kAcquire);
            // LDAR with correct base_reg.
            CHECK_EQ(next_insn & 0xffffffe0u, 0x88dffc00u | (base_reg << 5));
          }
          break;
        }
        case BakerReadBarrierKind::kArray: {
          DCHECK_GE(code.size() - literal_offset, 8u);
          uint32_t next_insn = GetInsn(literal_offset + 4u);
          // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
          // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
          CheckValidReg(next_insn & 0x1fu);  // Check destination register.
          const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
          CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
          CheckValidReg((next_insn >> 16) & 0x1f);  // Check index register
          break;
        }
        case BakerReadBarrierKind::kGcRoot: {
          // For GC roots the checked instruction precedes the patch location.
          DCHECK_GE(literal_offset, 4u);
          uint32_t prev_insn = GetInsn(literal_offset - 4u);
          const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
          // Usually LDR (immediate) with correct root_reg but
          // we may have a "MOV marked, old_value" for intrinsic CAS.
          if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) {    // MOV?
            CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg);  // LDR?
          }
          break;
        }
        default:
          LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
          UNREACHABLE();
      }
    }
  }
}
1141
PrepareForEmitNativeCode()1142 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
1143 // Note: There are 6 kinds of moves:
1144 // 1. constant -> GPR/FPR (non-cycle)
1145 // 2. constant -> stack (non-cycle)
1146 // 3. GPR/FPR -> GPR/FPR
1147 // 4. GPR/FPR -> stack
1148 // 5. stack -> GPR/FPR
1149 // 6. stack -> stack (non-cycle)
1150 // Case 1, 2 and 6 should never be included in a dependency cycle on ARM64. For case 3, 4, and 5
1151 // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting
1152 // cycles on ARM64, so we always have 1 GPR and 1 FPR available VIXL temps to resolve the
1153 // dependency.
1154 vixl_temps_.Open(GetVIXLAssembler());
1155 }
1156
FinishEmitNativeCode()1157 void ParallelMoveResolverARM64::FinishEmitNativeCode() {
1158 vixl_temps_.Close();
1159 }
1160
AllocateScratchLocationFor(Location::Kind kind)1161 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
1162 DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
1163 || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
1164 || kind == Location::kSIMDStackSlot);
1165 kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
1166 ? Location::kFpuRegister
1167 : Location::kRegister;
1168 Location scratch = GetScratchLocation(kind);
1169 if (!scratch.Equals(Location::NoLocation())) {
1170 return scratch;
1171 }
1172 // Allocate from VIXL temp registers.
1173 if (kind == Location::kRegister) {
1174 scratch = LocationFrom(vixl_temps_.AcquireX());
1175 } else {
1176 DCHECK_EQ(kind, Location::kFpuRegister);
1177 scratch = codegen_->GetGraph()->HasSIMD()
1178 ? codegen_->GetInstructionCodeGeneratorArm64()->AllocateSIMDScratchLocation(&vixl_temps_)
1179 : LocationFrom(vixl_temps_.AcquireD());
1180 }
1181 AddScratchLocation(scratch);
1182 return scratch;
1183 }
1184
FreeScratchLocation(Location loc)1185 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
1186 if (loc.IsRegister()) {
1187 vixl_temps_.Release(XRegisterFrom(loc));
1188 } else {
1189 DCHECK(loc.IsFpuRegister());
1190 if (codegen_->GetGraph()->HasSIMD()) {
1191 codegen_->GetInstructionCodeGeneratorArm64()->FreeSIMDScratchLocation(loc, &vixl_temps_);
1192 } else {
1193 vixl_temps_.Release(DRegisterFrom(loc));
1194 }
1195 }
1196 RemoveScratchLocation(loc);
1197 }
1198
EmitMove(size_t index)1199 void ParallelMoveResolverARM64::EmitMove(size_t index) {
1200 MoveOperands* move = moves_[index];
1201 codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid);
1202 }
1203
VisitMethodExitHook(HMethodExitHook * method_hook)1204 void LocationsBuilderARM64::VisitMethodExitHook(HMethodExitHook* method_hook) {
1205 LocationSummary* locations = new (GetGraph()->GetAllocator())
1206 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1207 DataType::Type return_type = method_hook->InputAt(0)->GetType();
1208 locations->SetInAt(0, ARM64ReturnLocation(return_type));
1209 }
1210
// Emits the inline sequence for a method entry or exit hook.
//
// The fast path records a (method, timestamp) entry in the per-thread trace
// buffer. The slow path (MethodEntryExitHooksSlowPathARM64) is taken when a
// method exit finds a non-zero should-deoptimize flag, when the listener state
// is above kFastTraceListeners, or when the trace buffer has no room left.
// With a listener state below kFastTraceListeners nothing is recorded.
void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* instruction) {
  MacroAssembler* masm = GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);
  Register addr = temps.AcquireX();
  Register index = temps.AcquireX();
  // `value` is the 32-bit view of `index`; the two share one scratch register.
  Register value = index.W();

  SlowPathCodeARM64* slow_path =
      new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction);
  codegen_->AddSlowPath(slow_path);

  if (instruction->IsMethodExitHook()) {
    // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it
    // would be sufficient to check if CheckCallerForDeopt bit is set. Though it is faster to check
    // if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is
    // disabled in debuggable runtime. The other bit is used when this method itself requires a
    // deoptimization due to redefinition. So it is safe to just check for non-zero value here.
    __ Ldr(value, MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
    __ Cbnz(value, slow_path->GetEntryLabel());
  }

  // Load the entry / exit listener state byte from the Instrumentation object,
  // whose absolute address is embedded in the generated code.
  uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
  MemberOffset offset = instruction->IsMethodExitHook() ?
      instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
      instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
  __ Mov(addr, address + offset.Int32Value());
  __ Ldrb(value, MemOperand(addr, 0));
  __ Cmp(value, Operand(instrumentation::Instrumentation::kFastTraceListeners));
  // Check if there are any method entry / exit listeners. If no, continue.
  __ B(lt, slow_path->GetExitLabel());
  // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
  // If yes, just take the slow path.
  __ B(gt, slow_path->GetEntryLabel());

  // Check if there is place in the buffer to store a new entry, if no, take slow path.
  uint32_t trace_buffer_index_offset =
      Thread::TraceBufferIndexOffset<kArm64PointerSize>().Int32Value();
  __ Ldr(index, MemOperand(tr, trace_buffer_index_offset));
  // The index is decremented by the entry size; a negative result means full.
  __ Subs(index, index, kNumEntriesForWallClock);
  __ B(lt, slow_path->GetEntryLabel());

  // Update the index in the `Thread`.
  __ Str(index, MemOperand(tr, trace_buffer_index_offset));
  // Calculate the entry address in the buffer.
  // addr = base_addr + sizeof(void*) * index;
  __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArm64PointerSize>().SizeValue()));
  __ ComputeAddress(addr, MemOperand(addr, index, LSL, TIMES_8));

  // `index` is no longer needed once `addr` is computed, so reuse its register.
  Register tmp = index;
  // Record method pointer and trace action.
  __ Ldr(tmp, MemOperand(sp, 0));
  // Use last two bits to encode trace method action. For MethodEntry it is 0
  // so no need to set the bits since they are 0 already.
  if (instruction->IsMethodExitHook()) {
    DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
    static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
    static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
    __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
  }
  __ Str(tmp, MemOperand(addr, kMethodOffsetInBytes));
  // Record the timestamp.
  __ Mrs(tmp, (SystemRegister)SYS_CNTVCT_EL0);
  __ Str(tmp, MemOperand(addr, kTimestampOffsetInBytes));
  __ Bind(slow_path->GetExitLabel());
}
1276
// Generates code for a method exit hook by delegating to
// GenerateMethodEntryExitHook(). Debug builds assert that this only happens for
// a debuggable graph under the JIT compiler and that the current method is
// required.
void InstructionCodeGeneratorARM64::VisitMethodExitHook(HMethodExitHook* instruction) {
  DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
  DCHECK(codegen_->RequiresCurrentMethod());
  GenerateMethodEntryExitHook(instruction);
}
1282
// Builds the locations for a method entry hook: it has no inputs or outputs to
// place and may call on the slow path. The summary is allocated from the
// graph's allocator and the returned pointer is not kept here.
void LocationsBuilderARM64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
  new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
}
1286
// Generates code for a method entry hook by delegating to
// GenerateMethodEntryExitHook(). Debug builds assert that this only happens for
// a debuggable graph under the JIT compiler and that the current method is
// required.
void InstructionCodeGeneratorARM64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
  DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
  DCHECK(codegen_->RequiresCurrentMethod());
  GenerateMethodEntryExitHook(instruction);
}
1292
MaybeIncrementHotness(HSuspendCheck * suspend_check,bool is_frame_entry)1293 void CodeGeneratorARM64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
1294 MacroAssembler* masm = GetVIXLAssembler();
1295 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1296 UseScratchRegisterScope temps(masm);
1297 Register counter = temps.AcquireX();
1298 Register method = is_frame_entry ? kArtMethodRegister : temps.AcquireX();
1299 if (!is_frame_entry) {
1300 __ Ldr(method, MemOperand(sp, 0));
1301 }
1302 __ Ldrh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
1303 vixl::aarch64::Label done;
1304 DCHECK_EQ(0u, interpreter::kNterpHotnessValue);
1305 __ Cbz(counter, &done);
1306 __ Add(counter, counter, -1);
1307 __ Strh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
1308 __ Bind(&done);
1309 }
1310
1311 if (GetGraph()->IsCompilingBaseline() &&
1312 GetGraph()->IsUsefulOptimizing() &&
1313 !Runtime::Current()->IsAotCompiler()) {
1314 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1315 DCHECK(info != nullptr);
1316 DCHECK(!HasEmptyFrame());
1317 uint64_t address = reinterpret_cast64<uint64_t>(info);
1318 UseScratchRegisterScope temps(masm);
1319 Register counter = temps.AcquireW();
1320 SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARM64(
1321 suspend_check, /* profiling_info= */ lr);
1322 AddSlowPath(slow_path);
1323 __ Ldr(lr, jit_patches_.DeduplicateUint64Literal(address));
1324 __ Ldrh(counter, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
1325 __ Cbz(counter, slow_path->GetEntryLabel());
1326 __ Add(counter, counter, -1);
1327 __ Strh(counter, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
1328 __ Bind(slow_path->GetExitLabel());
1329 }
1330 }
1331
// Emits the method prologue, in this order:
//  1. an optional class-initialization check that branches to the resolution
//     trampoline when the declaring class is not usable yet,
//  2. an optional implicit stack overflow probe,
//  3. frame construction: store the method, spill the preserved core and FP
//     registers and zero the should-deoptimize flag (skipped for empty frames),
//  4. hotness counting and the marking register check.
void CodeGeneratorARM64::GenerateFrameEntry() {
  MacroAssembler* masm = GetVIXLAssembler();

  // Check if we need to generate the clinit check. We will jump to the
  // resolution stub if the class is not initialized and the executing thread is
  // not the thread initializing it.
  // We do this before constructing the frame to get the correct stack trace if
  // an exception is thrown.
  if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
    UseScratchRegisterScope temps(masm);
    vixl::aarch64::Label resolution;
    vixl::aarch64::Label memory_barrier;

    Register temp1 = temps.AcquireW();
    Register temp2 = temps.AcquireW();

    // Check if we're visibly initialized.

    // We don't emit a read barrier here to save on code size. We rely on the
    // resolution trampoline to do a suspend check before re-entering this code.
    __ Ldr(temp1, MemOperand(kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
    __ Ldrb(temp2, HeapOperand(temp1, kClassStatusByteOffset));
    __ Cmp(temp2, kShiftedVisiblyInitializedValue);
    __ B(hs, &frame_entry_label_);

    // Check if we're initialized and jump to code that does a memory barrier if
    // so.
    __ Cmp(temp2, kShiftedInitializedValue);
    __ B(hs, &memory_barrier);

    // Check if we're initializing and the thread initializing is the one
    // executing the code.
    __ Cmp(temp2, kShiftedInitializingValue);
    __ B(lo, &resolution);

    __ Ldr(temp1, HeapOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
    __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArm64PointerSize>().Int32Value()));
    __ Cmp(temp1, temp2);
    __ B(eq, &frame_entry_label_);
    __ Bind(&resolution);

    // Jump to the resolution stub.
    ThreadOffset64 entrypoint_offset =
        GetThreadOffset<kArm64PointerSize>(kQuickQuickResolutionTrampoline);
    __ Ldr(temp1.X(), MemOperand(tr, entrypoint_offset.Int32Value()));
    __ Br(temp1.X());

    // Initialized but not visibly initialized: issue a barrier, then fall
    // through to the frame entry.
    __ Bind(&memory_barrier);
    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
  }
  __ Bind(&frame_entry_label_);

  bool do_overflow_check =
      FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
  if (do_overflow_check) {
    UseScratchRegisterScope temps(masm);
    Register temp = temps.AcquireX();
    DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
    // Probe the address that lies the reserved byte count below sp; the loaded
    // value is discarded (destination is wzr).
    __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kArm64)));
    {
      // Ensure that between load and RecordPcInfo there are no pools emitted.
      ExactAssemblyScope eas(GetVIXLAssembler(),
                             kInstructionSize,
                             CodeBufferCheckScope::kExactSize);
      __ ldr(wzr, MemOperand(temp, 0));
      RecordPcInfo(nullptr, 0);
    }
  }

  if (!HasEmptyFrame()) {
    // Make sure the frame size isn't unreasonably large.
    DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());

    // Stack layout:
    //      sp[frame_size - 8]        : lr.
    //      ...                       : other preserved core registers.
    //      ...                       : other preserved fp registers.
    //      ...                       : reserved frame space.
    //      sp[0]                     : current method.
    int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
    uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
    CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
    DCHECK(!preserved_core_registers.IsEmpty());
    uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
    CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();

    // Save the current method if we need it, or if using STP reduces code
    // size. Note that we do not do this in HCurrentMethod, as the
    // instruction might have been removed in the SSA graph.
    CPURegister lowest_spill;
    if (core_spills_offset == kXRegSizeInBytes) {
      // If there is no gap between the method and the lowest core spill, use
      // aligned STP pre-index to store both. Max difference is 512. We do
      // that to reduce code size even if we do not have to save the method.
      DCHECK_LE(frame_size, 512);  // 32 core registers are only 256 bytes.
      lowest_spill = preserved_core_registers.PopLowestIndex();
      __ Stp(kArtMethodRegister, lowest_spill, MemOperand(sp, -frame_size, PreIndex));
    } else if (RequiresCurrentMethod()) {
      __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
    } else {
      __ Claim(frame_size);
    }
    GetAssembler()->cfi().AdjustCFAOffset(frame_size);
    if (lowest_spill.IsValid()) {
      // The lowest spill was already stored by the STP above; record it in the
      // CFI and skip its slot for the remaining core spills.
      GetAssembler()->cfi().RelOffset(DWARFReg(lowest_spill), core_spills_offset);
      core_spills_offset += kXRegSizeInBytes;
    }
    GetAssembler()->SpillRegisters(preserved_core_registers, core_spills_offset);
    GetAssembler()->SpillRegisters(preserved_fp_registers, fp_spills_offset);

    if (GetGraph()->HasShouldDeoptimizeFlag()) {
      // Initialize should_deoptimize flag to 0.
      Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
      __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
    }
  }
  MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
  // The source line number is passed as the check's code.
  MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
1451
GenerateFrameExit()1452 void CodeGeneratorARM64::GenerateFrameExit() {
1453 GetAssembler()->cfi().RememberState();
1454 if (!HasEmptyFrame()) {
1455 int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
1456 uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
1457 CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
1458 DCHECK(!preserved_core_registers.IsEmpty());
1459 uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
1460 CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();
1461
1462 CPURegister lowest_spill;
1463 if (core_spills_offset == kXRegSizeInBytes) {
1464 // If there is no gap between the method and the lowest core spill, use
1465 // aligned LDP pre-index to pop both. Max difference is 504. We do
1466 // that to reduce code size even though the loaded method is unused.
1467 DCHECK_LE(frame_size, 504); // 32 core registers are only 256 bytes.
1468 lowest_spill = preserved_core_registers.PopLowestIndex();
1469 core_spills_offset += kXRegSizeInBytes;
1470 }
1471 GetAssembler()->UnspillRegisters(preserved_fp_registers, fp_spills_offset);
1472 GetAssembler()->UnspillRegisters(preserved_core_registers, core_spills_offset);
1473 if (lowest_spill.IsValid()) {
1474 __ Ldp(xzr, lowest_spill, MemOperand(sp, frame_size, PostIndex));
1475 GetAssembler()->cfi().Restore(DWARFReg(lowest_spill));
1476 } else {
1477 __ Drop(frame_size);
1478 }
1479 GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
1480 }
1481 __ Ret();
1482 GetAssembler()->cfi().RestoreState();
1483 GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
1484 }
1485
GetFramePreservedCoreRegisters() const1486 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
1487 DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
1488 return CPURegList(CPURegister::kRegister, kXRegSize,
1489 core_spill_mask_);
1490 }
1491
GetFramePreservedFPRegisters() const1492 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
1493 DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
1494 GetNumberOfFloatingPointRegisters()));
1495 return CPURegList(CPURegister::kVRegister, kDRegSize,
1496 fpu_spill_mask_);
1497 }
1498
Bind(HBasicBlock * block)1499 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
1500 __ Bind(GetLabelOf(block));
1501 }
1502
MoveConstant(Location location,int32_t value)1503 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
1504 DCHECK(location.IsRegister());
1505 __ Mov(RegisterFrom(location, DataType::Type::kInt32), value);
1506 }
1507
AddLocationAsTemp(Location location,LocationSummary * locations)1508 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1509 if (location.IsRegister()) {
1510 locations->AddTemp(location);
1511 } else {
1512 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1513 }
1514 }
1515
MaybeMarkGCCard(Register object,Register value,bool emit_null_check)1516 void CodeGeneratorARM64::MaybeMarkGCCard(Register object, Register value, bool emit_null_check) {
1517 vixl::aarch64::Label done;
1518 if (emit_null_check) {
1519 __ Cbz(value, &done);
1520 }
1521 MarkGCCard(object);
1522 if (emit_null_check) {
1523 __ Bind(&done);
1524 }
1525 }
1526
MarkGCCard(Register object)1527 void CodeGeneratorARM64::MarkGCCard(Register object) {
1528 UseScratchRegisterScope temps(GetVIXLAssembler());
1529 Register card = temps.AcquireX();
1530 Register temp = temps.AcquireW(); // Index within the CardTable - 32bit.
1531 // Load the address of the card table into `card`.
1532 __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1533 // Calculate the offset (in the card table) of the card corresponding to `object`.
1534 __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
1535 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
1536 // `object`'s card.
1537 //
1538 // Register `card` contains the address of the card table. Note that the card
1539 // table's base is biased during its creation so that it always starts at an
1540 // address whose least-significant byte is equal to `kCardDirty` (see
1541 // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
1542 // below writes the `kCardDirty` (byte) value into the `object`'s card
1543 // (located at `card + object >> kCardShift`).
1544 //
1545 // This dual use of the value in register `card` (1. to calculate the location
1546 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
1547 // (no need to explicitly load `kCardDirty` as an immediate value).
1548 __ Strb(card, MemOperand(card, temp.X()));
1549 }
1550
CheckGCCardIsValid(Register object)1551 void CodeGeneratorARM64::CheckGCCardIsValid(Register object) {
1552 UseScratchRegisterScope temps(GetVIXLAssembler());
1553 Register card = temps.AcquireX();
1554 Register temp = temps.AcquireW(); // Index within the CardTable - 32bit.
1555 vixl::aarch64::Label done;
1556 // Load the address of the card table into `card`.
1557 __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1558 // Calculate the offset (in the card table) of the card corresponding to `object`.
1559 __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
1560 // assert (!clean || !self->is_gc_marking)
1561 __ Ldrb(temp, MemOperand(card, temp.X()));
1562 static_assert(gc::accounting::CardTable::kCardClean == 0);
1563 __ Cbnz(temp, &done);
1564 __ Cbz(mr, &done);
1565 __ Unreachable();
1566 __ Bind(&done);
1567 }
1568
SetupBlockedRegisters() const1569 void CodeGeneratorARM64::SetupBlockedRegisters() const {
1570 // Blocked core registers:
1571 // lr : Runtime reserved.
1572 // tr : Runtime reserved.
1573 // mr : Runtime reserved.
1574 // ip1 : VIXL core temp.
1575 // ip0 : VIXL core temp.
1576 // x18 : Platform register.
1577 //
1578 // Blocked fp registers:
1579 // d31 : VIXL fp temp.
1580 CPURegList reserved_core_registers = vixl_reserved_core_registers;
1581 reserved_core_registers.Combine(runtime_reserved_core_registers);
1582 while (!reserved_core_registers.IsEmpty()) {
1583 blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
1584 }
1585 blocked_core_registers_[X18] = true;
1586
1587 CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
1588 while (!reserved_fp_registers.IsEmpty()) {
1589 blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
1590 }
1591
1592 if (GetGraph()->IsDebuggable()) {
1593 // Stubs do not save callee-save floating point registers. If the graph
1594 // is debuggable, we need to deal with these registers differently. For
1595 // now, just block them.
1596 CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
1597 while (!reserved_fp_registers_debuggable.IsEmpty()) {
1598 blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
1599 }
1600 }
1601 }
1602
SaveCoreRegister(size_t stack_index,uint32_t reg_id)1603 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1604 Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1605 __ Str(reg, MemOperand(sp, stack_index));
1606 return kArm64WordSize;
1607 }
1608
RestoreCoreRegister(size_t stack_index,uint32_t reg_id)1609 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1610 Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1611 __ Ldr(reg, MemOperand(sp, stack_index));
1612 return kArm64WordSize;
1613 }
1614
SaveFloatingPointRegister(size_t stack_index,uint32_t reg_id)1615 size_t CodeGeneratorARM64::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index,
1616 [[maybe_unused]] uint32_t reg_id) {
1617 LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
1618 << "use SaveRestoreLiveRegistersHelper";
1619 UNREACHABLE();
1620 }
1621
RestoreFloatingPointRegister(size_t stack_index,uint32_t reg_id)1622 size_t CodeGeneratorARM64::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index,
1623 [[maybe_unused]] uint32_t reg_id) {
1624 LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
1625 << "use SaveRestoreLiveRegistersHelper";
1626 UNREACHABLE();
1627 }
1628
DumpCoreRegister(std::ostream & stream,int reg) const1629 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
1630 stream << XRegister(reg);
1631 }
1632
DumpFloatingPointRegister(std::ostream & stream,int reg) const1633 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1634 stream << DRegister(reg);
1635 }
1636
GetInstructionSetFeatures() const1637 const Arm64InstructionSetFeatures& CodeGeneratorARM64::GetInstructionSetFeatures() const {
1638 return *GetCompilerOptions().GetInstructionSetFeatures()->AsArm64InstructionSetFeatures();
1639 }
1640
MoveConstant(CPURegister destination,HConstant * constant)1641 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
1642 if (constant->IsIntConstant()) {
1643 __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
1644 } else if (constant->IsLongConstant()) {
1645 __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
1646 } else if (constant->IsNullConstant()) {
1647 __ Mov(Register(destination), 0);
1648 } else if (constant->IsFloatConstant()) {
1649 __ Fmov(VRegister(destination), constant->AsFloatConstant()->GetValue());
1650 } else {
1651 DCHECK(constant->IsDoubleConstant());
1652 __ Fmov(VRegister(destination), constant->AsDoubleConstant()->GetValue());
1653 }
1654 }
1655
1656
CoherentConstantAndType(Location constant,DataType::Type type)1657 static bool CoherentConstantAndType(Location constant, DataType::Type type) {
1658 DCHECK(constant.IsConstant());
1659 HConstant* cst = constant.GetConstant();
1660 return (cst->IsIntConstant() && type == DataType::Type::kInt32) ||
1661 // Null is mapped to a core W register, which we associate with kPrimInt.
1662 (cst->IsNullConstant() && type == DataType::Type::kInt32) ||
1663 (cst->IsLongConstant() && type == DataType::Type::kInt64) ||
1664 (cst->IsFloatConstant() && type == DataType::Type::kFloat32) ||
1665 (cst->IsDoubleConstant() && type == DataType::Type::kFloat64);
1666 }
1667
1668 // Allocate a scratch register from the VIXL pool, querying first
1669 // the floating-point register pool, and then the core register
1670 // pool. This is essentially a reimplementation of
1671 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
1672 // using a different allocation strategy.
AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler * masm,vixl::aarch64::UseScratchRegisterScope * temps,int size_in_bits)1673 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
1674 vixl::aarch64::UseScratchRegisterScope* temps,
1675 int size_in_bits) {
1676 return masm->GetScratchVRegisterList()->IsEmpty()
1677 ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
1678 : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
1679 }
1680
MoveLocation(Location destination,Location source,DataType::Type dst_type)1681 void CodeGeneratorARM64::MoveLocation(Location destination,
1682 Location source,
1683 DataType::Type dst_type) {
1684 if (source.Equals(destination)) {
1685 return;
1686 }
1687
1688 // A valid move can always be inferred from the destination and source
1689 // locations. When moving from and to a register, the argument type can be
1690 // used to generate 32bit instead of 64bit moves. In debug mode we also
1691 // checks the coherency of the locations and the type.
1692 bool unspecified_type = (dst_type == DataType::Type::kVoid);
1693
1694 if (destination.IsRegister() || destination.IsFpuRegister()) {
1695 if (unspecified_type) {
1696 HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
1697 if (source.IsStackSlot() ||
1698 (src_cst != nullptr && (src_cst->IsIntConstant()
1699 || src_cst->IsFloatConstant()
1700 || src_cst->IsNullConstant()))) {
1701 // For stack slots and 32bit constants, a 64bit type is appropriate.
1702 dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32;
1703 } else {
1704 // If the source is a double stack slot or a 64bit constant, a 64bit
1705 // type is appropriate. Else the source is a register, and since the
1706 // type has not been specified, we chose a 64bit type to force a 64bit
1707 // move.
1708 dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64;
1709 }
1710 }
1711 DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) ||
1712 (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type)));
1713 CPURegister dst = CPURegisterFrom(destination, dst_type);
1714 if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
1715 DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
1716 __ Ldr(dst, StackOperandFrom(source));
1717 } else if (source.IsSIMDStackSlot()) {
1718 GetInstructionCodeGeneratorArm64()->LoadSIMDRegFromStack(destination, source);
1719 } else if (source.IsConstant()) {
1720 DCHECK(CoherentConstantAndType(source, dst_type));
1721 MoveConstant(dst, source.GetConstant());
1722 } else if (source.IsRegister()) {
1723 if (destination.IsRegister()) {
1724 __ Mov(Register(dst), RegisterFrom(source, dst_type));
1725 } else {
1726 DCHECK(destination.IsFpuRegister());
1727 DataType::Type source_type = DataType::Is64BitType(dst_type)
1728 ? DataType::Type::kInt64
1729 : DataType::Type::kInt32;
1730 __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
1731 }
1732 } else {
1733 DCHECK(source.IsFpuRegister());
1734 if (destination.IsRegister()) {
1735 DataType::Type source_type = DataType::Is64BitType(dst_type)
1736 ? DataType::Type::kFloat64
1737 : DataType::Type::kFloat32;
1738 __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
1739 } else {
1740 DCHECK(destination.IsFpuRegister());
1741 if (GetGraph()->HasSIMD()) {
1742 GetInstructionCodeGeneratorArm64()->MoveSIMDRegToSIMDReg(destination, source);
1743 } else {
1744 __ Fmov(VRegister(dst), FPRegisterFrom(source, dst_type));
1745 }
1746 }
1747 }
1748 } else if (destination.IsSIMDStackSlot()) {
1749 GetInstructionCodeGeneratorArm64()->MoveToSIMDStackSlot(destination, source);
1750 } else { // The destination is not a register. It must be a stack slot.
1751 DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
1752 if (source.IsRegister() || source.IsFpuRegister()) {
1753 if (unspecified_type) {
1754 if (source.IsRegister()) {
1755 dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64;
1756 } else {
1757 dst_type =
1758 destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64;
1759 }
1760 }
1761 DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) &&
1762 (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type)));
1763 __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
1764 } else if (source.IsConstant()) {
1765 DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
1766 << source << " " << dst_type;
1767 UseScratchRegisterScope temps(GetVIXLAssembler());
1768 HConstant* src_cst = source.GetConstant();
1769 CPURegister temp;
1770 if (src_cst->IsZeroBitPattern()) {
1771 temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
1772 ? Register(xzr)
1773 : Register(wzr);
1774 } else {
1775 if (src_cst->IsIntConstant()) {
1776 temp = temps.AcquireW();
1777 } else if (src_cst->IsLongConstant()) {
1778 temp = temps.AcquireX();
1779 } else if (src_cst->IsFloatConstant()) {
1780 temp = temps.AcquireS();
1781 } else {
1782 DCHECK(src_cst->IsDoubleConstant());
1783 temp = temps.AcquireD();
1784 }
1785 MoveConstant(temp, src_cst);
1786 }
1787 __ Str(temp, StackOperandFrom(destination));
1788 } else {
1789 DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
1790 DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
1791 UseScratchRegisterScope temps(GetVIXLAssembler());
1792 // Use any scratch register (a core or a floating-point one)
1793 // from VIXL scratch register pools as a temporary.
1794 //
1795 // We used to only use the FP scratch register pool, but in some
1796 // rare cases the only register from this pool (D31) would
1797 // already be used (e.g. within a ParallelMove instruction, when
1798 // a move is blocked by a another move requiring a scratch FP
1799 // register, which would reserve D31). To prevent this issue, we
1800 // ask for a scratch register of any type (core or FP).
1801 //
1802 // Also, we start by asking for a FP scratch register first, as the
1803 // demand of scratch core registers is higher. This is why we
1804 // use AcquireFPOrCoreCPURegisterOfSize instead of
1805 // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
1806 // allocates core scratch registers first.
1807 CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
1808 GetVIXLAssembler(),
1809 &temps,
1810 (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
1811 __ Ldr(temp, StackOperandFrom(source));
1812 __ Str(temp, StackOperandFrom(destination));
1813 }
1814 }
1815 }
1816
Load(DataType::Type type,CPURegister dst,const MemOperand & src)1817 void CodeGeneratorARM64::Load(DataType::Type type,
1818 CPURegister dst,
1819 const MemOperand& src) {
1820 switch (type) {
1821 case DataType::Type::kBool:
1822 case DataType::Type::kUint8:
1823 __ Ldrb(Register(dst), src);
1824 break;
1825 case DataType::Type::kInt8:
1826 __ Ldrsb(Register(dst), src);
1827 break;
1828 case DataType::Type::kUint16:
1829 __ Ldrh(Register(dst), src);
1830 break;
1831 case DataType::Type::kInt16:
1832 __ Ldrsh(Register(dst), src);
1833 break;
1834 case DataType::Type::kInt32:
1835 case DataType::Type::kReference:
1836 case DataType::Type::kInt64:
1837 case DataType::Type::kFloat32:
1838 case DataType::Type::kFloat64:
1839 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1840 __ Ldr(dst, src);
1841 break;
1842 case DataType::Type::kUint32:
1843 case DataType::Type::kUint64:
1844 case DataType::Type::kVoid:
1845 LOG(FATAL) << "Unreachable type " << type;
1846 }
1847 }
1848
LoadAcquire(HInstruction * instruction,DataType::Type type,CPURegister dst,const MemOperand & src,bool needs_null_check)1849 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
1850 DataType::Type type,
1851 CPURegister dst,
1852 const MemOperand& src,
1853 bool needs_null_check) {
1854 MacroAssembler* masm = GetVIXLAssembler();
1855 UseScratchRegisterScope temps(masm);
1856 Register temp_base = temps.AcquireX();
1857
1858 DCHECK(!src.IsPreIndex());
1859 DCHECK(!src.IsPostIndex());
1860
1861 // TODO(vixl): Let the MacroAssembler handle MemOperand.
1862 __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
1863 {
1864 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
1865 MemOperand base = MemOperand(temp_base);
1866 switch (type) {
1867 case DataType::Type::kBool:
1868 case DataType::Type::kUint8:
1869 case DataType::Type::kInt8:
1870 {
1871 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1872 __ ldarb(Register(dst), base);
1873 if (needs_null_check) {
1874 MaybeRecordImplicitNullCheck(instruction);
1875 }
1876 }
1877 if (type == DataType::Type::kInt8) {
1878 __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1879 }
1880 break;
1881 case DataType::Type::kUint16:
1882 case DataType::Type::kInt16:
1883 {
1884 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1885 __ ldarh(Register(dst), base);
1886 if (needs_null_check) {
1887 MaybeRecordImplicitNullCheck(instruction);
1888 }
1889 }
1890 if (type == DataType::Type::kInt16) {
1891 __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1892 }
1893 break;
1894 case DataType::Type::kInt32:
1895 case DataType::Type::kReference:
1896 case DataType::Type::kInt64:
1897 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1898 {
1899 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1900 __ ldar(Register(dst), base);
1901 if (needs_null_check) {
1902 MaybeRecordImplicitNullCheck(instruction);
1903 }
1904 }
1905 break;
1906 case DataType::Type::kFloat32:
1907 case DataType::Type::kFloat64: {
1908 DCHECK(dst.IsFPRegister());
1909 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1910
1911 Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
1912 {
1913 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1914 __ ldar(temp, base);
1915 if (needs_null_check) {
1916 MaybeRecordImplicitNullCheck(instruction);
1917 }
1918 }
1919 __ Fmov(VRegister(dst), temp);
1920 break;
1921 }
1922 case DataType::Type::kUint32:
1923 case DataType::Type::kUint64:
1924 case DataType::Type::kVoid:
1925 LOG(FATAL) << "Unreachable type " << type;
1926 }
1927 }
1928 }
1929
Store(DataType::Type type,CPURegister src,const MemOperand & dst)1930 void CodeGeneratorARM64::Store(DataType::Type type,
1931 CPURegister src,
1932 const MemOperand& dst) {
1933 switch (type) {
1934 case DataType::Type::kBool:
1935 case DataType::Type::kUint8:
1936 case DataType::Type::kInt8:
1937 __ Strb(Register(src), dst);
1938 break;
1939 case DataType::Type::kUint16:
1940 case DataType::Type::kInt16:
1941 __ Strh(Register(src), dst);
1942 break;
1943 case DataType::Type::kInt32:
1944 case DataType::Type::kReference:
1945 case DataType::Type::kInt64:
1946 case DataType::Type::kFloat32:
1947 case DataType::Type::kFloat64:
1948 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1949 __ Str(src, dst);
1950 break;
1951 case DataType::Type::kUint32:
1952 case DataType::Type::kUint64:
1953 case DataType::Type::kVoid:
1954 LOG(FATAL) << "Unreachable type " << type;
1955 }
1956 }
1957
StoreRelease(HInstruction * instruction,DataType::Type type,CPURegister src,const MemOperand & dst,bool needs_null_check)1958 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
1959 DataType::Type type,
1960 CPURegister src,
1961 const MemOperand& dst,
1962 bool needs_null_check) {
1963 MacroAssembler* masm = GetVIXLAssembler();
1964 UseScratchRegisterScope temps(GetVIXLAssembler());
1965 Register temp_base = temps.AcquireX();
1966
1967 DCHECK(!dst.IsPreIndex());
1968 DCHECK(!dst.IsPostIndex());
1969
1970 // TODO(vixl): Let the MacroAssembler handle this.
1971 Operand op = OperandFromMemOperand(dst);
1972 __ Add(temp_base, dst.GetBaseRegister(), op);
1973 MemOperand base = MemOperand(temp_base);
1974 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
1975 switch (type) {
1976 case DataType::Type::kBool:
1977 case DataType::Type::kUint8:
1978 case DataType::Type::kInt8:
1979 {
1980 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1981 __ stlrb(Register(src), base);
1982 if (needs_null_check) {
1983 MaybeRecordImplicitNullCheck(instruction);
1984 }
1985 }
1986 break;
1987 case DataType::Type::kUint16:
1988 case DataType::Type::kInt16:
1989 {
1990 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1991 __ stlrh(Register(src), base);
1992 if (needs_null_check) {
1993 MaybeRecordImplicitNullCheck(instruction);
1994 }
1995 }
1996 break;
1997 case DataType::Type::kInt32:
1998 case DataType::Type::kReference:
1999 case DataType::Type::kInt64:
2000 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
2001 {
2002 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2003 __ stlr(Register(src), base);
2004 if (needs_null_check) {
2005 MaybeRecordImplicitNullCheck(instruction);
2006 }
2007 }
2008 break;
2009 case DataType::Type::kFloat32:
2010 case DataType::Type::kFloat64: {
2011 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
2012 Register temp_src;
2013 if (src.IsZero()) {
2014 // The zero register is used to avoid synthesizing zero constants.
2015 temp_src = Register(src);
2016 } else {
2017 DCHECK(src.IsFPRegister());
2018 temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
2019 __ Fmov(temp_src, VRegister(src));
2020 }
2021 {
2022 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2023 __ stlr(temp_src, base);
2024 if (needs_null_check) {
2025 MaybeRecordImplicitNullCheck(instruction);
2026 }
2027 }
2028 break;
2029 }
2030 case DataType::Type::kUint32:
2031 case DataType::Type::kUint64:
2032 case DataType::Type::kVoid:
2033 LOG(FATAL) << "Unreachable type " << type;
2034 }
2035 }
2036
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)2037 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
2038 HInstruction* instruction,
2039 uint32_t dex_pc,
2040 SlowPathCode* slow_path) {
2041 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2042
2043 ThreadOffset64 entrypoint_offset = GetThreadOffset<kArm64PointerSize>(entrypoint);
2044 // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
2045 // entire oat file. This adds an extra branch and we do not want to slow down the main path.
2046 // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
2047 if (slow_path == nullptr || GetCompilerOptions().IsJitCompiler()) {
2048 __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
2049 // Ensure the pc position is recorded immediately after the `blr` instruction.
2050 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
2051 __ blr(lr);
2052 if (EntrypointRequiresStackMap(entrypoint)) {
2053 RecordPcInfo(instruction, dex_pc, slow_path);
2054 }
2055 } else {
2056 // Ensure the pc position is recorded immediately after the `bl` instruction.
2057 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
2058 EmitEntrypointThunkCall(entrypoint_offset);
2059 if (EntrypointRequiresStackMap(entrypoint)) {
2060 RecordPcInfo(instruction, dex_pc, slow_path);
2061 }
2062 }
2063 }
2064
InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,HInstruction * instruction,SlowPathCode * slow_path)2065 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2066 HInstruction* instruction,
2067 SlowPathCode* slow_path) {
2068 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2069 __ Ldr(lr, MemOperand(tr, entry_point_offset));
2070 __ Blr(lr);
2071 }
2072
GenerateClassInitializationCheck(SlowPathCodeARM64 * slow_path,Register class_reg)2073 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
2074 Register class_reg) {
2075 UseScratchRegisterScope temps(GetVIXLAssembler());
2076 Register temp = temps.AcquireW();
2077
2078 // CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize
2079 // the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code
2080 // size, load only the high byte of the field and compare with 0xf0.
2081 // Note: The same code size could be achieved with LDR+MNV(asr #24)+CBNZ but benchmarks
2082 // show that this pattern is slower (tested on little cores).
2083 __ Ldrb(temp, HeapOperand(class_reg, kClassStatusByteOffset));
2084 __ Cmp(temp, kShiftedVisiblyInitializedValue);
2085 __ B(lo, slow_path->GetEntryLabel());
2086 __ Bind(slow_path->GetExitLabel());
2087 }
2088
GenerateBitstringTypeCheckCompare(HTypeCheckInstruction * check,vixl::aarch64::Register temp)2089 void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare(
2090 HTypeCheckInstruction* check, vixl::aarch64::Register temp) {
2091 uint32_t path_to_root = check->GetBitstringPathToRoot();
2092 uint32_t mask = check->GetBitstringMask();
2093 DCHECK(IsPowerOfTwo(mask + 1));
2094 size_t mask_bits = WhichPowerOf2(mask + 1);
2095
2096 if (mask_bits == 16u) {
2097 // Load only the bitstring part of the status word.
2098 __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
2099 } else {
2100 // /* uint32_t */ temp = temp->status_
2101 __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
2102 // Extract the bitstring bits.
2103 __ Ubfx(temp, temp, 0, mask_bits);
2104 }
2105 // Compare the bitstring bits to `path_to_root`.
2106 __ Cmp(temp, path_to_root);
2107 }
2108
GenerateMemoryBarrier(MemBarrierKind kind)2109 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
2110 BarrierType type = BarrierAll;
2111
2112 switch (kind) {
2113 case MemBarrierKind::kAnyAny:
2114 case MemBarrierKind::kAnyStore: {
2115 type = BarrierAll;
2116 break;
2117 }
2118 case MemBarrierKind::kLoadAny: {
2119 type = BarrierReads;
2120 break;
2121 }
2122 case MemBarrierKind::kStoreStore: {
2123 type = BarrierWrites;
2124 break;
2125 }
2126 default:
2127 LOG(FATAL) << "Unexpected memory barrier " << kind;
2128 }
2129 __ Dmb(InnerShareable, type);
2130 }
2131
CanUseImplicitSuspendCheck() const2132 bool CodeGeneratorARM64::CanUseImplicitSuspendCheck() const {
2133 // Use implicit suspend checks if requested in compiler options unless there are SIMD
2134 // instructions in the graph. The implicit suspend check saves all FP registers as
2135 // 64-bit (in line with the calling convention) but SIMD instructions can use 128-bit
2136 // registers, so they need to be saved in an explicit slow path.
2137 return GetCompilerOptions().GetImplicitSuspendChecks() && !GetGraph()->HasSIMD();
2138 }
2139
GenerateSuspendCheck(HSuspendCheck * instruction,HBasicBlock * successor)2140 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
2141 HBasicBlock* successor) {
2142 if (instruction->IsNoOp()) {
2143 if (successor != nullptr) {
2144 __ B(codegen_->GetLabelOf(successor));
2145 }
2146 return;
2147 }
2148
2149 if (codegen_->CanUseImplicitSuspendCheck()) {
2150 __ Ldr(kImplicitSuspendCheckRegister, MemOperand(kImplicitSuspendCheckRegister));
2151 codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
2152 if (successor != nullptr) {
2153 __ B(codegen_->GetLabelOf(successor));
2154 }
2155 return;
2156 }
2157
2158 SuspendCheckSlowPathARM64* slow_path =
2159 down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
2160 if (slow_path == nullptr) {
2161 slow_path =
2162 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARM64(instruction, successor);
2163 instruction->SetSlowPath(slow_path);
2164 codegen_->AddSlowPath(slow_path);
2165 if (successor != nullptr) {
2166 DCHECK(successor->IsLoopHeader());
2167 }
2168 } else {
2169 DCHECK_EQ(slow_path->GetSuccessor(), successor);
2170 }
2171
2172 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
2173 Register temp = temps.AcquireW();
2174
2175 __ Ldr(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
2176 __ Tst(temp, Thread::SuspendOrCheckpointRequestFlags());
2177 if (successor == nullptr) {
2178 __ B(ne, slow_path->GetEntryLabel());
2179 __ Bind(slow_path->GetReturnLabel());
2180 } else {
2181 __ B(eq, codegen_->GetLabelOf(successor));
2182 __ B(slow_path->GetEntryLabel());
2183 // slow_path will return to GetLabelOf(successor).
2184 }
2185 }
2186
InstructionCodeGeneratorARM64(HGraph * graph,CodeGeneratorARM64 * codegen)2187 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
2188 CodeGeneratorARM64* codegen)
2189 : InstructionCodeGenerator(graph, codegen),
2190 assembler_(codegen->GetAssembler()),
2191 codegen_(codegen) {}
2192
HandleBinaryOp(HBinaryOperation * instr)2193 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
2194 DCHECK_EQ(instr->InputCount(), 2U);
2195 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2196 DataType::Type type = instr->GetResultType();
2197 switch (type) {
2198 case DataType::Type::kInt32:
2199 case DataType::Type::kInt64:
2200 locations->SetInAt(0, Location::RequiresRegister());
2201 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
2202 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2203 break;
2204
2205 case DataType::Type::kFloat32:
2206 case DataType::Type::kFloat64:
2207 locations->SetInAt(0, Location::RequiresFpuRegister());
2208 locations->SetInAt(1, Location::RequiresFpuRegister());
2209 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2210 break;
2211
2212 default:
2213 LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
2214 }
2215 }
2216
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)2217 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
2218 const FieldInfo& field_info) {
2219 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2220
2221 bool object_field_get_with_read_barrier =
2222 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
2223 LocationSummary* locations =
2224 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2225 object_field_get_with_read_barrier
2226 ? LocationSummary::kCallOnSlowPath
2227 : LocationSummary::kNoCall);
2228 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
2229 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2230 // We need a temporary register for the read barrier load in
2231 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
2232 // only if the field is volatile or the offset is too big.
2233 if (field_info.IsVolatile() ||
2234 field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
2235 locations->AddTemp(FixedTempLocation());
2236 }
2237 }
2238 // Input for object receiver.
2239 locations->SetInAt(0, Location::RequiresRegister());
2240 if (DataType::IsFloatingPointType(instruction->GetType())) {
2241 locations->SetOut(Location::RequiresFpuRegister());
2242 } else {
2243 // The output overlaps for an object field get when read barriers
2244 // are enabled: we do not want the load to overwrite the object's
2245 // location, as we need it to emit the read barrier.
2246 locations->SetOut(
2247 Location::RequiresRegister(),
2248 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2249 }
2250 }
2251
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)2252 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
2253 const FieldInfo& field_info) {
2254 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2255 LocationSummary* locations = instruction->GetLocations();
2256 uint32_t receiver_input = 0;
2257 Location base_loc = locations->InAt(receiver_input);
2258 Location out = locations->Out();
2259 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
2260 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
2261 DataType::Type load_type = instruction->GetType();
2262 MemOperand field =
2263 HeapOperand(InputRegisterAt(instruction, receiver_input), field_info.GetFieldOffset());
2264
2265 if (load_type == DataType::Type::kReference && codegen_->EmitBakerReadBarrier()) {
2266 // Object FieldGet with Baker's read barrier case.
2267 // /* HeapReference<Object> */ out = *(base + offset)
2268 Register base = RegisterFrom(base_loc, DataType::Type::kReference);
2269 Location maybe_temp =
2270 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2271 // Note that potential implicit null checks are handled in this
2272 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
2273 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2274 instruction,
2275 out,
2276 base,
2277 offset,
2278 maybe_temp,
2279 /* needs_null_check= */ true,
2280 field_info.IsVolatile());
2281 } else {
2282 // General case.
2283 if (field_info.IsVolatile()) {
2284 // Note that a potential implicit null check is handled in this
2285 // CodeGeneratorARM64::LoadAcquire call.
2286 // NB: LoadAcquire will record the pc info if needed.
2287 codegen_->LoadAcquire(instruction,
2288 load_type,
2289 OutputCPURegister(instruction),
2290 field,
2291 /* needs_null_check= */ true);
2292 } else {
2293 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2294 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2295 codegen_->Load(load_type, OutputCPURegister(instruction), field);
2296 codegen_->MaybeRecordImplicitNullCheck(instruction);
2297 }
2298 if (load_type == DataType::Type::kReference) {
2299 // If read barriers are enabled, emit read barriers other than
2300 // Baker's using a slow path (and also unpoison the loaded
2301 // reference, if heap poisoning is enabled).
2302 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
2303 }
2304 }
2305 }
2306
HandleFieldSet(HInstruction * instruction)2307 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
2308 LocationSummary* locations =
2309 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2310 locations->SetInAt(0, Location::RequiresRegister());
2311 HInstruction* value = instruction->InputAt(1);
2312 if (IsZeroBitPattern(value)) {
2313 locations->SetInAt(1, Location::ConstantLocation(value));
2314 } else if (DataType::IsFloatingPointType(value->GetType())) {
2315 locations->SetInAt(1, Location::RequiresFpuRegister());
2316 } else {
2317 locations->SetInAt(1, Location::RequiresRegister());
2318 }
2319 }
2320
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,bool value_can_be_null,WriteBarrierKind write_barrier_kind)2321 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
2322 const FieldInfo& field_info,
2323 bool value_can_be_null,
2324 WriteBarrierKind write_barrier_kind) {
2325 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
2326
2327 Register obj = InputRegisterAt(instruction, 0);
2328 CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
2329 CPURegister source = value;
2330 Offset offset = field_info.GetFieldOffset();
2331 DataType::Type field_type = field_info.GetFieldType();
2332 {
2333 // We use a block to end the scratch scope before the write barrier, thus
2334 // freeing the temporary registers so they can be used in `MarkGCCard`.
2335 UseScratchRegisterScope temps(GetVIXLAssembler());
2336
2337 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
2338 DCHECK(value.IsW());
2339 Register temp = temps.AcquireW();
2340 __ Mov(temp, value.W());
2341 GetAssembler()->PoisonHeapReference(temp.W());
2342 source = temp;
2343 }
2344
2345 if (field_info.IsVolatile()) {
2346 codegen_->StoreRelease(
2347 instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check= */ true);
2348 } else {
2349 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2350 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2351 codegen_->Store(field_type, source, HeapOperand(obj, offset));
2352 codegen_->MaybeRecordImplicitNullCheck(instruction);
2353 }
2354 }
2355
2356 const bool needs_write_barrier =
2357 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
2358
2359 if (needs_write_barrier) {
2360 DCHECK_IMPLIES(Register(value).IsZero(),
2361 write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn);
2362 codegen_->MaybeMarkGCCard(
2363 obj,
2364 Register(value),
2365 value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
2366 } else if (codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind)) {
2367 codegen_->CheckGCCardIsValid(obj);
2368 }
2369 }
2370
HandleBinaryOp(HBinaryOperation * instr)2371 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
2372 DataType::Type type = instr->GetType();
2373
2374 switch (type) {
2375 case DataType::Type::kInt32:
2376 case DataType::Type::kInt64: {
2377 Register dst = OutputRegister(instr);
2378 Register lhs = InputRegisterAt(instr, 0);
2379 Operand rhs = InputOperandAt(instr, 1);
2380 if (instr->IsAdd()) {
2381 __ Add(dst, lhs, rhs);
2382 } else if (instr->IsAnd()) {
2383 __ And(dst, lhs, rhs);
2384 } else if (instr->IsOr()) {
2385 __ Orr(dst, lhs, rhs);
2386 } else if (instr->IsSub()) {
2387 __ Sub(dst, lhs, rhs);
2388 } else if (instr->IsRor()) {
2389 if (rhs.IsImmediate()) {
2390 uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
2391 __ Ror(dst, lhs, shift);
2392 } else {
2393 // Ensure shift distance is in the same size register as the result. If
2394 // we are rotating a long and the shift comes in a w register originally,
2395 // we don't need to sxtw for use as an x since the shift distances are
2396 // all & reg_bits - 1.
2397 __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
2398 }
2399 } else if (instr->IsMin() || instr->IsMax()) {
2400 __ Cmp(lhs, rhs);
2401 __ Csel(dst, lhs, rhs, instr->IsMin() ? lt : gt);
2402 } else {
2403 DCHECK(instr->IsXor());
2404 __ Eor(dst, lhs, rhs);
2405 }
2406 break;
2407 }
2408 case DataType::Type::kFloat32:
2409 case DataType::Type::kFloat64: {
2410 VRegister dst = OutputFPRegister(instr);
2411 VRegister lhs = InputFPRegisterAt(instr, 0);
2412 VRegister rhs = InputFPRegisterAt(instr, 1);
2413 if (instr->IsAdd()) {
2414 __ Fadd(dst, lhs, rhs);
2415 } else if (instr->IsSub()) {
2416 __ Fsub(dst, lhs, rhs);
2417 } else if (instr->IsMin()) {
2418 __ Fmin(dst, lhs, rhs);
2419 } else if (instr->IsMax()) {
2420 __ Fmax(dst, lhs, rhs);
2421 } else {
2422 LOG(FATAL) << "Unexpected floating-point binary operation";
2423 }
2424 break;
2425 }
2426 default:
2427 LOG(FATAL) << "Unexpected binary operation type " << type;
2428 }
2429 }
2430
HandleShift(HBinaryOperation * instr)2431 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
2432 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2433
2434 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2435 DataType::Type type = instr->GetResultType();
2436 switch (type) {
2437 case DataType::Type::kInt32:
2438 case DataType::Type::kInt64: {
2439 locations->SetInAt(0, Location::RequiresRegister());
2440 locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
2441 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2442 break;
2443 }
2444 default:
2445 LOG(FATAL) << "Unexpected shift type " << type;
2446 }
2447 }
2448
HandleShift(HBinaryOperation * instr)2449 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
2450 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2451
2452 DataType::Type type = instr->GetType();
2453 switch (type) {
2454 case DataType::Type::kInt32:
2455 case DataType::Type::kInt64: {
2456 Register dst = OutputRegister(instr);
2457 Register lhs = InputRegisterAt(instr, 0);
2458 Operand rhs = InputOperandAt(instr, 1);
2459 if (rhs.IsImmediate()) {
2460 uint32_t shift_value = rhs.GetImmediate() &
2461 (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance);
2462 if (instr->IsShl()) {
2463 __ Lsl(dst, lhs, shift_value);
2464 } else if (instr->IsShr()) {
2465 __ Asr(dst, lhs, shift_value);
2466 } else {
2467 __ Lsr(dst, lhs, shift_value);
2468 }
2469 } else {
2470 Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
2471
2472 if (instr->IsShl()) {
2473 __ Lsl(dst, lhs, rhs_reg);
2474 } else if (instr->IsShr()) {
2475 __ Asr(dst, lhs, rhs_reg);
2476 } else {
2477 __ Lsr(dst, lhs, rhs_reg);
2478 }
2479 }
2480 break;
2481 }
2482 default:
2483 LOG(FATAL) << "Unexpected shift operation type " << type;
2484 }
2485 }
2486
VisitAdd(HAdd * instruction)2487 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
2488 HandleBinaryOp(instruction);
2489 }
2490
VisitAdd(HAdd * instruction)2491 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
2492 HandleBinaryOp(instruction);
2493 }
2494
VisitAnd(HAnd * instruction)2495 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
2496 HandleBinaryOp(instruction);
2497 }
2498
VisitAnd(HAnd * instruction)2499 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
2500 HandleBinaryOp(instruction);
2501 }
2502
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instr)2503 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2504 DCHECK(DataType::IsIntegralType(instr->GetType())) << instr->GetType();
2505 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2506 locations->SetInAt(0, Location::RequiresRegister());
2507 // There is no immediate variant of negated bitwise instructions in AArch64.
2508 locations->SetInAt(1, Location::RequiresRegister());
2509 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2510 }
2511
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instr)2512 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2513 Register dst = OutputRegister(instr);
2514 Register lhs = InputRegisterAt(instr, 0);
2515 Register rhs = InputRegisterAt(instr, 1);
2516
2517 switch (instr->GetOpKind()) {
2518 case HInstruction::kAnd:
2519 __ Bic(dst, lhs, rhs);
2520 break;
2521 case HInstruction::kOr:
2522 __ Orn(dst, lhs, rhs);
2523 break;
2524 case HInstruction::kXor:
2525 __ Eon(dst, lhs, rhs);
2526 break;
2527 default:
2528 LOG(FATAL) << "Unreachable";
2529 }
2530 }
2531
VisitDataProcWithShifterOp(HDataProcWithShifterOp * instruction)2532 void LocationsBuilderARM64::VisitDataProcWithShifterOp(
2533 HDataProcWithShifterOp* instruction) {
2534 DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
2535 instruction->GetType() == DataType::Type::kInt64);
2536 LocationSummary* locations =
2537 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2538 if (instruction->GetInstrKind() == HInstruction::kNeg) {
2539 locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)));
2540 } else {
2541 locations->SetInAt(0, Location::RequiresRegister());
2542 }
2543 locations->SetInAt(1, Location::RequiresRegister());
2544 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2545 }
2546
VisitDataProcWithShifterOp(HDataProcWithShifterOp * instruction)2547 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
2548 HDataProcWithShifterOp* instruction) {
2549 DataType::Type type = instruction->GetType();
2550 HInstruction::InstructionKind kind = instruction->GetInstrKind();
2551 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
2552 Register out = OutputRegister(instruction);
2553 Register left;
2554 if (kind != HInstruction::kNeg) {
2555 left = InputRegisterAt(instruction, 0);
2556 }
2557 // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
2558 // shifter operand operation, the IR generating `right_reg` (input to the type
2559 // conversion) can have a different type from the current instruction's type,
2560 // so we manually indicate the type.
2561 Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
2562 Operand right_operand(0);
2563
2564 HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
2565 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
2566 right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
2567 } else {
2568 right_operand = Operand(right_reg,
2569 helpers::ShiftFromOpKind(op_kind),
2570 instruction->GetShiftAmount());
2571 }
2572
2573 // Logical binary operations do not support extension operations in the
2574 // operand. Note that VIXL would still manage if it was passed by generating
2575 // the extension as a separate instruction.
2576 // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
2577 DCHECK_IMPLIES(right_operand.IsExtendedRegister(),
2578 kind != HInstruction::kAnd && kind != HInstruction::kOr &&
2579 kind != HInstruction::kXor && kind != HInstruction::kNeg);
2580 switch (kind) {
2581 case HInstruction::kAdd:
2582 __ Add(out, left, right_operand);
2583 break;
2584 case HInstruction::kAnd:
2585 __ And(out, left, right_operand);
2586 break;
2587 case HInstruction::kNeg:
2588 DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
2589 __ Neg(out, right_operand);
2590 break;
2591 case HInstruction::kOr:
2592 __ Orr(out, left, right_operand);
2593 break;
2594 case HInstruction::kSub:
2595 __ Sub(out, left, right_operand);
2596 break;
2597 case HInstruction::kXor:
2598 __ Eor(out, left, right_operand);
2599 break;
2600 default:
2601 LOG(FATAL) << "Unexpected operation kind: " << kind;
2602 UNREACHABLE();
2603 }
2604 }
2605
VisitIntermediateAddress(HIntermediateAddress * instruction)2606 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2607 LocationSummary* locations =
2608 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2609 locations->SetInAt(0, Location::RequiresRegister());
2610 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
2611 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2612 }
2613
VisitIntermediateAddress(HIntermediateAddress * instruction)2614 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2615 __ Add(OutputRegister(instruction),
2616 InputRegisterAt(instruction, 0),
2617 Operand(InputOperandAt(instruction, 1)));
2618 }
2619
VisitIntermediateAddressIndex(HIntermediateAddressIndex * instruction)2620 void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
2621 LocationSummary* locations =
2622 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2623
2624 HIntConstant* shift = instruction->GetShift()->AsIntConstant();
2625
2626 locations->SetInAt(0, Location::RequiresRegister());
2627 // For byte case we don't need to shift the index variable so we can encode the data offset into
2628 // ADD instruction. For other cases we prefer the data_offset to be in register; that will hoist
2629 // data offset constant generation out of the loop and reduce the critical path length in the
2630 // loop.
2631 locations->SetInAt(1, shift->GetValue() == 0
2632 ? Location::ConstantLocation(instruction->GetOffset())
2633 : Location::RequiresRegister());
2634 locations->SetInAt(2, Location::ConstantLocation(shift));
2635 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2636 }
2637
VisitIntermediateAddressIndex(HIntermediateAddressIndex * instruction)2638 void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
2639 HIntermediateAddressIndex* instruction) {
2640 Register index_reg = InputRegisterAt(instruction, 0);
2641 uint32_t shift = Int64FromLocation(instruction->GetLocations()->InAt(2));
2642 uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
2643
2644 if (shift == 0) {
2645 __ Add(OutputRegister(instruction), index_reg, offset);
2646 } else {
2647 Register offset_reg = InputRegisterAt(instruction, 1);
2648 __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
2649 }
2650 }
2651
VisitMultiplyAccumulate(HMultiplyAccumulate * instr)2652 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2653 LocationSummary* locations =
2654 new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
2655 HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2656 if (instr->GetOpKind() == HInstruction::kSub &&
2657 accumulator->IsConstant() &&
2658 accumulator->AsConstant()->IsArithmeticZero()) {
2659 // Don't allocate register for Mneg instruction.
2660 } else {
2661 locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
2662 Location::RequiresRegister());
2663 }
2664 locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
2665 locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
2666 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2667 }
2668
VisitMultiplyAccumulate(HMultiplyAccumulate * instr)2669 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2670 Register res = OutputRegister(instr);
2671 Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
2672 Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
2673
2674 // Avoid emitting code that could trigger Cortex A53's erratum 835769.
2675 // This fixup should be carried out for all multiply-accumulate instructions:
2676 // madd, msub, smaddl, smsubl, umaddl and umsubl.
2677 if (instr->GetType() == DataType::Type::kInt64 &&
2678 codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
2679 MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
2680 ptrdiff_t off = masm->GetCursorOffset();
2681 if (off >= static_cast<ptrdiff_t>(kInstructionSize) &&
2682 masm->GetInstructionAt(off - static_cast<ptrdiff_t>(kInstructionSize))->IsLoadOrStore()) {
2683 // Make sure we emit only exactly one nop.
2684 ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2685 __ nop();
2686 }
2687 }
2688
2689 if (instr->GetOpKind() == HInstruction::kAdd) {
2690 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2691 __ Madd(res, mul_left, mul_right, accumulator);
2692 } else {
2693 DCHECK(instr->GetOpKind() == HInstruction::kSub);
2694 HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2695 if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
2696 __ Mneg(res, mul_left, mul_right);
2697 } else {
2698 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2699 __ Msub(res, mul_left, mul_right, accumulator);
2700 }
2701 }
2702 }
2703
VisitArrayGet(HArrayGet * instruction)2704 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
2705 bool object_array_get_with_read_barrier =
2706 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
2707 LocationSummary* locations =
2708 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2709 object_array_get_with_read_barrier
2710 ? LocationSummary::kCallOnSlowPath
2711 : LocationSummary::kNoCall);
2712 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
2713 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2714 if (instruction->GetIndex()->IsConstant()) {
2715 // Array loads with constant index are treated as field loads.
2716 // We need a temporary register for the read barrier load in
2717 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
2718 // only if the offset is too big.
2719 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2720 uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
2721 offset += index << DataType::SizeShift(DataType::Type::kReference);
2722 if (offset >= kReferenceLoadMinFarOffset) {
2723 locations->AddTemp(FixedTempLocation());
2724 }
2725 } else if (!instruction->GetArray()->IsIntermediateAddress()) {
2726 // We need a non-scratch temporary for the array data pointer in
2727 // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier() for the case with no
2728 // intermediate address.
2729 locations->AddTemp(Location::RequiresRegister());
2730 }
2731 }
2732 locations->SetInAt(0, Location::RequiresRegister());
2733 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2734 if (DataType::IsFloatingPointType(instruction->GetType())) {
2735 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2736 } else {
2737 // The output overlaps in the case of an object array get with
2738 // read barriers enabled: we do not want the move to overwrite the
2739 // array's location, as we need it to emit the read barrier.
2740 locations->SetOut(
2741 Location::RequiresRegister(),
2742 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2743 }
2744 }
2745
VisitArrayGet(HArrayGet * instruction)2746 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
2747 DataType::Type type = instruction->GetType();
2748 Register obj = InputRegisterAt(instruction, 0);
2749 LocationSummary* locations = instruction->GetLocations();
2750 Location index = locations->InAt(1);
2751 Location out = locations->Out();
2752 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2753 const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
2754 instruction->IsStringCharAt();
2755 MacroAssembler* masm = GetVIXLAssembler();
2756 UseScratchRegisterScope temps(masm);
2757
2758 // The non-Baker read barrier instrumentation of object ArrayGet instructions
2759 // does not support the HIntermediateAddress instruction.
2760 DCHECK(!((type == DataType::Type::kReference) &&
2761 instruction->GetArray()->IsIntermediateAddress() &&
2762 codegen_->EmitNonBakerReadBarrier()));
2763
2764 if (type == DataType::Type::kReference && codegen_->EmitBakerReadBarrier()) {
2765 // Object ArrayGet with Baker's read barrier case.
2766 // Note that a potential implicit null check is handled in the
2767 // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
2768 DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
2769 if (index.IsConstant()) {
2770 DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2771 // Array load with a constant index can be treated as a field load.
2772 offset += Int64FromLocation(index) << DataType::SizeShift(type);
2773 Location maybe_temp =
2774 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2775 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
2776 out,
2777 obj.W(),
2778 offset,
2779 maybe_temp,
2780 /* needs_null_check= */ false,
2781 /* use_load_acquire= */ false);
2782 } else {
2783 codegen_->GenerateArrayLoadWithBakerReadBarrier(
2784 instruction, out, obj.W(), offset, index, /* needs_null_check= */ false);
2785 }
2786 } else {
2787 // General case.
2788 MemOperand source = HeapOperand(obj);
2789 Register length;
2790 if (maybe_compressed_char_at) {
2791 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2792 length = temps.AcquireW();
2793 {
2794 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2795 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2796
2797 if (instruction->GetArray()->IsIntermediateAddress()) {
2798 DCHECK_LT(count_offset, offset);
2799 int64_t adjusted_offset =
2800 static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
2801 // Note that `adjusted_offset` is negative, so this will be a LDUR.
2802 __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
2803 } else {
2804 __ Ldr(length, HeapOperand(obj, count_offset));
2805 }
2806 codegen_->MaybeRecordImplicitNullCheck(instruction);
2807 }
2808 }
2809 if (index.IsConstant()) {
2810 if (maybe_compressed_char_at) {
2811 vixl::aarch64::Label uncompressed_load, done;
2812 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2813 "Expecting 0=compressed, 1=uncompressed");
2814 __ Tbnz(length.W(), 0, &uncompressed_load);
2815 __ Ldrb(Register(OutputCPURegister(instruction)),
2816 HeapOperand(obj, offset + Int64FromLocation(index)));
2817 __ B(&done);
2818 __ Bind(&uncompressed_load);
2819 __ Ldrh(Register(OutputCPURegister(instruction)),
2820 HeapOperand(obj, offset + (Int64FromLocation(index) << 1)));
2821 __ Bind(&done);
2822 } else {
2823 offset += Int64FromLocation(index) << DataType::SizeShift(type);
2824 source = HeapOperand(obj, offset);
2825 }
2826 } else {
2827 Register temp = temps.AcquireSameSizeAs(obj);
2828 if (instruction->GetArray()->IsIntermediateAddress()) {
2829 // We do not need to compute the intermediate address from the array: the
2830 // input instruction has done it already. See the comment in
2831 // `TryExtractArrayAccessAddress()`.
2832 if (kIsDebugBuild) {
2833 HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
2834 DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
2835 }
2836 temp = obj;
2837 } else {
2838 __ Add(temp, obj, offset);
2839 }
2840 if (maybe_compressed_char_at) {
2841 vixl::aarch64::Label uncompressed_load, done;
2842 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2843 "Expecting 0=compressed, 1=uncompressed");
2844 __ Tbnz(length.W(), 0, &uncompressed_load);
2845 __ Ldrb(Register(OutputCPURegister(instruction)),
2846 HeapOperand(temp, XRegisterFrom(index), LSL, 0));
2847 __ B(&done);
2848 __ Bind(&uncompressed_load);
2849 __ Ldrh(Register(OutputCPURegister(instruction)),
2850 HeapOperand(temp, XRegisterFrom(index), LSL, 1));
2851 __ Bind(&done);
2852 } else {
2853 source = HeapOperand(temp, XRegisterFrom(index), LSL, DataType::SizeShift(type));
2854 }
2855 }
2856 if (!maybe_compressed_char_at) {
2857 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2858 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2859 codegen_->Load(type, OutputCPURegister(instruction), source);
2860 codegen_->MaybeRecordImplicitNullCheck(instruction);
2861 }
2862
2863 if (type == DataType::Type::kReference) {
2864 static_assert(
2865 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
2866 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
2867 Location obj_loc = locations->InAt(0);
2868 if (index.IsConstant()) {
2869 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
2870 } else {
2871 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
2872 }
2873 }
2874 }
2875 }
2876
VisitArrayLength(HArrayLength * instruction)2877 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
2878 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
2879 locations->SetInAt(0, Location::RequiresRegister());
2880 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2881 }
2882
VisitArrayLength(HArrayLength * instruction)2883 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
2884 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
2885 vixl::aarch64::Register out = OutputRegister(instruction);
2886 {
2887 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2888 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2889 __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
2890 codegen_->MaybeRecordImplicitNullCheck(instruction);
2891 }
2892 // Mask out compression flag from String's array length.
2893 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
2894 __ Lsr(out.W(), out.W(), 1u);
2895 }
2896 }
2897
VisitArraySet(HArraySet * instruction)2898 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
2899 DataType::Type value_type = instruction->GetComponentType();
2900
2901 bool needs_type_check = instruction->NeedsTypeCheck();
2902 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
2903 instruction,
2904 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
2905 locations->SetInAt(0, Location::RequiresRegister());
2906 locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetIndex()));
2907 HInstruction* value = instruction->GetValue();
2908 if (IsZeroBitPattern(value)) {
2909 locations->SetInAt(2, Location::ConstantLocation(value));
2910 } else if (DataType::IsFloatingPointType(value_type)) {
2911 locations->SetInAt(2, Location::RequiresFpuRegister());
2912 } else {
2913 locations->SetInAt(2, Location::RequiresRegister());
2914 }
2915 }
2916
// Emits the code for an HArraySet (`array[index] = value`).
//
// Two main paths are generated:
//  - No write barrier needed: compute the element address (constant offset, or base plus
//    scaled index register) and emit a single store, optionally preceded by a GC card
//    validity check.
//  - Write barrier needed: optionally emit a null check on the value and an inline type
//    check (with ArraySetSlowPathARM64 as fallback), mark the GC card, then store the
//    (possibly poisoned) reference.
void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
  DataType::Type value_type = instruction->GetComponentType();
  LocationSummary* locations = instruction->GetLocations();
  bool needs_type_check = instruction->NeedsTypeCheck();
  const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
  bool needs_write_barrier =
      codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);

  Register array = InputRegisterAt(instruction, 0);
  CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
  // `source` is the register that is actually stored. It only differs from `value` when
  // heap poisoning replaces it with a poisoned copy held in a scratch register.
  CPURegister source = value;
  Location index = locations->InAt(1);
  // Offset of the first array element; the constant-index paths add the scaled index to it.
  size_t offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
  MemOperand destination = HeapOperand(array);
  MacroAssembler* masm = GetVIXLAssembler();

  if (!needs_write_barrier) {
    if (codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind)) {
      codegen_->CheckGCCardIsValid(array);
    }

    DCHECK(!needs_type_check);
    UseScratchRegisterScope temps(masm);
    if (index.IsConstant()) {
      // Fold the constant index into the element offset.
      offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
      destination = HeapOperand(array, offset);
    } else {
      Register temp_dest = temps.AcquireSameSizeAs(array);
      if (instruction->GetArray()->IsIntermediateAddress()) {
        // We do not need to compute the intermediate address from the array: the
        // input instruction has done it already. See the comment in
        // `TryExtractArrayAccessAddress()`.
        if (kIsDebugBuild) {
          HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
          DCHECK(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
        }
        temp_dest = array;
      } else {
        __ Add(temp_dest, array, offset);
      }
      // Address = base + (index << log2(element size)).
      destination = HeapOperand(temp_dest,
                                XRegisterFrom(index),
                                LSL,
                                DataType::SizeShift(value_type));
    }

    if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
      // Poison a copy so that the register holding `value` is left untouched.
      DCHECK(value.IsW());
      Register temp_src = temps.AcquireW();
      __ Mov(temp_src, value.W());
      GetAssembler()->PoisonHeapReference(temp_src.W());
      source = temp_src;
    }

    {
      // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
      codegen_->Store(value_type, source, destination);
      codegen_->MaybeRecordImplicitNullCheck(instruction);
    }
  } else {
    DCHECK(!instruction->GetArray()->IsIntermediateAddress());
    bool can_value_be_null = true;
    // The WriteBarrierKind::kEmitNotBeingReliedOn case is able to skip the write barrier when its
    // value is null (without an extra CompareAndBranchIfZero since we already checked if the
    // value is null for the type check).
    bool skip_marking_gc_card = false;
    SlowPathCodeARM64* slow_path = nullptr;
    vixl::aarch64::Label skip_writing_card;
    if (!Register(value).IsZero()) {
      can_value_be_null = instruction->GetValueCanBeNull();
      skip_marking_gc_card =
          can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
      vixl::aarch64::Label do_store;
      if (can_value_be_null) {
        // A null value bypasses the type check below. It branches either past the card
        // marking (`skip_writing_card`) or to just before it (`do_store`).
        if (skip_marking_gc_card) {
          __ Cbz(Register(value), &skip_writing_card);
        } else {
          __ Cbz(Register(value), &do_store);
        }
      }

      if (needs_type_check) {
        slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction);
        codegen_->AddSlowPath(slow_path);

        const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
        const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
        const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();

        UseScratchRegisterScope temps(masm);
        Register temp = temps.AcquireSameSizeAs(array);
        Register temp2 = temps.AcquireSameSizeAs(array);

        // Note that when Baker read barriers are enabled, the type
        // checks are performed without read barriers.  This is fine,
        // even in the case where a class object is in the from-space
        // after the flip, as a comparison involving such a type would
        // not produce a false positive; it may of course produce a
        // false negative, in which case we would take the ArraySet
        // slow path.

        // /* HeapReference<Class> */ temp = array->klass_
        {
          // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
          EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
          __ Ldr(temp, HeapOperand(array, class_offset));
          codegen_->MaybeRecordImplicitNullCheck(instruction);
        }
        GetAssembler()->MaybeUnpoisonHeapReference(temp);

        // /* HeapReference<Class> */ temp = temp->component_type_
        __ Ldr(temp, HeapOperand(temp, component_offset));
        // /* HeapReference<Class> */ temp2 = value->klass_
        __ Ldr(temp2, HeapOperand(Register(value), class_offset));
        // If heap poisoning is enabled, no need to unpoison `temp`
        // nor `temp2`, as we are comparing two poisoned references.
        __ Cmp(temp, temp2);

        if (instruction->StaticTypeOfArrayIsObjectArray()) {
          // On a class mismatch the store is still done inline when the component type's
          // super class is null; any other mismatch goes to the slow path.
          vixl::aarch64::Label do_put;
          __ B(eq, &do_put);
          // If heap poisoning is enabled, the `temp` reference has
          // not been unpoisoned yet; unpoison it now.
          GetAssembler()->MaybeUnpoisonHeapReference(temp);

          // /* HeapReference<Class> */ temp = temp->super_class_
          __ Ldr(temp, HeapOperand(temp, super_offset));
          // If heap poisoning is enabled, no need to unpoison
          // `temp`, as we are comparing against null below.
          __ Cbnz(temp, slow_path->GetEntryLabel());
          __ Bind(&do_put);
        } else {
          __ B(ne, slow_path->GetEntryLabel());
        }
      }

      if (can_value_be_null && !skip_marking_gc_card) {
        DCHECK(do_store.IsLinked());
        __ Bind(&do_store);
      }
    }

    DCHECK_NE(write_barrier_kind, WriteBarrierKind::kDontEmit);
    DCHECK_IMPLIES(Register(value).IsZero(),
                   write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn);
    codegen_->MarkGCCard(array);

    if (skip_marking_gc_card) {
      // Note that we don't check that the GC card is valid as it can be correctly clean.
      DCHECK(skip_writing_card.IsLinked());
      __ Bind(&skip_writing_card);
    }

    UseScratchRegisterScope temps(masm);
    if (kPoisonHeapReferences) {
      // Poison a copy so that the register holding `value` is left untouched.
      DCHECK(value.IsW());
      Register temp_source = temps.AcquireW();
      __ Mov(temp_source, value.W());
      GetAssembler()->PoisonHeapReference(temp_source);
      source = temp_source;
    }

    if (index.IsConstant()) {
      // Fold the constant index into the element offset.
      offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
      destination = HeapOperand(array, offset);
    } else {
      Register temp_base = temps.AcquireSameSizeAs(array);
      __ Add(temp_base, array, offset);
      // Address = base + (index << log2(element size)).
      destination = HeapOperand(temp_base,
                                XRegisterFrom(index),
                                LSL,
                                DataType::SizeShift(value_type));
    }

    {
      // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
      __ Str(source, destination);

      // The type check above already recorded the implicit null check on its `array->klass_`
      // load. The store is only recorded here when that load may not have run: either no
      // type check was emitted, or a null value branched around it.
      if (can_value_be_null || !needs_type_check) {
        codegen_->MaybeRecordImplicitNullCheck(instruction);
      }
    }

    if (slow_path != nullptr) {
      // The slow path's exit label is bound after the inline store.
      // NOTE(review): presumably the slow path performs the store itself - confirm in
      // ArraySetSlowPathARM64.
      __ Bind(slow_path->GetExitLabel());
    }
  }
}
3107
VisitBoundsCheck(HBoundsCheck * instruction)3108 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3109 RegisterSet caller_saves = RegisterSet::Empty();
3110 InvokeRuntimeCallingConvention calling_convention;
3111 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3112 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
3113 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
3114
3115 // If both index and length are constant, we can check the bounds statically and
3116 // generate code accordingly. We want to make sure we generate constant locations
3117 // in that case, regardless of whether they are encodable in the comparison or not.
3118 HInstruction* index = instruction->InputAt(0);
3119 HInstruction* length = instruction->InputAt(1);
3120 bool both_const = index->IsConstant() && length->IsConstant();
3121 locations->SetInAt(0, both_const
3122 ? Location::ConstantLocation(index)
3123 : ARM64EncodableConstantOrRegister(index, instruction));
3124 locations->SetInAt(1, both_const
3125 ? Location::ConstantLocation(length)
3126 : ARM64EncodableConstantOrRegister(length, instruction));
3127 }
3128
VisitBoundsCheck(HBoundsCheck * instruction)3129 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3130 LocationSummary* locations = instruction->GetLocations();
3131 Location index_loc = locations->InAt(0);
3132 Location length_loc = locations->InAt(1);
3133
3134 int cmp_first_input = 0;
3135 int cmp_second_input = 1;
3136 Condition cond = hs;
3137
3138 if (index_loc.IsConstant()) {
3139 int64_t index = Int64FromLocation(index_loc);
3140 if (length_loc.IsConstant()) {
3141 int64_t length = Int64FromLocation(length_loc);
3142 if (index < 0 || index >= length) {
3143 BoundsCheckSlowPathARM64* slow_path =
3144 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
3145 codegen_->AddSlowPath(slow_path);
3146 __ B(slow_path->GetEntryLabel());
3147 } else {
3148 // BCE will remove the bounds check if we are guaranteed to pass.
3149 // However, some optimization after BCE may have generated this, and we should not
3150 // generate a bounds check if it is a valid range.
3151 }
3152 return;
3153 }
3154 // Only the index is constant: change the order of the operands and commute the condition
3155 // so we can use an immediate constant for the index (only the second input to a cmp
3156 // instruction can be an immediate).
3157 cmp_first_input = 1;
3158 cmp_second_input = 0;
3159 cond = ls;
3160 }
3161 BoundsCheckSlowPathARM64* slow_path =
3162 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
3163 __ Cmp(InputRegisterAt(instruction, cmp_first_input),
3164 InputOperandAt(instruction, cmp_second_input));
3165 codegen_->AddSlowPath(slow_path);
3166 __ B(slow_path->GetEntryLabel(), cond);
3167 }
3168
VisitClinitCheck(HClinitCheck * check)3169 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
3170 LocationSummary* locations =
3171 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
3172 locations->SetInAt(0, Location::RequiresRegister());
3173 if (check->HasUses()) {
3174 locations->SetOut(Location::SameAsFirstInput());
3175 }
3176 // Rely on the type initialization to save everything we need.
3177 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
3178 }
3179
VisitClinitCheck(HClinitCheck * check)3180 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
3181 // We assume the class is not null.
3182 SlowPathCodeARM64* slow_path =
3183 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(check->GetLoadClass(), check);
3184 codegen_->AddSlowPath(slow_path);
3185 GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
3186 }
3187
IsFloatingPointZeroConstant(HInstruction * inst)3188 static bool IsFloatingPointZeroConstant(HInstruction* inst) {
3189 return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
3190 || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
3191 }
3192
GenerateFcmp(HInstruction * instruction)3193 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
3194 VRegister lhs_reg = InputFPRegisterAt(instruction, 0);
3195 Location rhs_loc = instruction->GetLocations()->InAt(1);
3196 if (rhs_loc.IsConstant()) {
3197 // 0.0 is the only immediate that can be encoded directly in
3198 // an FCMP instruction.
3199 //
3200 // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
3201 // specify that in a floating-point comparison, positive zero
3202 // and negative zero are considered equal, so we can use the
3203 // literal 0.0 for both cases here.
3204 //
3205 // Note however that some methods (Float.equal, Float.compare,
3206 // Float.compareTo, Double.equal, Double.compare,
3207 // Double.compareTo, Math.max, Math.min, StrictMath.max,
3208 // StrictMath.min) consider 0.0 to be (strictly) greater than
3209 // -0.0. So if we ever translate calls to these methods into a
3210 // HCompare instruction, we must handle the -0.0 case with
3211 // care here.
3212 DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
3213 __ Fcmp(lhs_reg, 0.0);
3214 } else {
3215 __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
3216 }
3217 }
3218
VisitCompare(HCompare * compare)3219 void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
3220 LocationSummary* locations =
3221 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
3222 DataType::Type in_type = compare->InputAt(0)->GetType();
3223 HInstruction* rhs = compare->InputAt(1);
3224 switch (in_type) {
3225 case DataType::Type::kBool:
3226 case DataType::Type::kUint8:
3227 case DataType::Type::kInt8:
3228 case DataType::Type::kUint16:
3229 case DataType::Type::kInt16:
3230 case DataType::Type::kInt32:
3231 case DataType::Type::kInt64: {
3232 locations->SetInAt(0, Location::RequiresRegister());
3233 locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, compare));
3234 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3235 break;
3236 }
3237 case DataType::Type::kFloat32:
3238 case DataType::Type::kFloat64: {
3239 locations->SetInAt(0, Location::RequiresFpuRegister());
3240 locations->SetInAt(1,
3241 IsFloatingPointZeroConstant(rhs)
3242 ? Location::ConstantLocation(rhs)
3243 : Location::RequiresFpuRegister());
3244 locations->SetOut(Location::RequiresRegister());
3245 break;
3246 }
3247 default:
3248 LOG(FATAL) << "Unexpected type for compare operation " << in_type;
3249 }
3250 }
3251
VisitCompare(HCompare * compare)3252 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
3253 DataType::Type in_type = compare->InputAt(0)->GetType();
3254
3255 // 0 if: left == right
3256 // 1 if: left > right
3257 // -1 if: left < right
3258 switch (in_type) {
3259 case DataType::Type::kBool:
3260 case DataType::Type::kUint8:
3261 case DataType::Type::kInt8:
3262 case DataType::Type::kUint16:
3263 case DataType::Type::kInt16:
3264 case DataType::Type::kInt32:
3265 case DataType::Type::kInt64: {
3266 Register result = OutputRegister(compare);
3267 Register left = InputRegisterAt(compare, 0);
3268 Operand right = InputOperandAt(compare, 1);
3269 __ Cmp(left, right);
3270 __ Cset(result, ne); // result == +1 if NE or 0 otherwise
3271 __ Cneg(result, result, lt); // result == -1 if LT or unchanged otherwise
3272 break;
3273 }
3274 case DataType::Type::kFloat32:
3275 case DataType::Type::kFloat64: {
3276 Register result = OutputRegister(compare);
3277 GenerateFcmp(compare);
3278 __ Cset(result, ne);
3279 __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
3280 break;
3281 }
3282 default:
3283 LOG(FATAL) << "Unimplemented compare type " << in_type;
3284 }
3285 }
3286
HandleCondition(HCondition * instruction)3287 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
3288 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
3289
3290 HInstruction* rhs = instruction->InputAt(1);
3291 if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3292 locations->SetInAt(0, Location::RequiresFpuRegister());
3293 locations->SetInAt(1,
3294 IsFloatingPointZeroConstant(rhs)
3295 ? Location::ConstantLocation(rhs)
3296 : Location::RequiresFpuRegister());
3297 } else {
3298 // Integer cases.
3299 locations->SetInAt(0, Location::RequiresRegister());
3300 locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, instruction));
3301 }
3302
3303 if (!instruction->IsEmittedAtUseSite()) {
3304 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3305 }
3306 }
3307
HandleCondition(HCondition * instruction)3308 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
3309 if (instruction->IsEmittedAtUseSite()) {
3310 return;
3311 }
3312
3313 LocationSummary* locations = instruction->GetLocations();
3314 Register res = RegisterFrom(locations->Out(), instruction->GetType());
3315 IfCondition if_cond = instruction->GetCondition();
3316
3317 if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3318 GenerateFcmp(instruction);
3319 __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
3320 } else {
3321 // Integer cases.
3322 Register lhs = InputRegisterAt(instruction, 0);
3323 Operand rhs = InputOperandAt(instruction, 1);
3324 __ Cmp(lhs, rhs);
3325 __ Cset(res, ARM64Condition(if_cond));
3326 }
3327 }
3328
3329 #define FOR_EACH_CONDITION_INSTRUCTION(M) \
3330 M(Equal) \
3331 M(NotEqual) \
3332 M(LessThan) \
3333 M(LessThanOrEqual) \
3334 M(GreaterThan) \
3335 M(GreaterThanOrEqual) \
3336 M(Below) \
3337 M(BelowOrEqual) \
3338 M(Above) \
3339 M(AboveOrEqual)
3340 #define DEFINE_CONDITION_VISITORS(Name) \
3341 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } \
3342 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)3343 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
3344 #undef DEFINE_CONDITION_VISITORS
3345 #undef FOR_EACH_CONDITION_INSTRUCTION
3346
// Emits an HDiv whose divisor (input 1) is a constant with a power-of-two absolute value.
//
// The quotient is an arithmetic shift right of the dividend by ctz(|imm|), negated when
// the divisor is negative. Unless the dividend is known to be non-negative (or the minimum
// value), a negative dividend is first biased by |imm| - 1 so that the shift rounds
// toward zero.
void InstructionCodeGeneratorARM64::GenerateIntDivForPower2Denom(HDiv* instruction) {
  int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
  uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
  DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;

  Register out = OutputRegister(instruction);
  Register dividend = InputRegisterAt(instruction, 0);

  // The register that is finally shifted: the raw dividend or its biased copy in `out`.
  Register final_dividend;
  if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
    // No need to adjust the result for non-negative dividends or the INT32_MIN/INT64_MIN dividends.
    // NOTE: The generated code for HDiv correctly works for the INT32_MIN/INT64_MIN dividends:
    //   imm == 2
    //     add out, dividend(0x80000000), dividend(0x80000000), lsr #31 => out = 0x80000001
    //     asr out, out(0x80000001), #1 => out = 0xc0000000
    //     This is the same as 'asr out, 0x80000000, #1'
    //
    //   imm > 2
    //     add temp, dividend(0x80000000), imm - 1 => temp = 0b10..01..1, where the number
    //         of the rightmost 1s is ctz_imm.
    //     cmp dividend(0x80000000), 0 => N = 1, V = 0 (lt is true)
    //     csel out, temp(0b10..01..1), dividend(0x80000000), lt => out = 0b10..01..1
    //     asr out, out(0b10..01..1), #ctz_imm => out = 0b1..10..0, where the number of the
    //         leftmost 1s is ctz_imm + 1.
    //     This is the same as 'asr out, dividend(0x80000000), #ctz_imm'.
    //
    //   imm == INT32_MIN
    //     add tmp, dividend(0x80000000), #0x7fffffff => tmp = -1
    //     cmp dividend(0x80000000), 0 => N = 1, V = 0 (lt is true)
    //     csel out, temp(-1), dividend(0x80000000), lt => out = -1
    //     neg out, out(-1), asr #31 => out = 1
    //     This is the same as 'neg out, dividend(0x80000000), asr #31'.
    final_dividend = dividend;
  } else {
    if (abs_imm == 2) {
      // The bias is 1 for negative dividends, i.e. the sign bit: add it with a single ADD.
      int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte;
      __ Add(out, dividend, Operand(dividend, LSR, bits - 1));
    } else {
      // out = (dividend < 0) ? dividend + (abs_imm - 1) : dividend
      UseScratchRegisterScope temps(GetVIXLAssembler());
      Register temp = temps.AcquireSameSizeAs(out);
      __ Add(temp, dividend, abs_imm - 1);
      __ Cmp(dividend, 0);
      __ Csel(out, temp, dividend, lt);
    }
    final_dividend = out;
  }

  int ctz_imm = CTZ(abs_imm);
  if (imm > 0) {
    __ Asr(out, final_dividend, ctz_imm);
  } else {
    // Negative divisor: negate the shifted value.
    __ Neg(out, Operand(final_dividend, ASR, ctz_imm));
  }
}
3401
// Returns true if the magic number was modified by subtracting 2^32 (Int32 div) or
// 2^64 (Int64 div), which shows up as a negative magic number for a positive divisor.
// In that case the dividend needs to be added to the multiplication result.
static inline bool NeedToAddDividend(int64_t magic_number, int64_t divisor) {
  if (divisor <= 0) {
    return false;
  }
  return magic_number < 0;
}
3407
// Returns true if the magic number was modified by adding 2^32 (Int32 div) or
// 2^64 (Int64 div), which shows up as a positive magic number for a negative divisor.
// In that case the dividend needs to be subtracted from the multiplication result.
static inline bool NeedToSubDividend(int64_t magic_number, int64_t divisor) {
  if (divisor >= 0) {
    return false;
  }
  return magic_number > 0;
}
3413
3414 // Generate code which increments the value in register 'in' by 1 if the value is negative.
3415 // It is done with 'add out, in, in, lsr #31 or #63'.
3416 // If the value is a result of an operation setting the N flag, CINC MI can be used
3417 // instead of ADD. 'use_cond_inc' controls this.
GenerateIncrementNegativeByOne(Register out,Register in,bool use_cond_inc)3418 void InstructionCodeGeneratorARM64::GenerateIncrementNegativeByOne(
3419 Register out,
3420 Register in,
3421 bool use_cond_inc) {
3422 if (use_cond_inc) {
3423 __ Cinc(out, in, mi);
3424 } else {
3425 __ Add(out, in, Operand(in, LSR, in.GetSizeInBits() - 1));
3426 }
3427 }
3428
3429 // Helper to generate code producing the result of HRem with a constant divisor.
GenerateResultRemWithAnyConstant(Register out,Register dividend,Register quotient,int64_t divisor,UseScratchRegisterScope * temps_scope)3430 void InstructionCodeGeneratorARM64::GenerateResultRemWithAnyConstant(
3431 Register out,
3432 Register dividend,
3433 Register quotient,
3434 int64_t divisor,
3435 UseScratchRegisterScope* temps_scope) {
3436 Register temp_imm = temps_scope->AcquireSameSizeAs(out);
3437 __ Mov(temp_imm, divisor);
3438 __ Msub(out, quotient, temp_imm, dividend);
3439 }
3440
// Helper to generate code for HDiv/HRem instructions when a dividend is non-negative and
// a divisor is a positive constant, not power of 2.
//
// The quotient is computed as a logical right shift of the high 64 bits of
// dividend * magic, with the dividend added first when the magic number is negative.
// For HRem the quotient is then turned into the remainder.
void InstructionCodeGeneratorARM64::GenerateInt64UnsignedDivRemWithAnyPositiveConstant(
    HBinaryOperation* instruction) {
  DCHECK(instruction->IsDiv() || instruction->IsRem());
  DCHECK(instruction->GetResultType() == DataType::Type::kInt64);

  LocationSummary* locations = instruction->GetLocations();
  Location second = locations->InAt(1);
  DCHECK(second.IsConstant());

  Register out = OutputRegister(instruction);
  Register dividend = InputRegisterAt(instruction, 0);
  int64_t imm = Int64FromConstant(second.GetConstant());
  DCHECK_GT(imm, 0);

  int64_t magic;
  int shift;
  CalculateMagicAndShiftForDivRem(imm, /* is_long= */ true, &magic, &shift);

  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register temp = temps.AcquireSameSizeAs(out);

  // Emits the quotient computation into `out`, using `temp` as scratch. The lambda's
  // parameters shadow the outer registers so that the HRem path below can route the
  // quotient into the scratch register (it passes `temp` as both `out` and `temp`).
  auto generate_unsigned_div_code = [this, magic, shift](Register out,
                                                         Register dividend,
                                                         Register temp) {
    // temp = get_high(dividend * magic)
    __ Mov(temp, magic);
    if (magic > 0 && shift == 0) {
      // No correction and no shift needed: the high half is the quotient.
      __ Smulh(out, dividend, temp);
    } else {
      __ Smulh(temp, dividend, temp);
      if (magic < 0) {
        // The negative magic means that the multiplier m is greater than INT64_MAX.
        // In such a case shift is never 0. See the proof in
        // InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant.
        __ Add(temp, temp, dividend);
      }
      DCHECK_NE(shift, 0);
      __ Lsr(out, temp, shift);
    }
  };

  if (instruction->IsDiv()) {
    generate_unsigned_div_code(out, dividend, temp);
  } else {
    // Compute the quotient into `temp`, then derive the remainder from it.
    generate_unsigned_div_code(temp, dividend, temp);
    GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
  }
}
3491
// Helper to generate code for HDiv/HRem instructions for any dividend and a constant divisor
// (not power of 2).
//
// The quotient is derived from the high 64 bits of dividend * magic: the dividend is added
// or subtracted when the magic number requires it, the result is arithmetically shifted
// right by `shift`, and it is incremented by one if negative. For HRem the quotient is
// then turned into the remainder.
void InstructionCodeGeneratorARM64::GenerateInt64DivRemWithAnyConstant(
    HBinaryOperation* instruction) {
  DCHECK(instruction->IsDiv() || instruction->IsRem());
  DCHECK(instruction->GetResultType() == DataType::Type::kInt64);

  LocationSummary* locations = instruction->GetLocations();
  Location second = locations->InAt(1);
  DCHECK(second.IsConstant());

  Register out = OutputRegister(instruction);
  Register dividend = InputRegisterAt(instruction, 0);
  int64_t imm = Int64FromConstant(second.GetConstant());

  int64_t magic;
  int shift;
  CalculateMagicAndShiftForDivRem(imm, /* is_long= */ true, &magic, &shift);

  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register temp = temps.AcquireSameSizeAs(out);

  // temp = get_high(dividend * magic)
  __ Mov(temp, magic);
  __ Smulh(temp, dividend, temp);

  // The multiplication result might need some corrections to be finalized.
  // The last correction is to increment by 1, if the result is negative.
  // Currently it is done with 'add result, temp_result, temp_result, lsr #31 or #63'.
  // Such ADD usually has latency 2, e.g. on Cortex-A55.
  // However if one of the corrections is ADD or SUB, the sign can be detected
  // with ADDS/SUBS. They set the N flag if the result is negative.
  // This allows to use CINC MI which has latency 1.
  bool use_cond_inc = false;

  // Some combinations of magic_number and the divisor require to correct the result.
  // Check whether the correction is needed.
  if (NeedToAddDividend(magic, imm)) {
    __ Adds(temp, temp, dividend);
    use_cond_inc = true;
  } else if (NeedToSubDividend(magic, imm)) {
    __ Subs(temp, temp, dividend);
    use_cond_inc = true;
  }

  if (shift != 0) {
    __ Asr(temp, temp, shift);
  }

  if (instruction->IsRem()) {
    // Finalize the quotient in `temp`, then compute the remainder from it.
    GenerateIncrementNegativeByOne(temp, temp, use_cond_inc);
    GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
  } else {
    GenerateIncrementNegativeByOne(out, temp, use_cond_inc);
  }
}
3548
// Helper to generate code for kInt32 HDiv/HRem instructions with a constant divisor.
//
// The 32-bit dividend is multiplied by the magic number into a 64-bit product, and the
// quotient is extracted from the product's high 32 bits. For HRem the quotient is then
// turned into the remainder.
void InstructionCodeGeneratorARM64::GenerateInt32DivRemWithAnyConstant(
    HBinaryOperation* instruction) {
  DCHECK(instruction->IsDiv() || instruction->IsRem());
  DCHECK(instruction->GetResultType() == DataType::Type::kInt32);

  LocationSummary* locations = instruction->GetLocations();
  Location second = locations->InAt(1);
  DCHECK(second.IsConstant());

  Register out = OutputRegister(instruction);
  Register dividend = InputRegisterAt(instruction, 0);
  int64_t imm = Int64FromConstant(second.GetConstant());

  int64_t magic;
  int shift;
  CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register temp = temps.AcquireSameSizeAs(out);

  // temp.X() = dividend * magic (full 64-bit product; the quotient is taken from its
  // high 32 bits below).
  __ Mov(temp, magic);
  __ Smull(temp.X(), dividend, temp);

  // The multiplication result might need some corrections to be finalized.
  // The last correction is to increment by 1, if the result is negative.
  // Currently it is done with 'add result, temp_result, temp_result, lsr #31 or #63'.
  // Such ADD usually has latency 2, e.g. on Cortex-A55.
  // However if one of the corrections is ADD or SUB, the sign can be detected
  // with ADDS/SUBS. They set the N flag if the result is negative.
  // This allows to use CINC MI which has latency 1.
  bool use_cond_inc = false;

  // The ADD/SUB correction is applied to the high 32 bits of the 64-bit product (the
  // dividend is shifted left by 32). For kInt32 the quotient is taken from those bits and
  // the low 32 bits are discarded by the shift below.
  if (NeedToAddDividend(magic, imm)) {
    __ Adds(temp.X(), temp.X(), Operand(dividend.X(), LSL, 32));
    use_cond_inc = true;
  } else if (NeedToSubDividend(magic, imm)) {
    __ Subs(temp.X(), temp.X(), Operand(dividend.X(), LSL, 32));
    use_cond_inc = true;
  }

  // Extract the result from the high 32 bits and apply the final right shift.
  DCHECK_LT(shift, 32);
  if (imm > 0 && HasNonNegativeInputAt(instruction, 0)) {
    // No need to adjust the result for a non-negative dividend and a positive divisor.
    if (instruction->IsDiv()) {
      __ Lsr(out.X(), temp.X(), 32 + shift);
    } else {
      __ Lsr(temp.X(), temp.X(), 32 + shift);
      GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
    }
  } else {
    __ Asr(temp.X(), temp.X(), 32 + shift);

    if (instruction->IsRem()) {
      // Finalize the quotient in `temp`, then compute the remainder from it.
      GenerateIncrementNegativeByOne(temp, temp, use_cond_inc);
      GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
    } else {
      GenerateIncrementNegativeByOne(out, temp, use_cond_inc);
    }
  }
}
3612
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction,int64_t divisor)3613 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction,
3614 int64_t divisor) {
3615 DCHECK(instruction->IsDiv() || instruction->IsRem());
3616 if (instruction->GetResultType() == DataType::Type::kInt64) {
3617 if (divisor > 0 && HasNonNegativeInputAt(instruction, 0)) {
3618 GenerateInt64UnsignedDivRemWithAnyPositiveConstant(instruction);
3619 } else {
3620 GenerateInt64DivRemWithAnyConstant(instruction);
3621 }
3622 } else {
3623 GenerateInt32DivRemWithAnyConstant(instruction);
3624 }
3625 }
3626
GenerateIntDivForConstDenom(HDiv * instruction)3627 void InstructionCodeGeneratorARM64::GenerateIntDivForConstDenom(HDiv *instruction) {
3628 int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
3629
3630 if (imm == 0) {
3631 // Do not generate anything. DivZeroCheck would prevent any code to be executed.
3632 return;
3633 }
3634
3635 if (IsPowerOfTwo(AbsOrMin(imm))) {
3636 GenerateIntDivForPower2Denom(instruction);
3637 } else {
3638 // Cases imm == -1 or imm == 1 are handled by InstructionSimplifier.
3639 DCHECK(imm < -2 || imm > 2) << imm;
3640 GenerateDivRemWithAnyConstant(instruction, imm);
3641 }
3642 }
3643
GenerateIntDiv(HDiv * instruction)3644 void InstructionCodeGeneratorARM64::GenerateIntDiv(HDiv *instruction) {
3645 DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
3646 << instruction->GetResultType();
3647
3648 if (instruction->GetLocations()->InAt(1).IsConstant()) {
3649 GenerateIntDivForConstDenom(instruction);
3650 } else {
3651 Register out = OutputRegister(instruction);
3652 Register dividend = InputRegisterAt(instruction, 0);
3653 Register divisor = InputRegisterAt(instruction, 1);
3654 __ Sdiv(out, dividend, divisor);
3655 }
3656 }
3657
VisitDiv(HDiv * div)3658 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
3659 LocationSummary* locations =
3660 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3661 switch (div->GetResultType()) {
3662 case DataType::Type::kInt32:
3663 case DataType::Type::kInt64:
3664 locations->SetInAt(0, Location::RequiresRegister());
3665 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3666 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3667 break;
3668
3669 case DataType::Type::kFloat32:
3670 case DataType::Type::kFloat64:
3671 locations->SetInAt(0, Location::RequiresFpuRegister());
3672 locations->SetInAt(1, Location::RequiresFpuRegister());
3673 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3674 break;
3675
3676 default:
3677 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3678 }
3679 }
3680
VisitDiv(HDiv * div)3681 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
3682 DataType::Type type = div->GetResultType();
3683 switch (type) {
3684 case DataType::Type::kInt32:
3685 case DataType::Type::kInt64:
3686 GenerateIntDiv(div);
3687 break;
3688
3689 case DataType::Type::kFloat32:
3690 case DataType::Type::kFloat64:
3691 __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
3692 break;
3693
3694 default:
3695 LOG(FATAL) << "Unexpected div type " << type;
3696 }
3697 }
3698
VisitDivZeroCheck(HDivZeroCheck * instruction)3699 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3700 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3701 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3702 }
3703
VisitDivZeroCheck(HDivZeroCheck * instruction)3704 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3705 SlowPathCodeARM64* slow_path =
3706 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARM64(instruction);
3707 codegen_->AddSlowPath(slow_path);
3708 Location value = instruction->GetLocations()->InAt(0);
3709
3710 DataType::Type type = instruction->GetType();
3711
3712 if (!DataType::IsIntegralType(type)) {
3713 LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
3714 UNREACHABLE();
3715 }
3716
3717 if (value.IsConstant()) {
3718 int64_t divisor = Int64FromLocation(value);
3719 if (divisor == 0) {
3720 __ B(slow_path->GetEntryLabel());
3721 } else {
3722 // A division by a non-null constant is valid. We don't need to perform
3723 // any check, so simply fall through.
3724 }
3725 } else {
3726 __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
3727 }
3728 }
3729
VisitDoubleConstant(HDoubleConstant * constant)3730 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
3731 LocationSummary* locations =
3732 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3733 locations->SetOut(Location::ConstantLocation(constant));
3734 }
3735
VisitDoubleConstant(HDoubleConstant * constant)3736 void InstructionCodeGeneratorARM64::VisitDoubleConstant(
3737 [[maybe_unused]] HDoubleConstant* constant) {
3738 // Will be generated at use site.
3739 }
3740
VisitExit(HExit * exit)3741 void LocationsBuilderARM64::VisitExit(HExit* exit) {
3742 exit->SetLocations(nullptr);
3743 }
3744
VisitExit(HExit * exit)3745 void InstructionCodeGeneratorARM64::VisitExit([[maybe_unused]] HExit* exit) {}
3746
VisitFloatConstant(HFloatConstant * constant)3747 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
3748 LocationSummary* locations =
3749 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3750 locations->SetOut(Location::ConstantLocation(constant));
3751 }
3752
VisitFloatConstant(HFloatConstant * constant)3753 void InstructionCodeGeneratorARM64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
3754 // Will be generated at use site.
3755 }
3756
HandleGoto(HInstruction * got,HBasicBlock * successor)3757 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
3758 if (successor->IsExitBlock()) {
3759 DCHECK(got->GetPrevious()->AlwaysThrows());
3760 return; // no code needed
3761 }
3762
3763 HBasicBlock* block = got->GetBlock();
3764 HInstruction* previous = got->GetPrevious();
3765 HLoopInformation* info = block->GetLoopInformation();
3766
3767 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
3768 codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
3769 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
3770 return; // `GenerateSuspendCheck()` emitted the jump.
3771 }
3772 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
3773 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
3774 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
3775 }
3776 if (!codegen_->GoesToNextBlock(block, successor)) {
3777 __ B(codegen_->GetLabelOf(successor));
3778 }
3779 }
3780
VisitGoto(HGoto * got)3781 void LocationsBuilderARM64::VisitGoto(HGoto* got) {
3782 got->SetLocations(nullptr);
3783 }
3784
VisitGoto(HGoto * got)3785 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
3786 HandleGoto(got, got->GetSuccessor());
3787 }
3788
VisitTryBoundary(HTryBoundary * try_boundary)3789 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3790 try_boundary->SetLocations(nullptr);
3791 }
3792
VisitTryBoundary(HTryBoundary * try_boundary)3793 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3794 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
3795 if (!successor->IsExitBlock()) {
3796 HandleGoto(try_boundary, successor);
3797 }
3798 }
3799
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,vixl::aarch64::Label * true_target,vixl::aarch64::Label * false_target)3800 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
3801 size_t condition_input_index,
3802 vixl::aarch64::Label* true_target,
3803 vixl::aarch64::Label* false_target) {
3804 HInstruction* cond = instruction->InputAt(condition_input_index);
3805
3806 if (true_target == nullptr && false_target == nullptr) {
3807 // Nothing to do. The code always falls through.
3808 return;
3809 } else if (cond->IsIntConstant()) {
3810 // Constant condition, statically compared against "true" (integer value 1).
3811 if (cond->AsIntConstant()->IsTrue()) {
3812 if (true_target != nullptr) {
3813 __ B(true_target);
3814 }
3815 } else {
3816 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
3817 if (false_target != nullptr) {
3818 __ B(false_target);
3819 }
3820 }
3821 return;
3822 }
3823
3824 // The following code generates these patterns:
3825 // (1) true_target == nullptr && false_target != nullptr
3826 // - opposite condition true => branch to false_target
3827 // (2) true_target != nullptr && false_target == nullptr
3828 // - condition true => branch to true_target
3829 // (3) true_target != nullptr && false_target != nullptr
3830 // - condition true => branch to true_target
3831 // - branch to false_target
3832 if (IsBooleanValueOrMaterializedCondition(cond)) {
3833 // The condition instruction has been materialized, compare the output to 0.
3834 Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
3835 DCHECK(cond_val.IsRegister());
3836 if (true_target == nullptr) {
3837 __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
3838 } else {
3839 __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
3840 }
3841 } else {
3842 // The condition instruction has not been materialized, use its inputs as
3843 // the comparison and its condition as the branch condition.
3844 HCondition* condition = cond->AsCondition();
3845
3846 DataType::Type type = condition->InputAt(0)->GetType();
3847 if (DataType::IsFloatingPointType(type)) {
3848 GenerateFcmp(condition);
3849 if (true_target == nullptr) {
3850 IfCondition opposite_condition = condition->GetOppositeCondition();
3851 __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
3852 } else {
3853 __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
3854 }
3855 } else {
3856 // Integer cases.
3857 Register lhs = InputRegisterAt(condition, 0);
3858 Operand rhs = InputOperandAt(condition, 1);
3859
3860 Condition arm64_cond;
3861 vixl::aarch64::Label* non_fallthrough_target;
3862 if (true_target == nullptr) {
3863 arm64_cond = ARM64Condition(condition->GetOppositeCondition());
3864 non_fallthrough_target = false_target;
3865 } else {
3866 arm64_cond = ARM64Condition(condition->GetCondition());
3867 non_fallthrough_target = true_target;
3868 }
3869
3870 if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
3871 rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
3872 switch (arm64_cond) {
3873 case eq:
3874 __ Cbz(lhs, non_fallthrough_target);
3875 break;
3876 case ne:
3877 __ Cbnz(lhs, non_fallthrough_target);
3878 break;
3879 case lt:
3880 // Test the sign bit and branch accordingly.
3881 __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3882 break;
3883 case ge:
3884 // Test the sign bit and branch accordingly.
3885 __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3886 break;
3887 default:
3888 // Without the `static_cast` the compiler throws an error for
3889 // `-Werror=sign-promo`.
3890 LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
3891 }
3892 } else {
3893 __ Cmp(lhs, rhs);
3894 __ B(arm64_cond, non_fallthrough_target);
3895 }
3896 }
3897 }
3898
3899 // If neither branch falls through (case 3), the conditional branch to `true_target`
3900 // was already emitted (case 2) and we need to emit a jump to `false_target`.
3901 if (true_target != nullptr && false_target != nullptr) {
3902 __ B(false_target);
3903 }
3904 }
3905
VisitIf(HIf * if_instr)3906 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
3907 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
3908 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3909 locations->SetInAt(0, Location::RequiresRegister());
3910 }
3911 }
3912
VisitIf(HIf * if_instr)3913 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
3914 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3915 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3916 vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
3917 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
3918 true_target = nullptr;
3919 }
3920 vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
3921 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
3922 false_target = nullptr;
3923 }
3924 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3925 if (GetGraph()->IsCompilingBaseline() &&
3926 codegen_->GetCompilerOptions().ProfileBranches() &&
3927 !Runtime::Current()->IsAotCompiler()) {
3928 DCHECK(if_instr->InputAt(0)->IsCondition());
3929 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3930 DCHECK(info != nullptr);
3931 BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
3932 // Currently, not all If branches are profiled.
3933 if (cache != nullptr) {
3934 uint64_t address =
3935 reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
3936 static_assert(
3937 BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
3938 "Unexpected offsets for BranchCache");
3939 vixl::aarch64::Label done;
3940 UseScratchRegisterScope temps(GetVIXLAssembler());
3941 Register temp = temps.AcquireX();
3942 Register counter = temps.AcquireW();
3943 Register condition = InputRegisterAt(if_instr, 0).X();
3944 __ Mov(temp, address);
3945 __ Ldrh(counter, MemOperand(temp, condition, LSL, 1));
3946 __ Add(counter, counter, 1);
3947 __ Tbnz(counter, 16, &done);
3948 __ Strh(counter, MemOperand(temp, condition, LSL, 1));
3949 __ Bind(&done);
3950 }
3951 }
3952 }
3953 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
3954 }
3955
VisitDeoptimize(HDeoptimize * deoptimize)3956 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3957 LocationSummary* locations = new (GetGraph()->GetAllocator())
3958 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3959 InvokeRuntimeCallingConvention calling_convention;
3960 RegisterSet caller_saves = RegisterSet::Empty();
3961 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3962 locations->SetCustomSlowPathCallerSaves(caller_saves);
3963 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3964 locations->SetInAt(0, Location::RequiresRegister());
3965 }
3966 }
3967
VisitDeoptimize(HDeoptimize * deoptimize)3968 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3969 SlowPathCodeARM64* slow_path =
3970 deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
3971 GenerateTestAndBranch(deoptimize,
3972 /* condition_input_index= */ 0,
3973 slow_path->GetEntryLabel(),
3974 /* false_target= */ nullptr);
3975 }
3976
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)3977 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3978 LocationSummary* locations = new (GetGraph()->GetAllocator())
3979 LocationSummary(flag, LocationSummary::kNoCall);
3980 locations->SetOut(Location::RequiresRegister());
3981 }
3982
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)3983 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3984 __ Ldr(OutputRegister(flag),
3985 MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
3986 }
3987
IsConditionOnFloatingPointValues(HInstruction * condition)3988 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
3989 return condition->IsCondition() &&
3990 DataType::IsFloatingPointType(condition->InputAt(0)->GetType());
3991 }
3992
GetConditionForSelect(HCondition * condition)3993 static inline Condition GetConditionForSelect(HCondition* condition) {
3994 IfCondition cond = condition->GetCondition();
3995 return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
3996 : ARM64Condition(cond);
3997 }
3998
VisitSelect(HSelect * select)3999 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
4000 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
4001 if (DataType::IsFloatingPointType(select->GetType())) {
4002 locations->SetInAt(0, Location::RequiresFpuRegister());
4003 locations->SetInAt(1, Location::RequiresFpuRegister());
4004 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4005 } else {
4006 HConstant* cst_true_value = select->GetTrueValue()->AsConstantOrNull();
4007 HConstant* cst_false_value = select->GetFalseValue()->AsConstantOrNull();
4008 bool is_true_value_constant = cst_true_value != nullptr;
4009 bool is_false_value_constant = cst_false_value != nullptr;
4010 // Ask VIXL whether we should synthesize constants in registers.
4011 // We give an arbitrary register to VIXL when dealing with non-constant inputs.
4012 Operand true_op = is_true_value_constant ?
4013 Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
4014 Operand false_op = is_false_value_constant ?
4015 Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
4016 bool true_value_in_register = false;
4017 bool false_value_in_register = false;
4018 MacroAssembler::GetCselSynthesisInformation(
4019 x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
4020 true_value_in_register |= !is_true_value_constant;
4021 false_value_in_register |= !is_false_value_constant;
4022
4023 locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
4024 : Location::ConstantLocation(cst_true_value));
4025 locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
4026 : Location::ConstantLocation(cst_false_value));
4027 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4028 }
4029
4030 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
4031 locations->SetInAt(2, Location::RequiresRegister());
4032 }
4033 }
4034
VisitSelect(HSelect * select)4035 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
4036 HInstruction* cond = select->GetCondition();
4037 Condition csel_cond;
4038
4039 if (IsBooleanValueOrMaterializedCondition(cond)) {
4040 if (cond->IsCondition() && cond->GetNext() == select) {
4041 // Use the condition flags set by the previous instruction.
4042 csel_cond = GetConditionForSelect(cond->AsCondition());
4043 } else {
4044 __ Cmp(InputRegisterAt(select, 2), 0);
4045 csel_cond = ne;
4046 }
4047 } else if (IsConditionOnFloatingPointValues(cond)) {
4048 GenerateFcmp(cond);
4049 csel_cond = GetConditionForSelect(cond->AsCondition());
4050 } else {
4051 __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
4052 csel_cond = GetConditionForSelect(cond->AsCondition());
4053 }
4054
4055 if (DataType::IsFloatingPointType(select->GetType())) {
4056 __ Fcsel(OutputFPRegister(select),
4057 InputFPRegisterAt(select, 1),
4058 InputFPRegisterAt(select, 0),
4059 csel_cond);
4060 } else {
4061 __ Csel(OutputRegister(select),
4062 InputOperandAt(select, 1),
4063 InputOperandAt(select, 0),
4064 csel_cond);
4065 }
4066 }
4067
VisitNop(HNop * nop)4068 void LocationsBuilderARM64::VisitNop(HNop* nop) {
4069 new (GetGraph()->GetAllocator()) LocationSummary(nop);
4070 }
4071
VisitNop(HNop *)4072 void InstructionCodeGeneratorARM64::VisitNop(HNop*) {
4073 // The environment recording already happened in CodeGenerator::Compile.
4074 }
4075
IncreaseFrame(size_t adjustment)4076 void CodeGeneratorARM64::IncreaseFrame(size_t adjustment) {
4077 __ Claim(adjustment);
4078 GetAssembler()->cfi().AdjustCFAOffset(adjustment);
4079 }
4080
DecreaseFrame(size_t adjustment)4081 void CodeGeneratorARM64::DecreaseFrame(size_t adjustment) {
4082 __ Drop(adjustment);
4083 GetAssembler()->cfi().AdjustCFAOffset(-adjustment);
4084 }
4085
GenerateNop()4086 void CodeGeneratorARM64::GenerateNop() {
4087 __ Nop();
4088 }
4089
VisitInstanceFieldGet(HInstanceFieldGet * instruction)4090 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4091 HandleFieldGet(instruction, instruction->GetFieldInfo());
4092 }
4093
VisitInstanceFieldGet(HInstanceFieldGet * instruction)4094 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4095 HandleFieldGet(instruction, instruction->GetFieldInfo());
4096 }
4097
VisitInstanceFieldSet(HInstanceFieldSet * instruction)4098 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4099 HandleFieldSet(instruction);
4100 }
4101
VisitInstanceFieldSet(HInstanceFieldSet * instruction)4102 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4103 HandleFieldSet(instruction,
4104 instruction->GetFieldInfo(),
4105 instruction->GetValueCanBeNull(),
4106 instruction->GetWriteBarrierKind());
4107 }
4108
4109 // Temp is used for read barrier.
NumberOfInstanceOfTemps(bool emit_read_barrier,TypeCheckKind type_check_kind)4110 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
4111 if (emit_read_barrier &&
4112 (kUseBakerReadBarrier ||
4113 type_check_kind == TypeCheckKind::kAbstractClassCheck ||
4114 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
4115 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
4116 return 1;
4117 }
4118 return 0;
4119 }
4120
4121 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
4122 // interface pointer, one for loading the current interface.
4123 // The other checks have one temp for loading the object's class.
NumberOfCheckCastTemps(bool emit_read_barrier,TypeCheckKind type_check_kind)4124 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
4125 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
4126 return 3;
4127 }
4128 return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
4129 }
4130
VisitInstanceOf(HInstanceOf * instruction)4131 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
4132 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4133 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4134 bool baker_read_barrier_slow_path = false;
4135 switch (type_check_kind) {
4136 case TypeCheckKind::kExactCheck:
4137 case TypeCheckKind::kAbstractClassCheck:
4138 case TypeCheckKind::kClassHierarchyCheck:
4139 case TypeCheckKind::kArrayObjectCheck:
4140 case TypeCheckKind::kInterfaceCheck: {
4141 bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
4142 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
4143 baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
4144 (type_check_kind != TypeCheckKind::kInterfaceCheck);
4145 break;
4146 }
4147 case TypeCheckKind::kArrayCheck:
4148 case TypeCheckKind::kUnresolvedCheck:
4149 call_kind = LocationSummary::kCallOnSlowPath;
4150 break;
4151 case TypeCheckKind::kBitstringCheck:
4152 break;
4153 }
4154
4155 LocationSummary* locations =
4156 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
4157 if (baker_read_barrier_slow_path) {
4158 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
4159 }
4160 locations->SetInAt(0, Location::RequiresRegister());
4161 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
4162 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
4163 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
4164 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
4165 } else {
4166 locations->SetInAt(1, Location::RequiresRegister());
4167 }
4168 // The "out" register is used as a temporary, so it overlaps with the inputs.
4169 // Note that TypeCheckSlowPathARM64 uses this register too.
4170 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4171 // Add temps if necessary for read barriers.
4172 locations->AddRegisterTemps(
4173 NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
4174 }
4175
VisitInstanceOf(HInstanceOf * instruction)4176 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
4177 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4178 LocationSummary* locations = instruction->GetLocations();
4179 Location obj_loc = locations->InAt(0);
4180 Register obj = InputRegisterAt(instruction, 0);
4181 Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
4182 ? Register()
4183 : InputRegisterAt(instruction, 1);
4184 Location out_loc = locations->Out();
4185 Register out = OutputRegister(instruction);
4186 const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
4187 DCHECK_LE(num_temps, 1u);
4188 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
4189 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4190 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4191 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4192 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
4193 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
4194 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
4195 const uint32_t object_array_data_offset =
4196 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
4197
4198 vixl::aarch64::Label done, zero;
4199 SlowPathCodeARM64* slow_path = nullptr;
4200
4201 // Return 0 if `obj` is null.
4202 // Avoid null check if we know `obj` is not null.
4203 if (instruction->MustDoNullCheck()) {
4204 __ Cbz(obj, &zero);
4205 }
4206
4207 switch (type_check_kind) {
4208 case TypeCheckKind::kExactCheck: {
4209 ReadBarrierOption read_barrier_option =
4210 codegen_->ReadBarrierOptionForInstanceOf(instruction);
4211 // /* HeapReference<Class> */ out = obj->klass_
4212 GenerateReferenceLoadTwoRegisters(instruction,
4213 out_loc,
4214 obj_loc,
4215 class_offset,
4216 maybe_temp_loc,
4217 read_barrier_option);
4218 __ Cmp(out, cls);
4219 __ Cset(out, eq);
4220 if (zero.IsLinked()) {
4221 __ B(&done);
4222 }
4223 break;
4224 }
4225
4226 case TypeCheckKind::kAbstractClassCheck: {
4227 ReadBarrierOption read_barrier_option =
4228 codegen_->ReadBarrierOptionForInstanceOf(instruction);
4229 // /* HeapReference<Class> */ out = obj->klass_
4230 GenerateReferenceLoadTwoRegisters(instruction,
4231 out_loc,
4232 obj_loc,
4233 class_offset,
4234 maybe_temp_loc,
4235 read_barrier_option);
4236 // If the class is abstract, we eagerly fetch the super class of the
4237 // object to avoid doing a comparison we know will fail.
4238 vixl::aarch64::Label loop, success;
4239 __ Bind(&loop);
4240 // /* HeapReference<Class> */ out = out->super_class_
4241 GenerateReferenceLoadOneRegister(instruction,
4242 out_loc,
4243 super_offset,
4244 maybe_temp_loc,
4245 read_barrier_option);
4246 // If `out` is null, we use it for the result, and jump to `done`.
4247 __ Cbz(out, &done);
4248 __ Cmp(out, cls);
4249 __ B(ne, &loop);
4250 __ Mov(out, 1);
4251 if (zero.IsLinked()) {
4252 __ B(&done);
4253 }
4254 break;
4255 }
4256
4257 case TypeCheckKind::kClassHierarchyCheck: {
4258 ReadBarrierOption read_barrier_option =
4259 codegen_->ReadBarrierOptionForInstanceOf(instruction);
4260 // /* HeapReference<Class> */ out = obj->klass_
4261 GenerateReferenceLoadTwoRegisters(instruction,
4262 out_loc,
4263 obj_loc,
4264 class_offset,
4265 maybe_temp_loc,
4266 read_barrier_option);
4267 // Walk over the class hierarchy to find a match.
4268 vixl::aarch64::Label loop, success;
4269 __ Bind(&loop);
4270 __ Cmp(out, cls);
4271 __ B(eq, &success);
4272 // /* HeapReference<Class> */ out = out->super_class_
4273 GenerateReferenceLoadOneRegister(instruction,
4274 out_loc,
4275 super_offset,
4276 maybe_temp_loc,
4277 read_barrier_option);
4278 __ Cbnz(out, &loop);
4279 // If `out` is null, we use it for the result, and jump to `done`.
4280 __ B(&done);
4281 __ Bind(&success);
4282 __ Mov(out, 1);
4283 if (zero.IsLinked()) {
4284 __ B(&done);
4285 }
4286 break;
4287 }
4288
4289 case TypeCheckKind::kArrayObjectCheck: {
4290 ReadBarrierOption read_barrier_option =
4291 codegen_->ReadBarrierOptionForInstanceOf(instruction);
4292 // /* HeapReference<Class> */ out = obj->klass_
4293 GenerateReferenceLoadTwoRegisters(instruction,
4294 out_loc,
4295 obj_loc,
4296 class_offset,
4297 maybe_temp_loc,
4298 read_barrier_option);
4299 // Do an exact check.
4300 vixl::aarch64::Label exact_check;
4301 __ Cmp(out, cls);
4302 __ B(eq, &exact_check);
4303 // Otherwise, we need to check that the object's class is a non-primitive array.
4304 // /* HeapReference<Class> */ out = out->component_type_
4305 GenerateReferenceLoadOneRegister(instruction,
4306 out_loc,
4307 component_offset,
4308 maybe_temp_loc,
4309 read_barrier_option);
4310 // If `out` is null, we use it for the result, and jump to `done`.
4311 __ Cbz(out, &done);
4312 __ Ldrh(out, HeapOperand(out, primitive_offset));
4313 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4314 __ Cbnz(out, &zero);
4315 __ Bind(&exact_check);
4316 __ Mov(out, 1);
4317 __ B(&done);
4318 break;
4319 }
4320
4321 case TypeCheckKind::kArrayCheck: {
4322 // No read barrier since the slow path will retry upon failure.
4323 // /* HeapReference<Class> */ out = obj->klass_
4324 GenerateReferenceLoadTwoRegisters(instruction,
4325 out_loc,
4326 obj_loc,
4327 class_offset,
4328 maybe_temp_loc,
4329 kWithoutReadBarrier);
4330 __ Cmp(out, cls);
4331 DCHECK(locations->OnlyCallsOnSlowPath());
4332 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4333 instruction, /* is_fatal= */ false);
4334 codegen_->AddSlowPath(slow_path);
4335 __ B(ne, slow_path->GetEntryLabel());
4336 __ Mov(out, 1);
4337 if (zero.IsLinked()) {
4338 __ B(&done);
4339 }
4340 break;
4341 }
4342
4343 case TypeCheckKind::kInterfaceCheck: {
4344 if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
4345 DCHECK(locations->OnlyCallsOnSlowPath());
4346 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4347 instruction, /* is_fatal= */ false);
4348 codegen_->AddSlowPath(slow_path);
4349 if (codegen_->EmitNonBakerReadBarrier()) {
4350 __ B(slow_path->GetEntryLabel());
4351 break;
4352 }
4353 // For Baker read barrier, take the slow path while marking.
4354 __ Cbnz(mr, slow_path->GetEntryLabel());
4355 }
4356
4357 // Fast-path without read barriers.
4358 UseScratchRegisterScope temps(GetVIXLAssembler());
4359 Register temp = temps.AcquireW();
4360 Register temp2 = temps.AcquireW();
4361 // /* HeapReference<Class> */ temp = obj->klass_
4362 __ Ldr(temp, HeapOperand(obj, class_offset));
4363 GetAssembler()->MaybeUnpoisonHeapReference(temp);
4364 // /* HeapReference<Class> */ temp = temp->iftable_
4365 __ Ldr(temp, HeapOperand(temp, iftable_offset));
4366 GetAssembler()->MaybeUnpoisonHeapReference(temp);
4367 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
4368 __ Ldr(out, HeapOperand(temp, array_length_offset));
4369 // Loop through the `IfTable` and check if any class matches.
4370 vixl::aarch64::Label loop;
4371 __ Bind(&loop);
4372 __ Cbz(out, &done); // If taken, the result in `out` is already 0 (false).
4373 __ Ldr(temp2, HeapOperand(temp, object_array_data_offset));
4374 GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4375 // Go to next interface.
4376 __ Add(temp, temp, 2 * kHeapReferenceSize);
4377 __ Sub(out, out, 2);
4378 // Compare the classes and continue the loop if they do not match.
4379 __ Cmp(cls, temp2);
4380 __ B(ne, &loop);
4381 __ Mov(out, 1);
4382 if (zero.IsLinked()) {
4383 __ B(&done);
4384 }
4385 break;
4386 }
4387
4388 case TypeCheckKind::kUnresolvedCheck: {
4389 // Note that we indeed only call on slow path, but we always go
4390 // into the slow path for the unresolved check case.
4391 //
4392 // We cannot directly call the InstanceofNonTrivial runtime
4393 // entry point without resorting to a type checking slow path
4394 // here (i.e. by calling InvokeRuntime directly), as it would
4395 // require to assign fixed registers for the inputs of this
4396 // HInstanceOf instruction (following the runtime calling
4397 // convention), which might be cluttered by the potential first
4398 // read barrier emission at the beginning of this method.
4399 //
4400 // TODO: Introduce a new runtime entry point taking the object
4401 // to test (instead of its class) as argument, and let it deal
4402 // with the read barrier issues. This will let us refactor this
4403 // case of the `switch` code as it was previously (with a direct
4404 // call to the runtime not using a type checking slow path).
4405 // This should also be beneficial for the other cases above.
4406 DCHECK(locations->OnlyCallsOnSlowPath());
4407 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4408 instruction, /* is_fatal= */ false);
4409 codegen_->AddSlowPath(slow_path);
4410 __ B(slow_path->GetEntryLabel());
4411 break;
4412 }
4413
4414 case TypeCheckKind::kBitstringCheck: {
4415 // /* HeapReference<Class> */ temp = obj->klass_
4416 GenerateReferenceLoadTwoRegisters(instruction,
4417 out_loc,
4418 obj_loc,
4419 class_offset,
4420 maybe_temp_loc,
4421 kWithoutReadBarrier);
4422
4423 GenerateBitstringTypeCheckCompare(instruction, out);
4424 __ Cset(out, eq);
4425 if (zero.IsLinked()) {
4426 __ B(&done);
4427 }
4428 break;
4429 }
4430 }
4431
4432 if (zero.IsLinked()) {
4433 __ Bind(&zero);
4434 __ Mov(out, 0);
4435 }
4436
4437 if (done.IsLinked()) {
4438 __ Bind(&done);
4439 }
4440
4441 if (slow_path != nullptr) {
4442 __ Bind(slow_path->GetExitLabel());
4443 }
4444 }
4445
VisitCheckCast(HCheckCast * instruction)4446 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
4447 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4448 LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
4449 LocationSummary* locations =
4450 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
4451 locations->SetInAt(0, Location::RequiresRegister());
4452 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
4453 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
4454 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
4455 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
4456 } else {
4457 locations->SetInAt(1, Location::RequiresRegister());
4458 }
4459 locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
4460 }
4461
// Emits the inline type check for an `HCheckCast`.
//
// A `TypeCheckSlowPathARM64` is always created and registered. Each `TypeCheckKind` case emits
// its own fast-path sequence and branches to the slow path entry label whenever that sequence
// cannot establish that the cast succeeds. The local label `done` is the success target and is
// bound right before the slow path exit label. When `MustDoNullCheck()` is true, a null `obj`
// branches straight to `done`.
void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
  LocationSummary* locations = instruction->GetLocations();
  Location obj_loc = locations->InAt(0);
  Register obj = InputRegisterAt(instruction, 0);
  // For a bitstring check there is no class register input; `cls` is left as a
  // default-constructed `Register` and is not used by that case.
  Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
      ? Register()
      : InputRegisterAt(instruction, 1);
  const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
  DCHECK_GE(num_temps, 1u);
  DCHECK_LE(num_temps, 3u);
  // The first temp always exists; the second and third are only valid when enough temps
  // were requested.
  Location temp_loc = locations->GetTemp(0);
  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
  Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
  Register temp = WRegisterFrom(temp_loc);
  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
  const uint32_t object_array_data_offset =
      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();

  bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
  SlowPathCodeARM64* type_check_slow_path =
      new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
          instruction, is_type_check_slow_path_fatal);
  codegen_->AddSlowPath(type_check_slow_path);

  vixl::aarch64::Label done;
  // Avoid null check if we know obj is not null.
  if (instruction->MustDoNullCheck()) {
    __ Cbz(obj, &done);
  }

  switch (type_check_kind) {
    case TypeCheckKind::kExactCheck:
    case TypeCheckKind::kArrayCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      __ Cmp(temp, cls);
      // Jump to slow path for throwing the exception or doing a
      // more involved array check.
      __ B(ne, type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kAbstractClassCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      // If the class is abstract, we eagerly fetch the super class of the
      // object to avoid doing a comparison we know will fail.
      vixl::aarch64::Label loop;
      __ Bind(&loop);
      // /* HeapReference<Class> */ temp = temp->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       super_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the class reference currently in `temp` is null, jump to the slow path to throw the
      // exception.
      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
      // Otherwise, compare classes.
      __ Cmp(temp, cls);
      __ B(ne, &loop);
      break;
    }

    case TypeCheckKind::kClassHierarchyCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      // Walk over the class hierarchy to find a match.
      vixl::aarch64::Label loop;
      __ Bind(&loop);
      __ Cmp(temp, cls);
      __ B(eq, &done);

      // /* HeapReference<Class> */ temp = temp->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       super_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the class reference currently in `temp` is not null, jump
      // back at the beginning of the loop.
      __ Cbnz(temp, &loop);
      // Otherwise, jump to the slow path to throw the exception.
      __ B(type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kArrayObjectCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      // Do an exact check.
      __ Cmp(temp, cls);
      __ B(eq, &done);

      // Otherwise, we need to check that the object's class is a non-primitive array.
      // /* HeapReference<Class> */ temp = temp->component_type_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       component_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the component type is null, jump to the slow path to throw the exception.
      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
      // Otherwise, the object is indeed an array. Further check that this component type is not a
      // primitive type.
      __ Ldrh(temp, HeapOperand(temp, primitive_offset));
      // The `Cbnz` below relies on `kPrimNot` being 0: any non-zero primitive type value goes
      // to the slow path.
      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
      __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kUnresolvedCheck:
      // We always go into the type check slow path for the unresolved check cases.
      //
      // We cannot directly call the CheckCast runtime entry point
      // without resorting to a type checking slow path here (i.e. by
      // calling InvokeRuntime directly), as it would require to
      // assign fixed registers for the inputs of this HCheckCast
      // instruction (following the runtime calling convention), which
      // might be cluttered by the potential first read barrier
      // emission at the beginning of this method.
      __ B(type_check_slow_path->GetEntryLabel());
      break;
    case TypeCheckKind::kInterfaceCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      // /* HeapReference<Class> */ temp = temp->iftable_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       iftable_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);
      // Load the size of the `IfTable`. The `Class::iftable_` is never null.
      __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
      // Loop through the iftable and check if any class matches.
      // The second temp holds the remaining element count and the third temp the class loaded
      // from the current slot. Running out of elements means no match: go to the slow path.
      vixl::aarch64::Label start_loop;
      __ Bind(&start_loop);
      __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
      __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
      GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
      // Go to next interface.
      // The table is walked two reference slots at a time: the pointer advances by
      // `2 * kHeapReferenceSize` and the remaining count drops by 2.
      __ Add(temp, temp, 2 * kHeapReferenceSize);
      __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
      // Compare the classes and continue the loop if they do not match.
      __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
      __ B(ne, &start_loop);
      break;
    }

    case TypeCheckKind::kBitstringCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      // The compare helper sets the condition flags; `ne` means the check failed.
      GenerateBitstringTypeCheckCompare(instruction, temp);
      __ B(ne, type_check_slow_path->GetEntryLabel());
      break;
    }
  }
  __ Bind(&done);

  __ Bind(type_check_slow_path->GetExitLabel());
}
4668
VisitIntConstant(HIntConstant * constant)4669 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
4670 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4671 locations->SetOut(Location::ConstantLocation(constant));
4672 }
4673
// Intentionally emits no code for an `HIntConstant`; the parameter is unused.
void InstructionCodeGeneratorARM64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
  // Will be generated at use site.
}
4677
VisitNullConstant(HNullConstant * constant)4678 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
4679 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4680 locations->SetOut(Location::ConstantLocation(constant));
4681 }
4682
// Intentionally emits no code for an `HNullConstant`; the parameter is unused.
void InstructionCodeGeneratorARM64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
  // Will be generated at use site.
}
4686
// Sets up locations for an unresolved invoke by delegating to `HandleInvoke()`.
void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
  // The trampoline uses the same calling convention as dex calling conventions,
  // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
  // the method_idx.
  HandleInvoke(invoke);
}
4693
// Emits the runtime call for an unresolved invoke, followed by the optional
// marking register check (tagged with this source line as its code).
void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
  codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
  codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
4698
HandleInvoke(HInvoke * invoke)4699 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
4700 InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
4701 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4702 }
4703
VisitInvokeInterface(HInvokeInterface * invoke)4704 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4705 HandleInvoke(invoke);
4706 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
4707 // We cannot request ip1 as it's blocked by the register allocator.
4708 invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1, Location::Any());
4709 }
4710 }
4711
MaybeGenerateInlineCacheCheck(HInstruction * instruction,Register klass)4712 void CodeGeneratorARM64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
4713 Register klass) {
4714 DCHECK_EQ(klass.GetCode(), 0u);
4715 if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
4716 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
4717 DCHECK(info != nullptr);
4718 InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
4719 info, GetCompilerOptions(), instruction->AsInvoke());
4720 if (cache != nullptr) {
4721 uint64_t address = reinterpret_cast64<uint64_t>(cache);
4722 vixl::aarch64::Label done;
4723 __ Mov(x8, address);
4724 __ Ldr(w9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value()));
4725 // Fast path for a monomorphic cache.
4726 __ Cmp(klass.W(), w9);
4727 __ B(eq, &done);
4728 InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
4729 __ Bind(&done);
4730 } else {
4731 // This is unexpected, but we don't guarantee stable compilation across
4732 // JIT runs so just warn about it.
4733 ScopedObjectAccess soa(Thread::Current());
4734 LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
4735 }
4736 }
4737 }
4738
// Emits the call sequence for an interface invoke.
//
// Steps, in emission order:
//   1. Load the receiver's class into `temp` (recording the implicit null check).
//   2. Optionally emit the inline cache check.
//   3. Set up the hidden argument in ip1, depending on the hidden argument load kind.
//   4. Load the IMT pointer from the class, then the IMT entry for `GetImtIndex()`.
//   5. Load the quick entry point of that entry into lr and `blr` to it, recording pc info.
void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
  // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
  LocationSummary* locations = invoke->GetLocations();
  Register temp = XRegisterFrom(locations->GetTemp(0));
  Location receiver = locations->InAt(0);
  Offset class_offset = mirror::Object::ClassOffset();
  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);

  // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
  if (receiver.IsStackSlot()) {
    // The receiver lives on the stack: reload it into `temp` first, outside the guarded scope.
    __ Ldr(temp.W(), StackOperandFrom(receiver));
    {
      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
      // /* HeapReference<Class> */ temp = temp->klass_
      __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
      codegen_->MaybeRecordImplicitNullCheck(invoke);
    }
  } else {
    EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
    // /* HeapReference<Class> */ temp = receiver->klass_
    __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
    codegen_->MaybeRecordImplicitNullCheck(invoke);
  }

  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (the
  // concurrent copying collector may not in the future).
  GetAssembler()->MaybeUnpoisonHeapReference(temp.W());

  // If we're compiling baseline, update the inline cache.
  codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);

  // The register ip1 is required to be used for the hidden argument in
  // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
  MacroAssembler* masm = GetVIXLAssembler();
  UseScratchRegisterScope scratch_scope(masm);
  scratch_scope.Exclude(ip1);
  if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
    // The hidden argument is the last input of the invoke; copy it into ip1 from wherever
    // it was allocated.
    Location interface_method = locations->InAt(invoke->GetNumberOfArguments() - 1);
    if (interface_method.IsStackSlot()) {
      __ Ldr(ip1, StackOperandFrom(interface_method));
    } else {
      __ Mov(ip1, XRegisterFrom(interface_method));
    }
  // If the load kind is through a runtime call, we will pass the method we
  // fetch the IMT, which will either be a no-op if we don't hit the conflict
  // stub, or will make us always go through the trampoline when there is a
  // conflict.
  } else if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
    codegen_->LoadMethod(
        invoke->GetHiddenArgumentLoadKind(), Location::RegisterLocation(ip1.GetCode()), invoke);
  }

  // temp = temp->imt_ (pointer-sized load from the class).
  __ Ldr(temp,
      MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
      invoke->GetImtIndex(), kArm64PointerSize));
  // temp = temp->GetImtEntryAt(method_offset);
  __ Ldr(temp, MemOperand(temp, method_offset));
  if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
    // We pass the method from the IMT in case of a conflict. This will ensure
    // we go into the runtime to resolve the actual method.
    __ Mov(ip1, temp);
  }
  // lr = temp->GetEntryPoint();
  __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));

  {
    // Ensure the pc position is recorded immediately after the `blr` instruction.
    ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);

    // lr();
    __ blr(lr);
    DCHECK(!codegen_->IsLeafMethod());
    codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  }

  codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
4822
VisitInvokeVirtual(HInvokeVirtual * invoke)4823 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4824 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4825 if (intrinsic.TryDispatch(invoke)) {
4826 return;
4827 }
4828
4829 HandleInvoke(invoke);
4830 }
4831
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)4832 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4833 // Explicit clinit checks triggered by static invokes must have been pruned by
4834 // art::PrepareForRegisterAllocation.
4835 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4836
4837 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4838 if (intrinsic.TryDispatch(invoke)) {
4839 return;
4840 }
4841
4842 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
4843 CriticalNativeCallingConventionVisitorARM64 calling_convention_visitor(
4844 /*for_register_allocation=*/ true);
4845 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4846 } else {
4847 HandleInvoke(invoke);
4848 }
4849 }
4850
TryGenerateIntrinsicCode(HInvoke * invoke,CodeGeneratorARM64 * codegen)4851 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4852 if (invoke->GetLocations()->Intrinsified()) {
4853 IntrinsicCodeGeneratorARM64 intrinsic(codegen);
4854 intrinsic.Dispatch(invoke);
4855 return true;
4856 }
4857 return false;
4858 }
4859
// Returns the dispatch info to use for a static or direct invoke. The desired
// dispatch info is returned unchanged; `method` is unused.
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    [[maybe_unused]] ArtMethod* method) {
  // On ARM64 we support all dispatch types.
  return desired_dispatch_info;
}
4866
LoadMethod(MethodLoadKind load_kind,Location temp,HInvoke * invoke)4867 void CodeGeneratorARM64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
4868 switch (load_kind) {
4869 case MethodLoadKind::kBootImageLinkTimePcRelative: {
4870 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
4871 // Add ADRP with its PC-relative method patch.
4872 vixl::aarch64::Label* adrp_label =
4873 NewBootImageMethodPatch(invoke->GetResolvedMethodReference());
4874 EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4875 // Add ADD with its PC-relative method patch.
4876 vixl::aarch64::Label* add_label =
4877 NewBootImageMethodPatch(invoke->GetResolvedMethodReference(), adrp_label);
4878 EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
4879 break;
4880 }
4881 case MethodLoadKind::kBootImageRelRo: {
4882 // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
4883 uint32_t boot_image_offset = GetBootImageOffset(invoke);
4884 LoadBootImageRelRoEntry(WRegisterFrom(temp), boot_image_offset);
4885 break;
4886 }
4887 case MethodLoadKind::kBssEntry: {
4888 // Add ADRP with its PC-relative .bss entry patch.
4889 vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(invoke->GetMethodReference());
4890 EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4891 // Add LDR with its PC-relative .bss entry patch.
4892 vixl::aarch64::Label* ldr_label =
4893 NewMethodBssEntryPatch(invoke->GetMethodReference(), adrp_label);
4894 // All aligned loads are implicitly atomic consume operations on ARM64.
4895 EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
4896 break;
4897 }
4898 case MethodLoadKind::kJitDirectAddress: {
4899 // Load method address from literal pool.
4900 __ Ldr(XRegisterFrom(temp),
4901 jit_patches_.DeduplicateUint64Literal(
4902 reinterpret_cast<uint64_t>(invoke->GetResolvedMethod())));
4903 break;
4904 }
4905 case MethodLoadKind::kRuntimeCall: {
4906 // Test situation, don't do anything.
4907 break;
4908 }
4909 default: {
4910 LOG(FATAL) << "Load kind should have already been handled " << load_kind;
4911 UNREACHABLE();
4912 }
4913 }
4914 }
4915
// Emits the call sequence for a static or direct invoke.
//
// The first `switch` materializes the callee method (or, for a boot image critical native
// call, the JNI entrypoint directly into lr) according to the method load kind. The second
// `switch` emits the actual call according to the code pointer location. `kRuntimeCall`
// is delegated entirely to `GenerateInvokeStaticOrDirectRuntimeCall()` and returns early.
void CodeGeneratorARM64::GenerateStaticOrDirectCall(
    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
  // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
  switch (invoke->GetMethodLoadKind()) {
    case MethodLoadKind::kStringInit: {
      uint32_t offset =
          GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
      // temp = thread->string_init_entrypoint
      __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
      break;
    }
    case MethodLoadKind::kRecursive:
      // The callee is the current method, which is already available as an input.
      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
      break;
    case MethodLoadKind::kRuntimeCall:
      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
      return;  // No code pointer retrieval; the runtime performs the call directly.
    case MethodLoadKind::kBootImageLinkTimePcRelative:
      DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
      if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
        // Do not materialize the method pointer, load directly the entrypoint.
        // Add ADRP with its PC-relative JNI entrypoint patch.
        vixl::aarch64::Label* adrp_label =
            NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference());
        EmitAdrpPlaceholder(adrp_label, lr);
        // Add the LDR with its PC-relative JNI entrypoint patch.
        vixl::aarch64::Label* add_label =
            NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference(), adrp_label);
        EmitLdrOffsetPlaceholder(add_label, lr, lr);
        break;
      }
      // Not a critical native call: load the method like any other load kind.
      FALLTHROUGH_INTENDED;
    default:
      LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
      break;
  }

  // Emits `blr lr` and records pc info for it; shared by the cases below that call through lr.
  auto call_lr = [&]() {
    // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
    ExactAssemblyScope eas(GetVIXLAssembler(),
                           kInstructionSize,
                           CodeBufferCheckScope::kExactSize);
    // lr()
    __ blr(lr);
    RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
  };
  switch (invoke->GetCodePtrLocation()) {
    case CodePtrLocation::kCallSelf:
      {
        DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
        // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
        ExactAssemblyScope eas(GetVIXLAssembler(),
                               kInstructionSize,
                               CodeBufferCheckScope::kExactSize);
        // Direct branch-and-link to this method's own frame entry.
        __ bl(&frame_entry_label_);
        RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      }
      break;
    case CodePtrLocation::kCallCriticalNative: {
      size_t out_frame_size =
          PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARM64,
                                    kAapcs64StackAlignment,
                                    GetCriticalNativeDirectCallFrameSize>(invoke);
      if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
        // lr was already loaded with the JNI entrypoint by the first `switch`.
        call_lr();
      } else {
        // LR = callee_method->ptr_sized_fields_.data_;  // EntryPointFromJni
        MemberOffset offset = ArtMethod::EntryPointFromJniOffset(kArm64PointerSize);
        __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
        // lr()
        call_lr();
      }
      // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
      switch (invoke->GetType()) {
        case DataType::Type::kBool:
          __ Ubfx(w0, w0, 0, 8);
          break;
        case DataType::Type::kInt8:
          __ Sbfx(w0, w0, 0, 8);
          break;
        case DataType::Type::kUint16:
          __ Ubfx(w0, w0, 0, 16);
          break;
        case DataType::Type::kInt16:
          __ Sbfx(w0, w0, 0, 16);
          break;
        case DataType::Type::kInt32:
        case DataType::Type::kInt64:
        case DataType::Type::kFloat32:
        case DataType::Type::kFloat64:
        case DataType::Type::kVoid:
          // No extension is emitted for these types.
          break;
        default:
          DCHECK(false) << invoke->GetType();
          break;
      }
      // Release the outgoing frame space, if any was reported by the preparation step.
      if (out_frame_size != 0u) {
        DecreaseFrame(out_frame_size);
      }
      break;
    }
    case CodePtrLocation::kCallArtMethod: {
      // LR = callee_method->ptr_sized_fields_.entry_point_from_quick_compiled_code_;
      MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
      __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
      // lr()
      call_lr();
      break;
    }
  }

  DCHECK(!IsLeafMethod());
}
5030
GenerateVirtualCall(HInvokeVirtual * invoke,Location temp_in,SlowPathCode * slow_path)5031 void CodeGeneratorARM64::GenerateVirtualCall(
5032 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
5033 // Use the calling convention instead of the location of the receiver, as
5034 // intrinsics may have put the receiver in a different register. In the intrinsics
5035 // slow path, the arguments have been moved to the right place, so here we are
5036 // guaranteed that the receiver is the first register of the calling convention.
5037 InvokeDexCallingConvention calling_convention;
5038 Register receiver = calling_convention.GetRegisterAt(0);
5039 Register temp = XRegisterFrom(temp_in);
5040 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5041 invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
5042 Offset class_offset = mirror::Object::ClassOffset();
5043 Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
5044
5045 DCHECK(receiver.IsRegister());
5046
5047 {
5048 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
5049 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5050 // /* HeapReference<Class> */ temp = receiver->klass_
5051 __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
5052 MaybeRecordImplicitNullCheck(invoke);
5053 }
5054 // Instead of simply (possibly) unpoisoning `temp` here, we should
5055 // emit a read barrier for the previous class reference load.
5056 // However this is not required in practice, as this is an
5057 // intermediate/temporary reference and because the current
5058 // concurrent copying collector keeps the from-space memory
5059 // intact/accessible until the end of the marking phase (the
5060 // concurrent copying collector may not in the future).
5061 GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
5062
5063 // If we're compiling baseline, update the inline cache.
5064 MaybeGenerateInlineCacheCheck(invoke, temp);
5065
5066 // temp = temp->GetMethodAt(method_offset);
5067 __ Ldr(temp, MemOperand(temp, method_offset));
5068 // lr = temp->GetEntryPoint();
5069 __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
5070 {
5071 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
5072 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
5073 // lr();
5074 __ blr(lr);
5075 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5076 }
5077 }
5078
MoveFromReturnRegister(Location trg,DataType::Type type)5079 void CodeGeneratorARM64::MoveFromReturnRegister(Location trg, DataType::Type type) {
5080 if (!trg.IsValid()) {
5081 DCHECK(type == DataType::Type::kVoid);
5082 return;
5083 }
5084
5085 DCHECK_NE(type, DataType::Type::kVoid);
5086
5087 if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) {
5088 Register trg_reg = RegisterFrom(trg, type);
5089 Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
5090 __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
5091 } else {
5092 VRegister trg_reg = FPRegisterFrom(trg, type);
5093 VRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type);
5094 __ Fmov(trg_reg, res_reg);
5095 }
5096 }
5097
VisitInvokePolymorphic(HInvokePolymorphic * invoke)5098 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
5099 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
5100 if (intrinsic.TryDispatch(invoke)) {
5101 return;
5102 }
5103 HandleInvoke(invoke);
5104 }
5105
// Emits code for a polymorphic invoke: intrinsic code when the invoke was
// intrinsified, otherwise the generic polymorphic call. Each path ends with its
// own marking register check, tagged with the source line of that call site.
void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
    codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
    return;
  }
  codegen_->GenerateInvokePolymorphicCall(invoke);
  codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
5114
// Sets up locations for a custom invoke by delegating to `HandleInvoke()`.
void LocationsBuilderARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
  HandleInvoke(invoke);
}
5118
// Emits the call for a custom invoke, followed by the optional marking register
// check (tagged with this source line as its code).
void InstructionCodeGeneratorARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
  codegen_->GenerateInvokeCustomCall(invoke);
  codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
5123
NewBootImageIntrinsicPatch(uint32_t intrinsic_data,vixl::aarch64::Label * adrp_label)5124 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageIntrinsicPatch(
5125 uint32_t intrinsic_data,
5126 vixl::aarch64::Label* adrp_label) {
5127 return NewPcRelativePatch(
5128 /* dex_file= */ nullptr, intrinsic_data, adrp_label, &boot_image_other_patches_);
5129 }
5130
NewBootImageRelRoPatch(uint32_t boot_image_offset,vixl::aarch64::Label * adrp_label)5131 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch(
5132 uint32_t boot_image_offset,
5133 vixl::aarch64::Label* adrp_label) {
5134 return NewPcRelativePatch(
5135 /* dex_file= */ nullptr, boot_image_offset, adrp_label, &boot_image_other_patches_);
5136 }
5137
NewBootImageMethodPatch(MethodReference target_method,vixl::aarch64::Label * adrp_label)5138 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch(
5139 MethodReference target_method,
5140 vixl::aarch64::Label* adrp_label) {
5141 return NewPcRelativePatch(
5142 target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_);
5143 }
5144
NewMethodBssEntryPatch(MethodReference target_method,vixl::aarch64::Label * adrp_label)5145 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch(
5146 MethodReference target_method,
5147 vixl::aarch64::Label* adrp_label) {
5148 return NewPcRelativePatch(
5149 target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_);
5150 }
5151
NewBootImageTypePatch(const DexFile & dex_file,dex::TypeIndex type_index,vixl::aarch64::Label * adrp_label)5152 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch(
5153 const DexFile& dex_file,
5154 dex::TypeIndex type_index,
5155 vixl::aarch64::Label* adrp_label) {
5156 return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_);
5157 }
5158
NewAppImageTypePatch(const DexFile & dex_file,dex::TypeIndex type_index,vixl::aarch64::Label * adrp_label)5159 vixl::aarch64::Label* CodeGeneratorARM64::NewAppImageTypePatch(
5160 const DexFile& dex_file,
5161 dex::TypeIndex type_index,
5162 vixl::aarch64::Label* adrp_label) {
5163 return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &app_image_type_patches_);
5164 }
5165
NewBssEntryTypePatch(HLoadClass * load_class,vixl::aarch64::Label * adrp_label)5166 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
5167 HLoadClass* load_class,
5168 vixl::aarch64::Label* adrp_label) {
5169 const DexFile& dex_file = load_class->GetDexFile();
5170 dex::TypeIndex type_index = load_class->GetTypeIndex();
5171 ArenaDeque<PcRelativePatchInfo>* patches = nullptr;
5172 switch (load_class->GetLoadKind()) {
5173 case HLoadClass::LoadKind::kBssEntry:
5174 patches = &type_bss_entry_patches_;
5175 break;
5176 case HLoadClass::LoadKind::kBssEntryPublic:
5177 patches = &public_type_bss_entry_patches_;
5178 break;
5179 case HLoadClass::LoadKind::kBssEntryPackage:
5180 patches = &package_type_bss_entry_patches_;
5181 break;
5182 default:
5183 LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
5184 UNREACHABLE();
5185 }
5186 return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, patches);
5187 }
5188
NewBootImageStringPatch(const DexFile & dex_file,dex::StringIndex string_index,vixl::aarch64::Label * adrp_label)5189 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch(
5190 const DexFile& dex_file,
5191 dex::StringIndex string_index,
5192 vixl::aarch64::Label* adrp_label) {
5193 return NewPcRelativePatch(
5194 &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_);
5195 }
5196
NewStringBssEntryPatch(const DexFile & dex_file,dex::StringIndex string_index,vixl::aarch64::Label * adrp_label)5197 vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
5198 const DexFile& dex_file,
5199 dex::StringIndex string_index,
5200 vixl::aarch64::Label* adrp_label) {
5201 return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
5202 }
5203
NewBootImageJniEntrypointPatch(MethodReference target_method,vixl::aarch64::Label * adrp_label)5204 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageJniEntrypointPatch(
5205 MethodReference target_method,
5206 vixl::aarch64::Label* adrp_label) {
5207 return NewPcRelativePatch(
5208 target_method.dex_file, target_method.index, adrp_label, &boot_image_jni_entrypoint_patches_);
5209 }
5210
EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset)5211 void CodeGeneratorARM64::EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset) {
5212 DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
5213 DCHECK(!GetCompilerOptions().IsJitCompiler());
5214 call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
5215 vixl::aarch64::Label* bl_label = &call_entrypoint_patches_.back().label;
5216 __ bind(bl_label);
5217 __ bl(static_cast<int64_t>(0)); // Placeholder, patched at link-time.
5218 }
5219
EmitBakerReadBarrierCbnz(uint32_t custom_data)5220 void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) {
5221 DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
5222 if (GetCompilerOptions().IsJitCompiler()) {
5223 auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
5224 vixl::aarch64::Label* slow_path_entry = &it->second.label;
5225 __ cbnz(mr, slow_path_entry);
5226 } else {
5227 baker_read_barrier_patches_.emplace_back(custom_data);
5228 vixl::aarch64::Label* cbnz_label = &baker_read_barrier_patches_.back().label;
5229 __ bind(cbnz_label);
5230 __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
5231 }
5232 }
5233
NewPcRelativePatch(const DexFile * dex_file,uint32_t offset_or_index,vixl::aarch64::Label * adrp_label,ArenaDeque<PcRelativePatchInfo> * patches)5234 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
5235 const DexFile* dex_file,
5236 uint32_t offset_or_index,
5237 vixl::aarch64::Label* adrp_label,
5238 ArenaDeque<PcRelativePatchInfo>* patches) {
5239 // Add a patch entry and return the label.
5240 patches->emplace_back(dex_file, offset_or_index);
5241 PcRelativePatchInfo* info = &patches->back();
5242 vixl::aarch64::Label* label = &info->label;
5243 // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
5244 info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
5245 return label;
5246 }
5247
EmitJitRootPatches(uint8_t * code,const uint8_t * roots_data)5248 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
5249 jit_patches_.EmitJitRootPatches(code, roots_data, *GetCodeGenerationData());
5250 }
5251
EmitAdrpPlaceholder(vixl::aarch64::Label * fixup_label,vixl::aarch64::Register reg)5252 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
5253 vixl::aarch64::Register reg) {
5254 DCHECK(reg.IsX());
5255 SingleEmissionCheckScope guard(GetVIXLAssembler());
5256 __ Bind(fixup_label);
5257 __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
5258 }
5259
EmitAddPlaceholder(vixl::aarch64::Label * fixup_label,vixl::aarch64::Register out,vixl::aarch64::Register base)5260 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
5261 vixl::aarch64::Register out,
5262 vixl::aarch64::Register base) {
5263 DCHECK(out.IsX());
5264 DCHECK(base.IsX());
5265 SingleEmissionCheckScope guard(GetVIXLAssembler());
5266 __ Bind(fixup_label);
5267 __ add(out, base, Operand(/* offset placeholder */ 0));
5268 }
5269
EmitLdrOffsetPlaceholder(vixl::aarch64::Label * fixup_label,vixl::aarch64::Register out,vixl::aarch64::Register base)5270 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
5271 vixl::aarch64::Register out,
5272 vixl::aarch64::Register base) {
5273 DCHECK(base.IsX());
5274 SingleEmissionCheckScope guard(GetVIXLAssembler());
5275 __ Bind(fixup_label);
5276 __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
5277 }
5278
LoadBootImageRelRoEntry(vixl::aarch64::Register reg,uint32_t boot_image_offset)5279 void CodeGeneratorARM64::LoadBootImageRelRoEntry(vixl::aarch64::Register reg,
5280 uint32_t boot_image_offset) {
5281 DCHECK(reg.IsW());
5282 // Add ADRP with its PC-relative boot image .data.img.rel.ro patch.
5283 vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset);
5284 EmitAdrpPlaceholder(adrp_label, reg.X());
5285 // Add LDR with its PC-relative boot image .data.img.rel.ro patch.
5286 vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label);
5287 EmitLdrOffsetPlaceholder(ldr_label, reg.W(), reg.X());
5288 }
5289
LoadBootImageAddress(vixl::aarch64::Register reg,uint32_t boot_image_reference)5290 void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg,
5291 uint32_t boot_image_reference) {
5292 if (GetCompilerOptions().IsBootImage()) {
5293 // Add ADRP with its PC-relative type patch.
5294 vixl::aarch64::Label* adrp_label = NewBootImageIntrinsicPatch(boot_image_reference);
5295 EmitAdrpPlaceholder(adrp_label, reg.X());
5296 // Add ADD with its PC-relative type patch.
5297 vixl::aarch64::Label* add_label = NewBootImageIntrinsicPatch(boot_image_reference, adrp_label);
5298 EmitAddPlaceholder(add_label, reg.X(), reg.X());
5299 } else if (GetCompilerOptions().GetCompilePic()) {
5300 LoadBootImageRelRoEntry(reg, boot_image_reference);
5301 } else {
5302 DCHECK(GetCompilerOptions().IsJitCompiler());
5303 gc::Heap* heap = Runtime::Current()->GetHeap();
5304 DCHECK(!heap->GetBootImageSpaces().empty());
5305 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5306 __ Ldr(reg.W(), DeduplicateBootImageAddressLiteral(reinterpret_cast<uintptr_t>(address)));
5307 }
5308 }
5309
LoadTypeForBootImageIntrinsic(vixl::aarch64::Register reg,TypeReference target_type)5310 void CodeGeneratorARM64::LoadTypeForBootImageIntrinsic(vixl::aarch64::Register reg,
5311 TypeReference target_type) {
5312 // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5313 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5314 // Add ADRP with its PC-relative type patch.
5315 vixl::aarch64::Label* adrp_label =
5316 NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
5317 EmitAdrpPlaceholder(adrp_label, reg.X());
5318 // Add ADD with its PC-relative type patch.
5319 vixl::aarch64::Label* add_label =
5320 NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex(), adrp_label);
5321 EmitAddPlaceholder(add_label, reg.X(), reg.X());
5322 }
5323
LoadIntrinsicDeclaringClass(vixl::aarch64::Register reg,HInvoke * invoke)5324 void CodeGeneratorARM64::LoadIntrinsicDeclaringClass(vixl::aarch64::Register reg, HInvoke* invoke) {
5325 DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
5326 if (GetCompilerOptions().IsBootImage()) {
5327 MethodReference target_method = invoke->GetResolvedMethodReference();
5328 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5329 LoadTypeForBootImageIntrinsic(reg, TypeReference(target_method.dex_file, type_idx));
5330 } else {
5331 uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
5332 LoadBootImageAddress(reg, boot_image_offset);
5333 }
5334 }
5335
LoadClassRootForIntrinsic(vixl::aarch64::Register reg,ClassRoot class_root)5336 void CodeGeneratorARM64::LoadClassRootForIntrinsic(vixl::aarch64::Register reg,
5337 ClassRoot class_root) {
5338 if (GetCompilerOptions().IsBootImage()) {
5339 ScopedObjectAccess soa(Thread::Current());
5340 ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
5341 TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex());
5342 LoadTypeForBootImageIntrinsic(reg, target_type);
5343 } else {
5344 uint32_t boot_image_offset = GetBootImageOffset(class_root);
5345 LoadBootImageAddress(reg, boot_image_offset);
5346 }
5347 }
5348
5349 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo> & infos,ArenaVector<linker::LinkerPatch> * linker_patches)5350 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
5351 const ArenaDeque<PcRelativePatchInfo>& infos,
5352 ArenaVector<linker::LinkerPatch>* linker_patches) {
5353 for (const PcRelativePatchInfo& info : infos) {
5354 linker_patches->push_back(Factory(info.label.GetLocation(),
5355 info.target_dex_file,
5356 info.pc_insn_label->GetLocation(),
5357 info.offset_or_index));
5358 }
5359 }
5360
5361 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
NoDexFileAdapter(size_t literal_offset,const DexFile * target_dex_file,uint32_t pc_insn_offset,uint32_t boot_image_offset)5362 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5363 const DexFile* target_dex_file,
5364 uint32_t pc_insn_offset,
5365 uint32_t boot_image_offset) {
5366 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
5367 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5368 }
5369
EmitLinkerPatches(ArenaVector<linker::LinkerPatch> * linker_patches)5370 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5371 DCHECK(linker_patches->empty());
5372 size_t size =
5373 boot_image_method_patches_.size() +
5374 method_bss_entry_patches_.size() +
5375 boot_image_type_patches_.size() +
5376 app_image_type_patches_.size() +
5377 type_bss_entry_patches_.size() +
5378 public_type_bss_entry_patches_.size() +
5379 package_type_bss_entry_patches_.size() +
5380 boot_image_string_patches_.size() +
5381 string_bss_entry_patches_.size() +
5382 boot_image_jni_entrypoint_patches_.size() +
5383 boot_image_other_patches_.size() +
5384 call_entrypoint_patches_.size() +
5385 baker_read_barrier_patches_.size();
5386 linker_patches->reserve(size);
5387 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5388 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5389 boot_image_method_patches_, linker_patches);
5390 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5391 boot_image_type_patches_, linker_patches);
5392 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5393 boot_image_string_patches_, linker_patches);
5394 } else {
5395 DCHECK(boot_image_method_patches_.empty());
5396 DCHECK(boot_image_type_patches_.empty());
5397 DCHECK(boot_image_string_patches_.empty());
5398 }
5399 DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
5400 if (GetCompilerOptions().IsBootImage()) {
5401 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5402 boot_image_other_patches_, linker_patches);
5403 } else {
5404 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
5405 boot_image_other_patches_, linker_patches);
5406 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
5407 app_image_type_patches_, linker_patches);
5408 }
5409 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5410 method_bss_entry_patches_, linker_patches);
5411 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5412 type_bss_entry_patches_, linker_patches);
5413 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
5414 public_type_bss_entry_patches_, linker_patches);
5415 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
5416 package_type_bss_entry_patches_, linker_patches);
5417 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5418 string_bss_entry_patches_, linker_patches);
5419 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
5420 boot_image_jni_entrypoint_patches_, linker_patches);
5421 for (const PatchInfo<vixl::aarch64::Label>& info : call_entrypoint_patches_) {
5422 DCHECK(info.target_dex_file == nullptr);
5423 linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
5424 info.label.GetLocation(), info.offset_or_index));
5425 }
5426 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
5427 linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
5428 info.label.GetLocation(), info.custom_data));
5429 }
5430 DCHECK_EQ(size, linker_patches->size());
5431 }
5432
NeedsThunkCode(const linker::LinkerPatch & patch) const5433 bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const {
5434 return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
5435 patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
5436 patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
5437 }
5438
EmitThunkCode(const linker::LinkerPatch & patch,ArenaVector<uint8_t> * code,std::string * debug_name)5439 void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch,
5440 /*out*/ ArenaVector<uint8_t>* code,
5441 /*out*/ std::string* debug_name) {
5442 Arm64Assembler assembler(GetGraph()->GetAllocator());
5443 switch (patch.GetType()) {
5444 case linker::LinkerPatch::Type::kCallRelative: {
5445 // The thunk just uses the entry point in the ArtMethod. This works even for calls
5446 // to the generic JNI and interpreter trampolines.
5447 Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5448 kArm64PointerSize).Int32Value());
5449 assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
5450 if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
5451 *debug_name = "MethodCallThunk";
5452 }
5453 break;
5454 }
5455 case linker::LinkerPatch::Type::kCallEntrypoint: {
5456 Offset offset(patch.EntrypointOffset());
5457 assembler.JumpTo(ManagedRegister(arm64::TR), offset, ManagedRegister(arm64::IP0));
5458 if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
5459 *debug_name = "EntrypointCallThunk_" + std::to_string(offset.Uint32Value());
5460 }
5461 break;
5462 }
5463 case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
5464 DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
5465 CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
5466 break;
5467 }
5468 default:
5469 LOG(FATAL) << "Unexpected patch type " << patch.GetType();
5470 UNREACHABLE();
5471 }
5472
5473 // Ensure we emit the literal pool if any.
5474 assembler.FinalizeCode();
5475 code->resize(assembler.CodeSize());
5476 MemoryRegion code_region(code->data(), code->size());
5477 assembler.CopyInstructions(code_region);
5478 }
5479
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)5480 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
5481 // Explicit clinit checks triggered by static invokes must have been pruned by
5482 // art::PrepareForRegisterAllocation.
5483 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
5484
5485 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
5486 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5487 return;
5488 }
5489
5490 LocationSummary* locations = invoke->GetLocations();
5491 codegen_->GenerateStaticOrDirectCall(
5492 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
5493
5494 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5495 }
5496
VisitInvokeVirtual(HInvokeVirtual * invoke)5497 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
5498 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
5499 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5500 return;
5501 }
5502
5503 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
5504 DCHECK(!codegen_->IsLeafMethod());
5505
5506 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5507 }
5508
GetSupportedLoadClassKind(HLoadClass::LoadKind desired_class_load_kind)5509 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
5510 HLoadClass::LoadKind desired_class_load_kind) {
5511 switch (desired_class_load_kind) {
5512 case HLoadClass::LoadKind::kInvalid:
5513 LOG(FATAL) << "UNREACHABLE";
5514 UNREACHABLE();
5515 case HLoadClass::LoadKind::kReferrersClass:
5516 break;
5517 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5518 case HLoadClass::LoadKind::kBootImageRelRo:
5519 case HLoadClass::LoadKind::kAppImageRelRo:
5520 case HLoadClass::LoadKind::kBssEntry:
5521 case HLoadClass::LoadKind::kBssEntryPublic:
5522 case HLoadClass::LoadKind::kBssEntryPackage:
5523 DCHECK(!GetCompilerOptions().IsJitCompiler());
5524 break;
5525 case HLoadClass::LoadKind::kJitBootImageAddress:
5526 case HLoadClass::LoadKind::kJitTableAddress:
5527 DCHECK(GetCompilerOptions().IsJitCompiler());
5528 break;
5529 case HLoadClass::LoadKind::kRuntimeCall:
5530 break;
5531 }
5532 return desired_class_load_kind;
5533 }
5534
VisitLoadClass(HLoadClass * cls)5535 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
5536 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5537 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5538 InvokeRuntimeCallingConvention calling_convention;
5539 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
5540 cls,
5541 LocationFrom(calling_convention.GetRegisterAt(0)),
5542 LocationFrom(vixl::aarch64::x0));
5543 DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
5544 return;
5545 }
5546 DCHECK_EQ(cls->NeedsAccessCheck(),
5547 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
5548 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
5549
5550 const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
5551 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
5552 ? LocationSummary::kCallOnSlowPath
5553 : LocationSummary::kNoCall;
5554 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
5555 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
5556 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5557 }
5558
5559 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
5560 locations->SetInAt(0, Location::RequiresRegister());
5561 }
5562 locations->SetOut(Location::RequiresRegister());
5563 if (load_kind == HLoadClass::LoadKind::kBssEntry ||
5564 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
5565 load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
5566 if (codegen_->EmitNonBakerReadBarrier()) {
5567 // For non-Baker read barrier we have a temp-clobbering call.
5568 } else {
5569 // Rely on the type resolution or initialization and marking to save everything we need.
5570 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
5571 }
5572 }
5573 }
5574
5575 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5576 // move.
VisitLoadClass(HLoadClass * cls)5577 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
5578 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5579 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5580 codegen_->GenerateLoadClassRuntimeCall(cls);
5581 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5582 return;
5583 }
5584 DCHECK_EQ(cls->NeedsAccessCheck(),
5585 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
5586 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
5587
5588 Location out_loc = cls->GetLocations()->Out();
5589 Register out = OutputRegister(cls);
5590
5591 const ReadBarrierOption read_barrier_option =
5592 cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
5593 bool generate_null_check = false;
5594 switch (load_kind) {
5595 case HLoadClass::LoadKind::kReferrersClass: {
5596 DCHECK(!cls->CanCallRuntime());
5597 DCHECK(!cls->MustGenerateClinitCheck());
5598 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5599 Register current_method = InputRegisterAt(cls, 0);
5600 codegen_->GenerateGcRootFieldLoad(cls,
5601 out_loc,
5602 current_method,
5603 ArtMethod::DeclaringClassOffset().Int32Value(),
5604 /* fixup_label= */ nullptr,
5605 read_barrier_option);
5606 break;
5607 }
5608 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
5609 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
5610 codegen_->GetCompilerOptions().IsBootImageExtension());
5611 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5612 // Add ADRP with its PC-relative type patch.
5613 const DexFile& dex_file = cls->GetDexFile();
5614 dex::TypeIndex type_index = cls->GetTypeIndex();
5615 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index);
5616 codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5617 // Add ADD with its PC-relative type patch.
5618 vixl::aarch64::Label* add_label =
5619 codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label);
5620 codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5621 break;
5622 }
5623 case HLoadClass::LoadKind::kBootImageRelRo: {
5624 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5625 uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(cls);
5626 codegen_->LoadBootImageRelRoEntry(out.W(), boot_image_offset);
5627 break;
5628 }
5629 case HLoadClass::LoadKind::kAppImageRelRo: {
5630 DCHECK(codegen_->GetCompilerOptions().IsAppImage());
5631 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5632 // Add ADRP with its PC-relative type patch.
5633 const DexFile& dex_file = cls->GetDexFile();
5634 dex::TypeIndex type_index = cls->GetTypeIndex();
5635 vixl::aarch64::Label* adrp_label = codegen_->NewAppImageTypePatch(dex_file, type_index);
5636 codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5637 // Add LDR with its PC-relative type patch.
5638 vixl::aarch64::Label* ldr_label =
5639 codegen_->NewAppImageTypePatch(dex_file, type_index, adrp_label);
5640 codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
5641 break;
5642 }
5643 case HLoadClass::LoadKind::kBssEntry:
5644 case HLoadClass::LoadKind::kBssEntryPublic:
5645 case HLoadClass::LoadKind::kBssEntryPackage: {
5646 // Add ADRP with its PC-relative Class .bss entry patch.
5647 vixl::aarch64::Register temp = XRegisterFrom(out_loc);
5648 vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(cls);
5649 codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5650 // Add LDR with its PC-relative Class .bss entry patch.
5651 vixl::aarch64::Label* ldr_label = codegen_->NewBssEntryTypePatch(cls, adrp_label);
5652 // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */
5653 // All aligned loads are implicitly atomic consume operations on ARM64.
5654 codegen_->GenerateGcRootFieldLoad(cls,
5655 out_loc,
5656 temp,
5657 /* offset placeholder */ 0u,
5658 ldr_label,
5659 read_barrier_option);
5660 generate_null_check = true;
5661 break;
5662 }
5663 case HLoadClass::LoadKind::kJitBootImageAddress: {
5664 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5665 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
5666 DCHECK_NE(address, 0u);
5667 __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5668 break;
5669 }
5670 case HLoadClass::LoadKind::kJitTableAddress: {
5671 __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
5672 cls->GetTypeIndex(),
5673 cls->GetClass()));
5674 codegen_->GenerateGcRootFieldLoad(cls,
5675 out_loc,
5676 out.X(),
5677 /* offset= */ 0,
5678 /* fixup_label= */ nullptr,
5679 read_barrier_option);
5680 break;
5681 }
5682 case HLoadClass::LoadKind::kRuntimeCall:
5683 case HLoadClass::LoadKind::kInvalid:
5684 LOG(FATAL) << "UNREACHABLE";
5685 UNREACHABLE();
5686 }
5687
5688 bool do_clinit = cls->MustGenerateClinitCheck();
5689 if (generate_null_check || do_clinit) {
5690 DCHECK(cls->CanCallRuntime());
5691 SlowPathCodeARM64* slow_path =
5692 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(cls, cls);
5693 codegen_->AddSlowPath(slow_path);
5694 if (generate_null_check) {
5695 __ Cbz(out, slow_path->GetEntryLabel());
5696 }
5697 if (cls->MustGenerateClinitCheck()) {
5698 GenerateClassInitializationCheck(slow_path, out);
5699 } else {
5700 __ Bind(slow_path->GetExitLabel());
5701 }
5702 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5703 }
5704 }
5705
VisitLoadMethodHandle(HLoadMethodHandle * load)5706 void LocationsBuilderARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
5707 InvokeRuntimeCallingConvention calling_convention;
5708 Location location = LocationFrom(calling_convention.GetRegisterAt(0));
5709 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
5710 }
5711
VisitLoadMethodHandle(HLoadMethodHandle * load)5712 void InstructionCodeGeneratorARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
5713 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
5714 }
5715
VisitLoadMethodType(HLoadMethodType * load)5716 void LocationsBuilderARM64::VisitLoadMethodType(HLoadMethodType* load) {
5717 InvokeRuntimeCallingConvention calling_convention;
5718 Location location = LocationFrom(calling_convention.GetRegisterAt(0));
5719 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
5720 }
5721
VisitLoadMethodType(HLoadMethodType * load)5722 void InstructionCodeGeneratorARM64::VisitLoadMethodType(HLoadMethodType* load) {
5723 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
5724 }
5725
GetExceptionTlsAddress()5726 static MemOperand GetExceptionTlsAddress() {
5727 return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
5728 }
5729
VisitLoadException(HLoadException * load)5730 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
5731 LocationSummary* locations =
5732 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
5733 locations->SetOut(Location::RequiresRegister());
5734 }
5735
VisitLoadException(HLoadException * instruction)5736 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
5737 __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress());
5738 }
5739
VisitClearException(HClearException * clear)5740 void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
5741 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
5742 }
5743
VisitClearException(HClearException * clear)5744 void InstructionCodeGeneratorARM64::VisitClearException([[maybe_unused]] HClearException* clear) {
5745 __ Str(wzr, GetExceptionTlsAddress());
5746 }
5747
GetSupportedLoadStringKind(HLoadString::LoadKind desired_string_load_kind)5748 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
5749 HLoadString::LoadKind desired_string_load_kind) {
5750 switch (desired_string_load_kind) {
5751 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5752 case HLoadString::LoadKind::kBootImageRelRo:
5753 case HLoadString::LoadKind::kBssEntry:
5754 DCHECK(!GetCompilerOptions().IsJitCompiler());
5755 break;
5756 case HLoadString::LoadKind::kJitBootImageAddress:
5757 case HLoadString::LoadKind::kJitTableAddress:
5758 DCHECK(GetCompilerOptions().IsJitCompiler());
5759 break;
5760 case HLoadString::LoadKind::kRuntimeCall:
5761 break;
5762 }
5763 return desired_string_load_kind;
5764 }
5765
VisitLoadString(HLoadString * load)5766 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
5767 LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
5768 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
5769 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
5770 InvokeRuntimeCallingConvention calling_convention;
5771 locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
5772 } else {
5773 locations->SetOut(Location::RequiresRegister());
5774 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
5775 if (codegen_->EmitNonBakerReadBarrier()) {
5776 // For non-Baker read barrier we have a temp-clobbering call.
5777 } else {
5778 // Rely on the pResolveString and marking to save everything we need.
5779 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
5780 }
5781 }
5782 }
5783 }
5784
5785 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5786 // move.
VisitLoadString(HLoadString * load)5787 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
5788 Register out = OutputRegister(load);
5789 Location out_loc = load->GetLocations()->Out();
5790
5791 switch (load->GetLoadKind()) {
5792 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5793 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
5794 codegen_->GetCompilerOptions().IsBootImageExtension());
5795 // Add ADRP with its PC-relative String patch.
5796 const DexFile& dex_file = load->GetDexFile();
5797 const dex::StringIndex string_index = load->GetStringIndex();
5798 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index);
5799 codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5800 // Add ADD with its PC-relative String patch.
5801 vixl::aarch64::Label* add_label =
5802 codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label);
5803 codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5804 return;
5805 }
5806 case HLoadString::LoadKind::kBootImageRelRo: {
5807 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5808 uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(load);
5809 codegen_->LoadBootImageRelRoEntry(out.W(), boot_image_offset);
5810 return;
5811 }
5812 case HLoadString::LoadKind::kBssEntry: {
5813 // Add ADRP with its PC-relative String .bss entry patch.
5814 const DexFile& dex_file = load->GetDexFile();
5815 const dex::StringIndex string_index = load->GetStringIndex();
5816 Register temp = XRegisterFrom(out_loc);
5817 vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index);
5818 codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5819 // Add LDR with its PC-relative String .bss entry patch.
5820 vixl::aarch64::Label* ldr_label =
5821 codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
5822 // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */
5823 // All aligned loads are implicitly atomic consume operations on ARM64.
5824 codegen_->GenerateGcRootFieldLoad(load,
5825 out_loc,
5826 temp,
5827 /* offset placeholder */ 0u,
5828 ldr_label,
5829 codegen_->GetCompilerReadBarrierOption());
5830 SlowPathCodeARM64* slow_path =
5831 new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
5832 codegen_->AddSlowPath(slow_path);
5833 __ Cbz(out.X(), slow_path->GetEntryLabel());
5834 __ Bind(slow_path->GetExitLabel());
5835 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5836 return;
5837 }
5838 case HLoadString::LoadKind::kJitBootImageAddress: {
5839 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
5840 DCHECK_NE(address, 0u);
5841 __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5842 return;
5843 }
5844 case HLoadString::LoadKind::kJitTableAddress: {
5845 __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
5846 load->GetStringIndex(),
5847 load->GetString()));
5848 codegen_->GenerateGcRootFieldLoad(load,
5849 out_loc,
5850 out.X(),
5851 /* offset= */ 0,
5852 /* fixup_label= */ nullptr,
5853 codegen_->GetCompilerReadBarrierOption());
5854 return;
5855 }
5856 default:
5857 break;
5858 }
5859
5860 InvokeRuntimeCallingConvention calling_convention;
5861 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
5862 __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
5863 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
5864 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
5865 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5866 }
5867
VisitLongConstant(HLongConstant * constant)5868 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
5869 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
5870 locations->SetOut(Location::ConstantLocation(constant));
5871 }
5872
VisitLongConstant(HLongConstant * constant)5873 void InstructionCodeGeneratorARM64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
5874 // Will be generated at use site.
5875 }
5876
VisitMonitorOperation(HMonitorOperation * instruction)5877 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5878 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5879 instruction, LocationSummary::kCallOnMainOnly);
5880 InvokeRuntimeCallingConvention calling_convention;
5881 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5882 }
5883
VisitMonitorOperation(HMonitorOperation * instruction)5884 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5885 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
5886 instruction,
5887 instruction->GetDexPc());
5888 if (instruction->IsEnter()) {
5889 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
5890 } else {
5891 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
5892 }
5893 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5894 }
5895
VisitMul(HMul * mul)5896 void LocationsBuilderARM64::VisitMul(HMul* mul) {
5897 LocationSummary* locations =
5898 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
5899 switch (mul->GetResultType()) {
5900 case DataType::Type::kInt32:
5901 case DataType::Type::kInt64:
5902 locations->SetInAt(0, Location::RequiresRegister());
5903 locations->SetInAt(1, Location::RequiresRegister());
5904 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5905 break;
5906
5907 case DataType::Type::kFloat32:
5908 case DataType::Type::kFloat64:
5909 locations->SetInAt(0, Location::RequiresFpuRegister());
5910 locations->SetInAt(1, Location::RequiresFpuRegister());
5911 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5912 break;
5913
5914 default:
5915 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5916 }
5917 }
5918
VisitMul(HMul * mul)5919 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) {
5920 switch (mul->GetResultType()) {
5921 case DataType::Type::kInt32:
5922 case DataType::Type::kInt64:
5923 __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
5924 break;
5925
5926 case DataType::Type::kFloat32:
5927 case DataType::Type::kFloat64:
5928 __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1));
5929 break;
5930
5931 default:
5932 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5933 }
5934 }
5935
VisitNeg(HNeg * neg)5936 void LocationsBuilderARM64::VisitNeg(HNeg* neg) {
5937 LocationSummary* locations =
5938 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
5939 switch (neg->GetResultType()) {
5940 case DataType::Type::kInt32:
5941 case DataType::Type::kInt64:
5942 locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
5943 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5944 break;
5945
5946 case DataType::Type::kFloat32:
5947 case DataType::Type::kFloat64:
5948 locations->SetInAt(0, Location::RequiresFpuRegister());
5949 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5950 break;
5951
5952 default:
5953 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5954 }
5955 }
5956
VisitNeg(HNeg * neg)5957 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
5958 switch (neg->GetResultType()) {
5959 case DataType::Type::kInt32:
5960 case DataType::Type::kInt64:
5961 __ Neg(OutputRegister(neg), InputOperandAt(neg, 0));
5962 break;
5963
5964 case DataType::Type::kFloat32:
5965 case DataType::Type::kFloat64:
5966 __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
5967 break;
5968
5969 default:
5970 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5971 }
5972 }
5973
VisitNewArray(HNewArray * instruction)5974 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
5975 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5976 instruction, LocationSummary::kCallOnMainOnly);
5977 InvokeRuntimeCallingConvention calling_convention;
5978 locations->SetOut(LocationFrom(x0));
5979 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5980 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5981 }
5982
VisitNewArray(HNewArray * instruction)5983 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
5984 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5985 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5986 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5987 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5988 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5989 }
5990
VisitNewInstance(HNewInstance * instruction)5991 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
5992 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5993 instruction, LocationSummary::kCallOnMainOnly);
5994 InvokeRuntimeCallingConvention calling_convention;
5995 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5996 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
5997 }
5998
VisitNewInstance(HNewInstance * instruction)5999 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
6000 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
6001 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
6002 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6003 }
6004
VisitNot(HNot * instruction)6005 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
6006 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6007 locations->SetInAt(0, Location::RequiresRegister());
6008 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6009 }
6010
VisitNot(HNot * instruction)6011 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
6012 switch (instruction->GetResultType()) {
6013 case DataType::Type::kInt32:
6014 case DataType::Type::kInt64:
6015 __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
6016 break;
6017
6018 default:
6019 LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
6020 }
6021 }
6022
VisitBooleanNot(HBooleanNot * instruction)6023 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
6024 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6025 locations->SetInAt(0, Location::RequiresRegister());
6026 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6027 }
6028
VisitBooleanNot(HBooleanNot * instruction)6029 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
6030 __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
6031 }
6032
VisitNullCheck(HNullCheck * instruction)6033 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
6034 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6035 locations->SetInAt(0, Location::RequiresRegister());
6036 }
6037
GenerateImplicitNullCheck(HNullCheck * instruction)6038 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
6039 if (CanMoveNullCheckToUser(instruction)) {
6040 return;
6041 }
6042 {
6043 // Ensure that between load and RecordPcInfo there are no pools emitted.
6044 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6045 Location obj = instruction->GetLocations()->InAt(0);
6046 __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
6047 RecordPcInfo(instruction, instruction->GetDexPc());
6048 }
6049 }
6050
GenerateExplicitNullCheck(HNullCheck * instruction)6051 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
6052 SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathARM64(instruction);
6053 AddSlowPath(slow_path);
6054
6055 LocationSummary* locations = instruction->GetLocations();
6056 Location obj = locations->InAt(0);
6057
6058 __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
6059 }
6060
VisitNullCheck(HNullCheck * instruction)6061 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
6062 codegen_->GenerateNullCheck(instruction);
6063 }
6064
VisitOr(HOr * instruction)6065 void LocationsBuilderARM64::VisitOr(HOr* instruction) {
6066 HandleBinaryOp(instruction);
6067 }
6068
VisitOr(HOr * instruction)6069 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
6070 HandleBinaryOp(instruction);
6071 }
6072
VisitParallelMove(HParallelMove * instruction)6073 void LocationsBuilderARM64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
6074 LOG(FATAL) << "Unreachable";
6075 }
6076
VisitParallelMove(HParallelMove * instruction)6077 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
6078 if (instruction->GetNext()->IsSuspendCheck() &&
6079 instruction->GetBlock()->GetLoopInformation() != nullptr) {
6080 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6081 // The back edge will generate the suspend check.
6082 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6083 }
6084
6085 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6086 }
6087
VisitParameterValue(HParameterValue * instruction)6088 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
6089 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6090 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
6091 if (location.IsStackSlot()) {
6092 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
6093 } else if (location.IsDoubleStackSlot()) {
6094 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
6095 }
6096 locations->SetOut(location);
6097 }
6098
VisitParameterValue(HParameterValue * instruction)6099 void InstructionCodeGeneratorARM64::VisitParameterValue(
6100 [[maybe_unused]] HParameterValue* instruction) {
6101 // Nothing to do, the parameter is already at its location.
6102 }
6103
VisitCurrentMethod(HCurrentMethod * instruction)6104 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
6105 LocationSummary* locations =
6106 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6107 locations->SetOut(LocationFrom(kArtMethodRegister));
6108 }
6109
VisitCurrentMethod(HCurrentMethod * instruction)6110 void InstructionCodeGeneratorARM64::VisitCurrentMethod(
6111 [[maybe_unused]] HCurrentMethod* instruction) {
6112 // Nothing to do, the method is already at its location.
6113 }
6114
VisitPhi(HPhi * instruction)6115 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
6116 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6117 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
6118 locations->SetInAt(i, Location::Any());
6119 }
6120 locations->SetOut(Location::Any());
6121 }
6122
VisitPhi(HPhi * instruction)6123 void InstructionCodeGeneratorARM64::VisitPhi([[maybe_unused]] HPhi* instruction) {
6124 LOG(FATAL) << "Unreachable";
6125 }
6126
VisitRem(HRem * rem)6127 void LocationsBuilderARM64::VisitRem(HRem* rem) {
6128 DataType::Type type = rem->GetResultType();
6129 LocationSummary::CallKind call_kind =
6130 DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
6131 : LocationSummary::kNoCall;
6132 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
6133
6134 switch (type) {
6135 case DataType::Type::kInt32:
6136 case DataType::Type::kInt64:
6137 locations->SetInAt(0, Location::RequiresRegister());
6138 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
6139 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6140 break;
6141
6142 case DataType::Type::kFloat32:
6143 case DataType::Type::kFloat64: {
6144 InvokeRuntimeCallingConvention calling_convention;
6145 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
6146 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
6147 locations->SetOut(calling_convention.GetReturnLocation(type));
6148
6149 break;
6150 }
6151
6152 default:
6153 LOG(FATAL) << "Unexpected rem type " << type;
6154 }
6155 }
6156
GenerateIntRemForPower2Denom(HRem * instruction)6157 void InstructionCodeGeneratorARM64::GenerateIntRemForPower2Denom(HRem *instruction) {
6158 int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
6159 uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
6160 DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
6161
6162 Register out = OutputRegister(instruction);
6163 Register dividend = InputRegisterAt(instruction, 0);
6164
6165 if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
6166 // No need to adjust the result for non-negative dividends or the INT32_MIN/INT64_MIN dividends.
6167 // NOTE: The generated code for HRem correctly works for the INT32_MIN/INT64_MIN dividends.
6168 // INT*_MIN % imm must be 0 for any imm of power 2. 'and' works only with bits
6169 // 0..30 (Int32 case)/0..62 (Int64 case) of a dividend. For INT32_MIN/INT64_MIN they are zeros.
6170 // So 'and' always produces zero.
6171 __ And(out, dividend, abs_imm - 1);
6172 } else {
6173 if (abs_imm == 2) {
6174 __ Cmp(dividend, 0);
6175 __ And(out, dividend, 1);
6176 __ Csneg(out, out, out, ge);
6177 } else {
6178 UseScratchRegisterScope temps(GetVIXLAssembler());
6179 Register temp = temps.AcquireSameSizeAs(out);
6180
6181 __ Negs(temp, dividend);
6182 __ And(out, dividend, abs_imm - 1);
6183 __ And(temp, temp, abs_imm - 1);
6184 __ Csneg(out, out, temp, mi);
6185 }
6186 }
6187 }
6188
GenerateIntRemForConstDenom(HRem * instruction)6189 void InstructionCodeGeneratorARM64::GenerateIntRemForConstDenom(HRem *instruction) {
6190 int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
6191
6192 if (imm == 0) {
6193 // Do not generate anything.
6194 // DivZeroCheck would prevent any code to be executed.
6195 return;
6196 }
6197
6198 if (IsPowerOfTwo(AbsOrMin(imm))) {
6199 // Cases imm == -1 or imm == 1 are handled in constant folding by
6200 // InstructionWithAbsorbingInputSimplifier.
6201 // If the cases have survided till code generation they are handled in
6202 // GenerateIntRemForPower2Denom becauses -1 and 1 are the power of 2 (2^0).
6203 // The correct code is generated for them, just more instructions.
6204 GenerateIntRemForPower2Denom(instruction);
6205 } else {
6206 DCHECK(imm < -2 || imm > 2) << imm;
6207 GenerateDivRemWithAnyConstant(instruction, imm);
6208 }
6209 }
6210
GenerateIntRem(HRem * instruction)6211 void InstructionCodeGeneratorARM64::GenerateIntRem(HRem* instruction) {
6212 DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
6213 << instruction->GetResultType();
6214
6215 if (instruction->GetLocations()->InAt(1).IsConstant()) {
6216 GenerateIntRemForConstDenom(instruction);
6217 } else {
6218 Register out = OutputRegister(instruction);
6219 Register dividend = InputRegisterAt(instruction, 0);
6220 Register divisor = InputRegisterAt(instruction, 1);
6221 UseScratchRegisterScope temps(GetVIXLAssembler());
6222 Register temp = temps.AcquireSameSizeAs(out);
6223 __ Sdiv(temp, dividend, divisor);
6224 __ Msub(out, temp, divisor, dividend);
6225 }
6226 }
6227
VisitRem(HRem * rem)6228 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
6229 DataType::Type type = rem->GetResultType();
6230
6231 switch (type) {
6232 case DataType::Type::kInt32:
6233 case DataType::Type::kInt64: {
6234 GenerateIntRem(rem);
6235 break;
6236 }
6237
6238 case DataType::Type::kFloat32:
6239 case DataType::Type::kFloat64: {
6240 QuickEntrypointEnum entrypoint =
6241 (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod;
6242 codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
6243 if (type == DataType::Type::kFloat32) {
6244 CheckEntrypointTypes<kQuickFmodf, float, float, float>();
6245 } else {
6246 CheckEntrypointTypes<kQuickFmod, double, double, double>();
6247 }
6248 break;
6249 }
6250
6251 default:
6252 LOG(FATAL) << "Unexpected rem type " << type;
6253 UNREACHABLE();
6254 }
6255 }
6256
VisitMin(HMin * min)6257 void LocationsBuilderARM64::VisitMin(HMin* min) {
6258 HandleBinaryOp(min);
6259 }
6260
VisitMin(HMin * min)6261 void InstructionCodeGeneratorARM64::VisitMin(HMin* min) {
6262 HandleBinaryOp(min);
6263 }
6264
VisitMax(HMax * max)6265 void LocationsBuilderARM64::VisitMax(HMax* max) {
6266 HandleBinaryOp(max);
6267 }
6268
VisitMax(HMax * max)6269 void InstructionCodeGeneratorARM64::VisitMax(HMax* max) {
6270 HandleBinaryOp(max);
6271 }
6272
VisitAbs(HAbs * abs)6273 void LocationsBuilderARM64::VisitAbs(HAbs* abs) {
6274 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
6275 switch (abs->GetResultType()) {
6276 case DataType::Type::kInt32:
6277 case DataType::Type::kInt64:
6278 locations->SetInAt(0, Location::RequiresRegister());
6279 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6280 break;
6281 case DataType::Type::kFloat32:
6282 case DataType::Type::kFloat64:
6283 locations->SetInAt(0, Location::RequiresFpuRegister());
6284 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6285 break;
6286 default:
6287 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
6288 }
6289 }
6290
VisitAbs(HAbs * abs)6291 void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) {
6292 switch (abs->GetResultType()) {
6293 case DataType::Type::kInt32:
6294 case DataType::Type::kInt64: {
6295 Register in_reg = InputRegisterAt(abs, 0);
6296 Register out_reg = OutputRegister(abs);
6297 __ Cmp(in_reg, Operand(0));
6298 __ Cneg(out_reg, in_reg, lt);
6299 break;
6300 }
6301 case DataType::Type::kFloat32:
6302 case DataType::Type::kFloat64: {
6303 VRegister in_reg = InputFPRegisterAt(abs, 0);
6304 VRegister out_reg = OutputFPRegister(abs);
6305 __ Fabs(out_reg, in_reg);
6306 break;
6307 }
6308 default:
6309 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
6310 }
6311 }
6312
VisitConstructorFence(HConstructorFence * constructor_fence)6313 void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
6314 constructor_fence->SetLocations(nullptr);
6315 }
6316
VisitConstructorFence(HConstructorFence * constructor_fence)6317 void InstructionCodeGeneratorARM64::VisitConstructorFence(
6318 [[maybe_unused]] HConstructorFence* constructor_fence) {
6319 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
6320 }
6321
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)6322 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
6323 memory_barrier->SetLocations(nullptr);
6324 }
6325
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)6326 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
6327 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
6328 }
6329
VisitReturn(HReturn * instruction)6330 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
6331 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6332 DataType::Type return_type = instruction->InputAt(0)->GetType();
6333 locations->SetInAt(0, ARM64ReturnLocation(return_type));
6334 }
6335
VisitReturn(HReturn * ret)6336 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* ret) {
6337 if (GetGraph()->IsCompilingOsr()) {
6338 // To simplify callers of an OSR method, we put the return value in both
6339 // floating point and core register.
6340 switch (ret->InputAt(0)->GetType()) {
6341 case DataType::Type::kFloat32:
6342 __ Fmov(w0, s0);
6343 break;
6344 case DataType::Type::kFloat64:
6345 __ Fmov(x0, d0);
6346 break;
6347 default:
6348 break;
6349 }
6350 }
6351 codegen_->GenerateFrameExit();
6352 }
6353
VisitReturnVoid(HReturnVoid * instruction)6354 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
6355 instruction->SetLocations(nullptr);
6356 }
6357
VisitReturnVoid(HReturnVoid * instruction)6358 void InstructionCodeGeneratorARM64::VisitReturnVoid([[maybe_unused]] HReturnVoid* instruction) {
6359 codegen_->GenerateFrameExit();
6360 }
6361
VisitRor(HRor * ror)6362 void LocationsBuilderARM64::VisitRor(HRor* ror) {
6363 HandleBinaryOp(ror);
6364 }
6365
VisitRor(HRor * ror)6366 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
6367 HandleBinaryOp(ror);
6368 }
6369
VisitShl(HShl * shl)6370 void LocationsBuilderARM64::VisitShl(HShl* shl) {
6371 HandleShift(shl);
6372 }
6373
VisitShl(HShl * shl)6374 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
6375 HandleShift(shl);
6376 }
6377
VisitShr(HShr * shr)6378 void LocationsBuilderARM64::VisitShr(HShr* shr) {
6379 HandleShift(shr);
6380 }
6381
VisitShr(HShr * shr)6382 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
6383 HandleShift(shr);
6384 }
6385
VisitSub(HSub * instruction)6386 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
6387 HandleBinaryOp(instruction);
6388 }
6389
VisitSub(HSub * instruction)6390 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
6391 HandleBinaryOp(instruction);
6392 }
6393
VisitStaticFieldGet(HStaticFieldGet * instruction)6394 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6395 HandleFieldGet(instruction, instruction->GetFieldInfo());
6396 }
6397
VisitStaticFieldGet(HStaticFieldGet * instruction)6398 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6399 HandleFieldGet(instruction, instruction->GetFieldInfo());
6400 }
6401
VisitStaticFieldSet(HStaticFieldSet * instruction)6402 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6403 HandleFieldSet(instruction);
6404 }
6405
VisitStaticFieldSet(HStaticFieldSet * instruction)6406 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6407 HandleFieldSet(instruction,
6408 instruction->GetFieldInfo(),
6409 instruction->GetValueCanBeNull(),
6410 instruction->GetWriteBarrierKind());
6411 }
6412
VisitStringBuilderAppend(HStringBuilderAppend * instruction)6413 void LocationsBuilderARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6414 codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(x0));
6415 }
6416
VisitStringBuilderAppend(HStringBuilderAppend * instruction)6417 void InstructionCodeGeneratorARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6418 __ Mov(w0, instruction->GetFormat()->GetValue());
6419 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6420 }
6421
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)6422 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
6423 HUnresolvedInstanceFieldGet* instruction) {
6424 FieldAccessCallingConventionARM64 calling_convention;
6425 codegen_->CreateUnresolvedFieldLocationSummary(
6426 instruction, instruction->GetFieldType(), calling_convention);
6427 }
6428
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)6429 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
6430 HUnresolvedInstanceFieldGet* instruction) {
6431 FieldAccessCallingConventionARM64 calling_convention;
6432 codegen_->GenerateUnresolvedFieldAccess(instruction,
6433 instruction->GetFieldType(),
6434 instruction->GetFieldIndex(),
6435 instruction->GetDexPc(),
6436 calling_convention);
6437 }
6438
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)6439 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
6440 HUnresolvedInstanceFieldSet* instruction) {
6441 FieldAccessCallingConventionARM64 calling_convention;
6442 codegen_->CreateUnresolvedFieldLocationSummary(
6443 instruction, instruction->GetFieldType(), calling_convention);
6444 }
6445
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)6446 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
6447 HUnresolvedInstanceFieldSet* instruction) {
6448 FieldAccessCallingConventionARM64 calling_convention;
6449 codegen_->GenerateUnresolvedFieldAccess(instruction,
6450 instruction->GetFieldType(),
6451 instruction->GetFieldIndex(),
6452 instruction->GetDexPc(),
6453 calling_convention);
6454 }
6455
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)6456 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
6457 HUnresolvedStaticFieldGet* instruction) {
6458 FieldAccessCallingConventionARM64 calling_convention;
6459 codegen_->CreateUnresolvedFieldLocationSummary(
6460 instruction, instruction->GetFieldType(), calling_convention);
6461 }
6462
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)6463 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
6464 HUnresolvedStaticFieldGet* instruction) {
6465 FieldAccessCallingConventionARM64 calling_convention;
6466 codegen_->GenerateUnresolvedFieldAccess(instruction,
6467 instruction->GetFieldType(),
6468 instruction->GetFieldIndex(),
6469 instruction->GetDexPc(),
6470 calling_convention);
6471 }
6472
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)6473 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
6474 HUnresolvedStaticFieldSet* instruction) {
6475 FieldAccessCallingConventionARM64 calling_convention;
6476 codegen_->CreateUnresolvedFieldLocationSummary(
6477 instruction, instruction->GetFieldType(), calling_convention);
6478 }
6479
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)6480 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
6481 HUnresolvedStaticFieldSet* instruction) {
6482 FieldAccessCallingConventionARM64 calling_convention;
6483 codegen_->GenerateUnresolvedFieldAccess(instruction,
6484 instruction->GetFieldType(),
6485 instruction->GetFieldIndex(),
6486 instruction->GetDexPc(),
6487 calling_convention);
6488 }
6489
VisitSuspendCheck(HSuspendCheck * instruction)6490 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
6491 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6492 instruction, LocationSummary::kCallOnSlowPath);
6493 // In suspend check slow path, usually there are no caller-save registers at all.
6494 // If SIMD instructions are present, however, we force spilling all live SIMD
6495 // registers in full width (since the runtime only saves/restores lower part).
6496 // Note that only a suspend check can see live SIMD registers. In the
6497 // loop optimization, we make sure this does not happen for any other slow
6498 // path.
6499 locations->SetCustomSlowPathCallerSaves(
6500 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6501 }
6502
VisitSuspendCheck(HSuspendCheck * instruction)6503 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
6504 HBasicBlock* block = instruction->GetBlock();
6505 if (block->GetLoopInformation() != nullptr) {
6506 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6507 // The back edge will generate the suspend check.
6508 return;
6509 }
6510 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6511 // The goto will generate the suspend check.
6512 return;
6513 }
6514 GenerateSuspendCheck(instruction, nullptr);
6515 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6516 }
6517
VisitThrow(HThrow * instruction)6518 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
6519 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6520 instruction, LocationSummary::kCallOnMainOnly);
6521 InvokeRuntimeCallingConvention calling_convention;
6522 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
6523 }
6524
VisitThrow(HThrow * instruction)6525 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
6526 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6527 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6528 }
6529
VisitTypeConversion(HTypeConversion * conversion)6530 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
6531 LocationSummary* locations =
6532 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
6533 DataType::Type input_type = conversion->GetInputType();
6534 DataType::Type result_type = conversion->GetResultType();
6535 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
6536 << input_type << " -> " << result_type;
6537 if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) ||
6538 (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) {
6539 LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
6540 }
6541
6542 if (DataType::IsFloatingPointType(input_type)) {
6543 locations->SetInAt(0, Location::RequiresFpuRegister());
6544 } else {
6545 locations->SetInAt(0, Location::RequiresRegister());
6546 }
6547
6548 if (DataType::IsFloatingPointType(result_type)) {
6549 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6550 } else {
6551 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6552 }
6553 }
6554
VisitTypeConversion(HTypeConversion * conversion)6555 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
6556 DataType::Type result_type = conversion->GetResultType();
6557 DataType::Type input_type = conversion->GetInputType();
6558
6559 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
6560 << input_type << " -> " << result_type;
6561
6562 if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) {
6563 int result_size = DataType::Size(result_type);
6564 int input_size = DataType::Size(input_type);
6565 int min_size = std::min(result_size, input_size);
6566 Register output = OutputRegister(conversion);
6567 Register source = InputRegisterAt(conversion, 0);
6568 if (result_type == DataType::Type::kInt32 && input_type == DataType::Type::kInt64) {
6569 // 'int' values are used directly as W registers, discarding the top
6570 // bits, so we don't need to sign-extend and can just perform a move.
6571 // We do not pass the `kDiscardForSameWReg` argument to force clearing the
6572 // top 32 bits of the target register. We theoretically could leave those
6573 // bits unchanged, but we would have to make sure that no code uses a
6574 // 32bit input value as a 64bit value assuming that the top 32 bits are
6575 // zero.
6576 __ Mov(output.W(), source.W());
6577 } else if (DataType::IsUnsignedType(result_type) ||
6578 (DataType::IsUnsignedType(input_type) && input_size < result_size)) {
6579 __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, result_size * kBitsPerByte);
6580 } else {
6581 __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
6582 }
6583 } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) {
6584 __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
6585 } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) {
6586 CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64);
6587 __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
6588 } else if (DataType::IsFloatingPointType(result_type) &&
6589 DataType::IsFloatingPointType(input_type)) {
6590 __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
6591 } else {
6592 LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
6593 << " to " << result_type;
6594 }
6595 }
6596
VisitUShr(HUShr * ushr)6597 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
6598 HandleShift(ushr);
6599 }
6600
VisitUShr(HUShr * ushr)6601 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
6602 HandleShift(ushr);
6603 }
6604
VisitXor(HXor * instruction)6605 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
6606 HandleBinaryOp(instruction);
6607 }
6608
VisitXor(HXor * instruction)6609 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
6610 HandleBinaryOp(instruction);
6611 }
6612
VisitBoundType(HBoundType * instruction)6613 void LocationsBuilderARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
6614 // Nothing to do, this should be removed during prepare for register allocator.
6615 LOG(FATAL) << "Unreachable";
6616 }
6617
VisitBoundType(HBoundType * instruction)6618 void InstructionCodeGeneratorARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
6619 // Nothing to do, this should be removed during prepare for register allocator.
6620 LOG(FATAL) << "Unreachable";
6621 }
6622
6623 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)6624 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6625 LocationSummary* locations =
6626 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6627 locations->SetInAt(0, Location::RequiresRegister());
6628 }
6629
// Emits the dispatch code for a packed switch. Large switches in reasonably
// small graphs go through a PC-relative jump table; everything else uses a
// cascade of compare/branch instructions.
void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  const int32_t lower_bound = switch_instr->GetStartValue();
  const uint32_t num_entries = switch_instr->GetNumEntries();
  Register value_reg = InputRegisterAt(switch_instr, 0);
  HBasicBlock* const default_block = switch_instr->GetDefaultBlock();

  // Rough upper bound: 16 instructions generated on average per HIR in a graph.
  static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
  // ADR only reaches +/-1MB, so the jump table is not used once the number of
  // HIRs in the graph suggests that the table may end up out of range.
  // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
  // ranges and emit the tables only as required.
  static constexpr int32_t kJumpTableInstructionThreshold =
      1 * MB / kMaxExpectedSizePerHInstruction;

  // The current instruction id is an upper bound of the number of HIRs in the graph.
  const bool use_jump_table =
      num_entries > kPackedSwitchCompareJumpThreshold &&
      GetGraph()->GetCurrentInstructionId() <= kJumpTableInstructionThreshold;

  if (use_jump_table) {
    JumpTableARM64* const jump_table = codegen_->CreateJumpTable(switch_instr);

    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());

    // The instructions below need at most one blocked register. As two are
    // blocked, one of them can be taken here.
    Register scratch_w = temps.AcquireW();
    // Remove the bias, if any, to obtain a zero-based table index.
    Register index = value_reg;
    if (lower_bound != 0) {
      index = scratch_w;
      __ Sub(index, value_reg, Operand(lower_bound));
    }

    // An out-of-range index goes to the default block.
    __ Cmp(index, Operand(num_entries));
    __ B(hs, codegen_->GetLabelOf(default_block));

    // The current VIXL implementation does not need a blocked register to
    // encode the immediate of Adr, so both VIXL blocked registers can be used
    // here to reduce the register pressure.
    Register table_base = temps.AcquireX();
    __ Adr(table_base, jump_table->GetTableStartLabel());
    // Load the jump offset from the table (reusing the W scratch register).
    __ Ldr(scratch_w, MemOperand(table_base, index, UXTW, 2));

    // Branch to table_base (pc related) + offset, reusing `table_base` to
    // hold the target address.
    __ Add(table_base, table_base, Operand(scratch_w, SXTW));
    __ Br(table_base);
    return;
  }

  // Series of compare/jumps.
  UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
  Register biased_value = temps.AcquireW();
  __ Subs(biased_value, value_reg, Operand(lower_bound));

  HBasicBlock* const switch_block = switch_instr->GetBlock();
  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
  // Jump to successors[0] if value == lower_bound.
  __ B(eq, codegen_->GetLabelOf(successors[0]));
  uint32_t last_index = 0u;
  while (num_entries - last_index > 2u) {
    __ Subs(biased_value, biased_value, Operand(2));
    // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
    __ B(lo, codegen_->GetLabelOf(successors[last_index + 1u]));
    // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
    __ B(eq, codegen_->GetLabelOf(successors[last_index + 2u]));
    last_index += 2u;
  }
  if (num_entries - last_index == 2u) {
    // One case value is still missing.
    __ Cmp(biased_value, Operand(1));
    __ B(eq, codegen_->GetLabelOf(successors[last_index + 1u]));
  }

  // Any other value goes to the default block.
  if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
    __ B(codegen_->GetLabelOf(default_block));
  }
}
6709
// Loads the heap reference at `*(out + offset)` into `out`, i.e. the register
// holding the base object is overwritten by the loaded reference. Depending
// on `read_barrier_option`, the load is plain (with optional unpoisoning), or
// protected by a Baker or slow-path read barrier. `maybe_temp` is forwarded
// to the Baker helper, and used to preserve the original `out` value for the
// slow-path barrier.
void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
    HInstruction* instruction,
    Location out,
    uint32_t offset,
    Location maybe_temp,
    ReadBarrierOption read_barrier_option) {
  constexpr DataType::Type kRefType = DataType::Type::kReference;
  Register out_reg = RegisterFrom(out, kRefType);

  if (read_barrier_option != kWithReadBarrier) {
    // No read barrier: plain load.
    // /* HeapReference<Object> */ out = *(out + offset)
    __ Ldr(out_reg, HeapOperand(out_reg, offset));
    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
    return;
  }

  DCHECK(codegen_->EmitReadBarrier());
  if (kUseBakerReadBarrier) {
    // Baker's read barrier, based on a fast path.
    // /* HeapReference<Object> */ out = *(out + offset)
    codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
                                                    out,
                                                    out_reg,
                                                    offset,
                                                    maybe_temp,
                                                    /* needs_null_check= */ false,
                                                    /* use_load_acquire= */ false);
  } else {
    // Read barrier based on a slow path. The load below overwrites `out`,
    // whose current value is still needed by the read barrier, so it is
    // copied into `maybe_temp` first.
    Register saved_base = RegisterFrom(maybe_temp, kRefType);
    __ Mov(saved_base, out_reg);
    // /* HeapReference<Object> */ out = *(out + offset)
    __ Ldr(out_reg, HeapOperand(out_reg, offset));
    codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
  }
}
6748
// Loads the heap reference at `*(obj + offset)` into `out`. Depending on
// `read_barrier_option`, the load is plain (with optional unpoisoning), or
// protected by a Baker or slow-path read barrier. `maybe_temp` is only
// forwarded to the Baker helper.
void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
    HInstruction* instruction,
    Location out,
    Location obj,
    uint32_t offset,
    Location maybe_temp,
    ReadBarrierOption read_barrier_option) {
  constexpr DataType::Type kRefType = DataType::Type::kReference;
  Register out_reg = RegisterFrom(out, kRefType);
  Register holder_reg = RegisterFrom(obj, kRefType);

  if (read_barrier_option != kWithReadBarrier) {
    // No read barrier: plain load.
    // /* HeapReference<Object> */ out = *(obj + offset)
    __ Ldr(out_reg, HeapOperand(holder_reg, offset));
    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
    return;
  }

  DCHECK(codegen_->EmitReadBarrier());
  if (kUseBakerReadBarrier) {
    // Baker's read barrier, based on a fast path.
    // /* HeapReference<Object> */ out = *(obj + offset)
    codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
                                                    out,
                                                    holder_reg,
                                                    offset,
                                                    maybe_temp,
                                                    /* needs_null_check= */ false,
                                                    /* use_load_acquire= */ false);
  } else {
    // Read barrier based on a slow path, inserted after the load.
    // /* HeapReference<Object> */ out = *(obj + offset)
    __ Ldr(out_reg, HeapOperand(holder_reg, offset));
    codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
  }
}
6784
// Loads the GC root at `*(obj + offset)` into `root`, with the kind of read
// barrier selected by `read_barrier_option`. When `fixup_label` is non-null
// (in which case `offset` must be 0), the label is bound at the instruction
// to patch, or handed to the placeholder-emitting helper.
void CodeGeneratorARM64::GenerateGcRootFieldLoad(
    HInstruction* instruction,
    Location root,
    Register obj,
    uint32_t offset,
    vixl::aarch64::Label* fixup_label,
    ReadBarrierOption read_barrier_option) {
  DCHECK(fixup_label == nullptr || offset == 0u);
  Register root_reg = RegisterFrom(root, DataType::Type::kReference);
  const bool has_fixup = (fixup_label != nullptr);

  if (read_barrier_option != kWithReadBarrier) {
    // No read barrier: plain GC root load.
    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
    if (has_fixup) {
      EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
    } else {
      __ Ldr(root_reg, MemOperand(obj, offset));
    }
    // Heap poisoning does not apply to GC roots, so `root_reg` needs no
    // unpoisoning here.
  } else {
    DCHECK(EmitReadBarrier());
    if (kUseBakerReadBarrier) {
      // Fast path implementation of art::ReadBarrier::BarrierForRoot for
      // Baker's read barriers.

      // `art::Thread::Current()->GetIsGcMarking()` (kept in the Marking
      // Register) tells whether the slow path has to be entered in order to
      // mark the GC root.
      //
      // The slow path lives in shared thunks: shared within the method for
      // JIT, across methods for AOT. The thunk checks the reference and jumps
      // to the entrypoint if needed.
      //
      //     lr = &return_address;
      //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
      //     if (mr) {  // Thread::Current()->GetIsGcMarking()
      //       goto gc_root_thunk<root_reg>(lr)
      //     }
      //   return_address:

      UseScratchRegisterScope temps(GetVIXLAssembler());
      DCHECK(temps.IsAvailable(ip0));
      DCHECK(temps.IsAvailable(ip1));
      temps.Exclude(ip0, ip1);
      const uint32_t thunk_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());

      // Exactly three instructions: ADR, LDR and the CBNZ to the thunk.
      ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
      vixl::aarch64::Label return_address;
      __ adr(lr, &return_address);
      if (has_fixup) {
        __ bind(fixup_label);
      }
      static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
                    "GC root LDR must be 2 instructions (8B) before the return address label.");
      __ ldr(root_reg, MemOperand(obj.X(), offset));
      EmitBakerReadBarrierCbnz(thunk_data);
      __ bind(&return_address);
    } else {
      // Read barriers other than Baker's load the GC root through a slow path.
      // /* GcRoot<mirror::Object>* */ root = obj + offset
      if (has_fixup) {
        EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
      } else {
        __ Add(root_reg.X(), obj.X(), offset);
      }
      // /* mirror::Object* */ root = root->Read()
      GenerateReadBarrierForRootSlow(instruction, root, root);
    }
  }
  MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
6857
// Copies `old_value` into `marked_old_value` using the same three-instruction
// sequence as the Baker read barrier path of GenerateGcRootFieldLoad(), with
// a MOV taking the place of the LDR.
void CodeGeneratorARM64::GenerateIntrinsicMoveWithBakerReadBarrier(
    vixl::aarch64::Register marked_old_value,
    vixl::aarch64::Register old_value) {
  DCHECK(EmitBakerReadBarrier());

  // The thunk data is keyed on the destination register.
  const uint32_t thunk_data = EncodeBakerReadBarrierGcRootData(marked_old_value.GetCode());

  // Exactly three instructions: ADR, MOV and the CBNZ to the thunk.
  ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
  vixl::aarch64::Label return_address;
  __ adr(lr, &return_address);
  static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
                "GC root LDR must be 2 instructions (8B) before the return address label.");
  __ mov(marked_old_value, old_value);
  EmitBakerReadBarrierCbnz(thunk_data);
  __ bind(&return_address);
}
6875
// Loads the reference at `src` into `ref` with a Baker read barrier. `obj` is
// the holder of the reference; `src` must use plain offset addressing. With
// `use_load_acquire`, the load is an LDAR and the offset of `src` must be 0.
// With `needs_null_check`, an implicit null check is recorded right after the
// load instruction.
void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                               Location ref,
                                                               vixl::aarch64::Register obj,
                                                               const vixl::aarch64::MemOperand& src,
                                                               bool needs_null_check,
                                                               bool use_load_acquire) {
  DCHECK(EmitBakerReadBarrier());

  // `art::Thread::Current()->GetIsGcMarking()` (kept in the Marking Register)
  // tells whether the slow path has to be entered in order to mark the
  // reference. The slow path then looks at the gray bit in the lock word of
  // the reference's holder (`obj`) to decide whether `ref` gets marked.
  //
  // The slow path lives in shared thunks: shared within the method for JIT,
  // across methods for AOT. The thunk checks the holder and jumps to the
  // entrypoint if needed. For a holder that is not gray, it creates a fake
  // dependency and returns to the LDR instruction.
  //
  //     lr = &gray_return_address;
  //     if (mr) {  // Thread::Current()->GetIsGcMarking()
  //       goto field_thunk<holder_reg, base_reg, use_load_acquire>(lr)
  //     }
  //   not_gray_return_address:
  //     // Original reference load. If the offset is too large to fit
  //     // into LDR, we use an adjusted base register here.
  //     HeapReference<mirror::Object> reference = *(obj+offset);
  //   gray_return_address:

  DCHECK(src.GetAddrMode() == vixl::aarch64::Offset);
  DCHECK_ALIGNED(src.GetOffset(), sizeof(mirror::HeapReference<mirror::Object>));

  UseScratchRegisterScope temps(GetVIXLAssembler());
  DCHECK(temps.IsAvailable(ip0));
  DCHECK(temps.IsAvailable(ip1));
  temps.Exclude(ip0, ip1);

  // The thunk data encodes the base register of the load and the holder.
  const auto base_code = src.GetBaseRegister().GetCode();
  const auto holder_code = obj.GetCode();
  const uint32_t thunk_data = use_load_acquire
      ? EncodeBakerReadBarrierAcquireData(base_code, holder_code)
      : EncodeBakerReadBarrierFieldData(base_code, holder_code);

  {
    // ADR, CBNZ and the load, plus a NEG when heap references are poisoned.
    ExactAssemblyScope guard(GetVIXLAssembler(),
                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
    vixl::aarch64::Label return_address;
    __ adr(lr, &return_address);
    EmitBakerReadBarrierCbnz(thunk_data);
    static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
                  "Field LDR must be 1 instruction (4B) before the return address label; "
                  " 2 instructions (8B) for heap poisoning.");
    Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
    if (!use_load_acquire) {
      __ ldr(ref_reg, src);
    } else {
      DCHECK_EQ(src.GetOffset(), 0);
      __ ldar(ref_reg, src);
    }
    if (needs_null_check) {
      MaybeRecordImplicitNullCheck(instruction);
    }
    // MaybeUnpoisonHeapReference() relies on macro instructions, which are
    // not allowed inside an ExactAssemblyScope; unpoison explicitly instead.
    if (kPoisonHeapReferences) {
      __ neg(ref_reg, Operand(ref_reg));
    }
    __ bind(&return_address);
  }
  MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
}
6944
// Convenience overload taking a raw `offset` from `obj`. It builds the memory
// operand for the load and forwards to the MemOperand overload. For
// load-acquire, and for offsets of at least `kReferenceLoadMinFarOffset`, the
// base address is first adjusted into the register held by `maybe_temp`.
void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                               Location ref,
                                                               Register obj,
                                                               uint32_t offset,
                                                               Location maybe_temp,
                                                               bool needs_null_check,
                                                               bool use_load_acquire) {
  DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
  static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
  // Mask selecting the part of the offset that stays in the load instruction.
  constexpr auto kNearOffsetMask = kReferenceLoadMinFarOffset - 1u;

  Register load_base = obj;
  if (use_load_acquire) {
    // The acquiring load is emitted with a zero offset, so the whole offset
    // is folded into the base register.
    DCHECK(maybe_temp.IsRegister());
    load_base = WRegisterFrom(maybe_temp);
    __ Add(load_base, obj, offset);
    offset = 0u;
  } else if (offset >= kReferenceLoadMinFarOffset) {
    // Far offset: move its high part into the base register and keep only
    // the low part in the load.
    DCHECK(maybe_temp.IsRegister());
    load_base = WRegisterFrom(maybe_temp);
    __ Add(load_base, obj, Operand(offset & ~kNearOffsetMask));
    offset &= kNearOffsetMask;
  }
  MemOperand load_src(load_base.X(), offset);
  GenerateFieldLoadWithBakerReadBarrier(
      instruction, ref, obj, load_src, needs_null_check, use_load_acquire);
}
6970
// Loads the reference `data[index]` of the array `obj` into `ref` with a
// Baker read barrier. `data_offset` is the offset of the array data from
// `obj`; when the array input is an HIntermediateAddress, `obj` already holds
// the data address. `needs_null_check` must be false (see below).
void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction,
                                                               Location ref,
                                                               Register obj,
                                                               uint32_t data_offset,
                                                               Location index,
                                                               bool needs_null_check) {
  DCHECK(EmitBakerReadBarrier());

  static_assert(
      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
  size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);

  // `art::Thread::Current()->GetIsGcMarking()` (kept in the Marking Register)
  // tells whether the slow path has to be entered in order to mark the
  // reference. The slow path then looks at the gray bit in the lock word of
  // the reference's holder (`obj`) to decide whether `ref` gets marked.
  //
  // The slow path lives in shared thunks: shared within the method for JIT,
  // across methods for AOT. The thunk checks the holder and jumps to the
  // entrypoint if needed. For a holder that is not gray, it creates a fake
  // dependency and returns to the LDR instruction.
  //
  //     lr = &gray_return_address;
  //     if (mr) {  // Thread::Current()->GetIsGcMarking()
  //       goto array_thunk<base_reg>(lr)
  //     }
  //   not_gray_return_address:
  //     // Original reference load. If the offset is too large to fit
  //     // into LDR, we use an adjusted base register here.
  //     HeapReference<mirror::Object> reference = data[index];
  //   gray_return_address:

  DCHECK(index.IsValid());
  Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
  Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);

  UseScratchRegisterScope temps(GetVIXLAssembler());
  DCHECK(temps.IsAvailable(ip0));
  DCHECK(temps.IsAvailable(ip1));
  temps.Exclude(ip0, ip1);

  // Register holding the address of the array data.
  Register data_base;
  if (!instruction->GetArray()->IsIntermediateAddress()) {
    data_base = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
    __ Add(data_base.X(), obj.X(), Operand(data_offset));
  } else {
    // The input instruction has already computed the intermediate address
    // from the array, so there is nothing to add. See the comment in
    // `TryExtractArrayAccessAddress()`.
    if (kIsDebugBuild) {
      HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
      DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
    }
    data_base = obj;
  }

  const uint32_t thunk_data = EncodeBakerReadBarrierArrayData(data_base.GetCode());

  {
    // ADR, CBNZ and the load, plus a NEG when heap references are poisoned.
    ExactAssemblyScope guard(GetVIXLAssembler(),
                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
    vixl::aarch64::Label return_address;
    __ adr(lr, &return_address);
    EmitBakerReadBarrierCbnz(thunk_data);
    static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
                  "Array LDR must be 1 instruction (4B) before the return address label; "
                  " 2 instructions (8B) for heap poisoning.");
    __ ldr(ref_reg, MemOperand(data_base.X(), index_reg.X(), LSL, scale_factor));
    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
    // MaybeUnpoisonHeapReference() relies on macro instructions, which are
    // not allowed inside an ExactAssemblyScope; unpoison explicitly instead.
    if (kPoisonHeapReferences) {
      __ neg(ref_reg, Operand(ref_reg));
    }
    __ bind(&return_address);
  }
  MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
}
7051
MaybeGenerateMarkingRegisterCheck(int code,Location temp_loc)7052 void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
7053 // The following condition is a compile-time one, so it does not have a run-time cost.
7054 if (kIsDebugBuild && EmitBakerReadBarrier()) {
7055 // The following condition is a run-time one; it is executed after the
7056 // previous compile-time test, to avoid penalizing non-debug builds.
7057 if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
7058 UseScratchRegisterScope temps(GetVIXLAssembler());
7059 Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW();
7060 GetAssembler()->GenerateMarkingRegisterCheck(temp, code);
7061 }
7062 }
7063 }
7064
// Allocates a heap reference read barrier slow path for `instruction` from
// the scoped allocator, registers it with the code generator and returns it.
// No branch to the slow path is emitted here.
SlowPathCodeARM64* CodeGeneratorARM64::AddReadBarrierSlowPath(HInstruction* instruction,
                                                              Location out,
                                                              Location ref,
                                                              Location obj,
                                                              uint32_t offset,
                                                              Location index) {
  SlowPathCodeARM64* barrier_path = new (GetScopedAllocator())
      ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
  AddSlowPath(barrier_path);
  return barrier_path;
}
7076
// Emits a slow path based read barrier, to be placed *after* the reference
// load it protects: an unconditional branch to a newly added slow path,
// followed by the slow path's exit label.
void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
                                                 Location out,
                                                 Location ref,
                                                 Location obj,
                                                 uint32_t offset,
                                                 Location index) {
  DCHECK(EmitReadBarrier());

  // With heap poisoning enabled, the loaded reference is unpoisoned by the
  // runtime within the slow path.
  //
  // `ref` itself is currently left poisoned (when heap poisoning is enabled).
  // This is alright because the artReadBarrierSlow entry point does not use
  // its `ref` argument.
  //
  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
  SlowPathCodeARM64* barrier_path =
      AddReadBarrierSlowPath(instruction, out, ref, obj, offset, index);

  __ B(barrier_path->GetEntryLabel());
  __ Bind(barrier_path->GetExitLabel());
}
7101
// Emits a slow path based read barrier when read barriers are emitted at all.
// Otherwise only unpoisons `out`, and only when heap poisoning is enabled.
void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                                      Location out,
                                                      Location ref,
                                                      Location obj,
                                                      uint32_t offset,
                                                      Location index) {
  if (!EmitReadBarrier()) {
    if (kPoisonHeapReferences) {
      GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
    }
    return;
  }
  // Baker's read barriers are expected to go through the fast path
  // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier), not here.
  DCHECK(!kUseBakerReadBarrier);
  // With heap poisoning enabled, the runtime takes care of the unpoisoning
  // within the slow path.
  GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
}
7119
GenerateReadBarrierForRootSlow(HInstruction * instruction,Location out,Location root)7120 void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
7121 Location out,
7122 Location root) {
7123 DCHECK(EmitReadBarrier());
7124
7125 // Insert a slow path based read barrier *after* the GC root load.
7126 //
7127 // Note that GC roots are not affected by heap poisoning, so we do
7128 // not need to do anything special for this here.
7129 SlowPathCodeARM64* slow_path =
7130 new (GetScopedAllocator()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
7131 AddSlowPath(slow_path);
7132
7133 __ B(slow_path->GetEntryLabel());
7134 __ Bind(slow_path->GetExitLabel());
7135 }
7136
VisitClassTableGet(HClassTableGet * instruction)7137 void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
7138 LocationSummary* locations =
7139 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7140 locations->SetInAt(0, Location::RequiresRegister());
7141 locations->SetOut(Location::RequiresRegister());
7142 }
7143
// Loads an entry from one of the tables reachable from the input register.
// A vtable entry is read directly from the embedded vtable; for any other
// table kind, the IMT pointer is loaded first and the entry is then read
// from the IMT.
void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  const Register out = XRegisterFrom(locations->Out());
  const Register table_holder = XRegisterFrom(locations->InAt(0));

  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
    const uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
        instruction->GetIndex(), kArm64PointerSize).SizeValue();
    __ Ldr(out, MemOperand(table_holder, method_offset));
    return;
  }

  const uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
      instruction->GetIndex(), kArm64PointerSize));
  // First the IMT pointer, then the entry within the IMT; `out` doubles as
  // the intermediate register.
  __ Ldr(out, MemOperand(table_holder,
                         mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
  __ Ldr(out, MemOperand(out, method_offset));
}
7160
// Builds the memory operand for a NEON vector memory operation on elements of
// `size` bytes. With `is_string_char_at`, the data offset is that of the
// string value rather than of the array data. For a non-constant index, a
// scratch register is acquired from `temps_scope` and returned through
// `scratch`; in the other cases `*scratch` is left untouched.
MemOperand InstructionCodeGeneratorARM64::VecNEONAddress(
    HVecMemoryOperation* instruction,
    UseScratchRegisterScope* temps_scope,
    size_t size,
    bool is_string_char_at,
    /*out*/ Register* scratch) {
  Register base = InputRegisterAt(instruction, 0);

  if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
    // The second input is used directly as a register offset from `base`.
    DCHECK(!is_string_char_at);
    return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
  }

  uint32_t offset = is_string_char_at
      ? mirror::String::ValueOffset().Uint32Value()
      : mirror::Array::DataOffset(size).Uint32Value();
  const size_t shift = ComponentSizeShiftWidth(size);

  // The HIntermediateAddress optimization only applies to scalar ArrayGet and ArraySet.
  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());

  const Location index = instruction->GetLocations()->InAt(1);
  if (!index.IsConstant()) {
    // scratch = base + (index << shift); the data offset goes in the operand.
    *scratch = temps_scope->AcquireSameSizeAs(base);
    __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
    return HeapOperand(*scratch, offset);
  }

  // Constant index: fold the scaled index into the offset.
  offset += Int64FromLocation(index) << shift;
  return HeapOperand(base, offset);
}
7193
// Builds the memory operand for an SVE vector memory operation on elements of
// `size` bytes, addressed as a base register plus the index register shifted
// by the element size. Unless the first input is an HIntermediateAddress, the
// data offset is first added to the base in a scratch register acquired from
// `temps_scope` and returned through `scratch`. The index must be neither
// constant nor an HIntermediateAddressIndex.
SVEMemOperand InstructionCodeGeneratorARM64::VecSVEAddress(
    HVecMemoryOperation* instruction,
    UseScratchRegisterScope* temps_scope,
    size_t size,
    bool is_string_char_at,
    /*out*/ Register* scratch) {
  Register base = InputRegisterAt(instruction, 0);
  const Location index = instruction->GetLocations()->InAt(1);

  DCHECK(!instruction->InputAt(1)->IsIntermediateAddressIndex());
  DCHECK(!index.IsConstant());

  const uint32_t data_offset = is_string_char_at
      ? mirror::String::ValueOffset().Uint32Value()
      : mirror::Array::DataOffset(size).Uint32Value();
  const size_t shift = ComponentSizeShiftWidth(size);

  if (!instruction->InputAt(0)->IsIntermediateAddress()) {
    *scratch = temps_scope->AcquireSameSizeAs(base);
    __ Add(*scratch, base, data_offset);
    return SVEMemOperand(scratch->X(), XRegisterFrom(index), LSL, shift);
  }

  // `base` is used as is, without adding the data offset.
  return SVEMemOperand(base.X(), XRegisterFrom(index), LSL, shift);
}
7220
7221 #undef __
7222 #undef QUICK_ENTRY_POINT
7223
7224 #define __ assembler.GetVIXLAssembler()->
7225
EmitGrayCheckAndFastPath(arm64::Arm64Assembler & assembler,vixl::aarch64::Register base_reg,vixl::aarch64::MemOperand & lock_word,vixl::aarch64::Label * slow_path,vixl::aarch64::Label * throw_npe=nullptr)7226 static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
7227 vixl::aarch64::Register base_reg,
7228 vixl::aarch64::MemOperand& lock_word,
7229 vixl::aarch64::Label* slow_path,
7230 vixl::aarch64::Label* throw_npe = nullptr) {
7231 vixl::aarch64::Label throw_npe_cont;
7232 // Load the lock word containing the rb_state.
7233 __ Ldr(ip0.W(), lock_word);
7234 // Given the numeric representation, it's enough to check the low bit of the rb_state.
7235 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
7236 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7237 __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path);
7238 static_assert(
7239 BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
7240 "Field and array LDR offsets must be the same to reuse the same code.");
7241 // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
7242 if (throw_npe != nullptr) {
7243 __ Bind(&throw_npe_cont);
7244 }
7245 // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
7246 static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
7247 "Field LDR must be 1 instruction (4B) before the return address label; "
7248 " 2 instructions (8B) for heap poisoning.");
7249 __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
7250 // Introduce a dependency on the lock_word including rb_state,
7251 // to prevent load-load reordering, and without using
7252 // a memory barrier (which would be more expensive).
7253 __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
7254 __ Br(lr); // And return back to the function.
7255 if (throw_npe != nullptr) {
7256 // Clear IP0 before returning to the fast path.
7257 __ Bind(throw_npe);
7258 __ Mov(ip0.X(), xzr);
7259 __ B(&throw_npe_cont);
7260 }
7261 // Note: The fake dependency is unnecessary for the slow path.
7262 }
7263
7264 // Load the read barrier introspection entrypoint in register `entrypoint`.
LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler & assembler,vixl::aarch64::Register entrypoint)7265 static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler,
7266 vixl::aarch64::Register entrypoint) {
7267 // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
7268 DCHECK_EQ(ip0.GetCode(), 16u);
7269 const int32_t entry_point_offset =
7270 Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
7271 __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
7272 }
7273
CompileBakerReadBarrierThunk(Arm64Assembler & assembler,uint32_t encoded_data,std::string * debug_name)7274 void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
7275 uint32_t encoded_data,
7276 /*out*/ std::string* debug_name) {
7277 BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
7278 switch (kind) {
7279 case BakerReadBarrierKind::kField:
7280 case BakerReadBarrierKind::kAcquire: {
7281 Register base_reg =
7282 vixl::aarch64::XRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
7283 CheckValidReg(base_reg.GetCode());
7284 Register holder_reg =
7285 vixl::aarch64::XRegister(BakerReadBarrierSecondRegField::Decode(encoded_data));
7286 CheckValidReg(holder_reg.GetCode());
7287 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
7288 temps.Exclude(ip0, ip1);
7289 // In the case of a field load (with relaxed semantic), if `base_reg` differs from
7290 // `holder_reg`, the offset was too large and we must have emitted (during the construction
7291 // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved
7292 // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before
7293 // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do
7294 // not necessarily do that check before going to the thunk.
7295 //
7296 // In the case of a field load with load-acquire semantics (where `base_reg` always differs
7297 // from `holder_reg`), we also need an explicit null check when implicit null checks are
7298 // allowed, as we do not emit one before going to the thunk.
7299 vixl::aarch64::Label throw_npe_label;
7300 vixl::aarch64::Label* throw_npe = nullptr;
7301 if (GetCompilerOptions().GetImplicitNullChecks() &&
7302 (holder_reg.Is(base_reg) || (kind == BakerReadBarrierKind::kAcquire))) {
7303 throw_npe = &throw_npe_label;
7304 __ Cbz(holder_reg.W(), throw_npe);
7305 }
7306 // Check if the holder is gray and, if not, add fake dependency to the base register
7307 // and return to the LDR instruction to load the reference. Otherwise, use introspection
7308 // to load the reference and call the entrypoint that performs further checks on the
7309 // reference and marks it if needed.
7310 vixl::aarch64::Label slow_path;
7311 MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
7312 EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe);
7313 __ Bind(&slow_path);
7314 if (kind == BakerReadBarrierKind::kField) {
7315 MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
7316 __ Ldr(ip0.W(), ldr_address); // Load the LDR (immediate) unsigned offset.
7317 LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7318 __ Ubfx(ip0.W(), ip0.W(), 10, 12); // Extract the offset.
7319 __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2)); // Load the reference.
7320 } else {
7321 DCHECK(kind == BakerReadBarrierKind::kAcquire);
7322 DCHECK(!base_reg.Is(holder_reg));
7323 LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7324 __ Ldar(ip0.W(), MemOperand(base_reg));
7325 }
7326 // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
7327 __ Br(ip1); // Jump to the entrypoint.
7328 break;
7329 }
7330 case BakerReadBarrierKind::kArray: {
7331 Register base_reg =
7332 vixl::aarch64::XRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
7333 CheckValidReg(base_reg.GetCode());
7334 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7335 BakerReadBarrierSecondRegField::Decode(encoded_data));
7336 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
7337 temps.Exclude(ip0, ip1);
7338 vixl::aarch64::Label slow_path;
7339 int32_t data_offset =
7340 mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
7341 MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
7342 DCHECK_LT(lock_word.GetOffset(), 0);
7343 EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
7344 __ Bind(&slow_path);
7345 MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
7346 __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset.
7347 LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7348 __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set).
7349 __ Bfi(ip1, ip0, 3, 6); // Insert ip0 to the entrypoint address to create
7350 // a switch case target based on the index register.
7351 __ Mov(ip0, base_reg); // Move the base register to ip0.
7352 __ Br(ip1); // Jump to the entrypoint's array switch case.
7353 break;
7354 }
7355 case BakerReadBarrierKind::kGcRoot: {
7356 // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
7357 // and it does not have a forwarding address), call the correct introspection entrypoint;
7358 // otherwise return the reference (or the extracted forwarding address).
7359 // There is no gray bit check for GC roots.
7360 Register root_reg =
7361 vixl::aarch64::WRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
7362 CheckValidReg(root_reg.GetCode());
7363 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7364 BakerReadBarrierSecondRegField::Decode(encoded_data));
7365 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
7366 temps.Exclude(ip0, ip1);
7367 vixl::aarch64::Label return_label, not_marked, forwarding_address;
7368 __ Cbz(root_reg, &return_label);
7369 MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value());
7370 __ Ldr(ip0.W(), lock_word);
7371 __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, ¬_marked);
7372 __ Bind(&return_label);
7373 __ Br(lr);
7374 __ Bind(¬_marked);
7375 __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
7376 __ B(&forwarding_address, mi);
7377 LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7378 // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
7379 // art_quick_read_barrier_mark_introspection_gc_roots.
7380 __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
7381 __ Mov(ip0.W(), root_reg);
7382 __ Br(ip1);
7383 __ Bind(&forwarding_address);
7384 __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift);
7385 __ Br(lr);
7386 break;
7387 }
7388 default:
7389 LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
7390 UNREACHABLE();
7391 }
7392
7393 // For JIT, the slow path is considered part of the compiled method,
7394 // so JIT should pass null as `debug_name`.
7395 DCHECK_IMPLIES(GetCompilerOptions().IsJitCompiler(), debug_name == nullptr);
7396 if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
7397 std::ostringstream oss;
7398 oss << "BakerReadBarrierThunk";
7399 switch (kind) {
7400 case BakerReadBarrierKind::kField:
7401 oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
7402 << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
7403 break;
7404 case BakerReadBarrierKind::kAcquire:
7405 oss << "Acquire_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
7406 << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
7407 break;
7408 case BakerReadBarrierKind::kArray:
7409 oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
7410 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7411 BakerReadBarrierSecondRegField::Decode(encoded_data));
7412 break;
7413 case BakerReadBarrierKind::kGcRoot:
7414 oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
7415 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7416 BakerReadBarrierSecondRegField::Decode(encoded_data));
7417 break;
7418 }
7419 *debug_name = oss.str();
7420 }
7421 }
7422
7423 #undef __
7424
7425 } // namespace arm64
7426 } // namespace art
7427